diff options
Diffstat (limited to 'fs/xfs/libxfs')
54 files changed, 3253 insertions, 1684 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 39d9525270..dc1873f76b 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -217,6 +217,7 @@ xfs_initialize_perag_data( */ if (fdblocks > sbp->sb_dblocks || ifree > ialloc) { xfs_alert(mp, "AGF corruption. Please run xfs_repair."); + xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS); error = -EFSCORRUPTED; goto out; } @@ -241,7 +242,7 @@ __xfs_free_perag( struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); ASSERT(!delayed_work_pending(&pag->pag_blockgc_work)); - kmem_free(pag); + kfree(pag); } /* @@ -263,7 +264,7 @@ xfs_free_perag( xfs_defer_drain_free(&pag->pag_intents_drain); cancel_delayed_work_sync(&pag->pag_blockgc_work); - xfs_buf_hash_destroy(pag); + xfs_buf_cache_destroy(&pag->pag_bcache); /* drop the mount's active reference */ xfs_perag_rele(pag); @@ -351,9 +352,9 @@ xfs_free_unused_perag_range( spin_unlock(&mp->m_perag_lock); if (!pag) break; - xfs_buf_hash_destroy(pag); + xfs_buf_cache_destroy(&pag->pag_bcache); xfs_defer_drain_free(&pag->pag_intents_drain); - kmem_free(pag); + kfree(pag); } } @@ -381,7 +382,7 @@ xfs_initialize_perag( continue; } - pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL); + pag = kzalloc(sizeof(*pag), GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!pag) { error = -ENOMEM; goto out_unwind_new_pags; @@ -389,7 +390,7 @@ xfs_initialize_perag( pag->pag_agno = index; pag->pag_mount = mp; - error = radix_tree_preload(GFP_NOFS); + error = radix_tree_preload(GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (error) goto out_free_pag; @@ -416,9 +417,10 @@ xfs_initialize_perag( init_waitqueue_head(&pag->pag_active_wq); pag->pagb_count = 0; pag->pagb_tree = RB_ROOT; + xfs_hooks_init(&pag->pag_rmap_update_hooks); #endif /* __KERNEL__ */ - error = xfs_buf_hash_init(pag); + error = xfs_buf_cache_init(&pag->pag_bcache); if (error) goto out_remove_pag; @@ -453,7 +455,7 @@ out_remove_pag: radix_tree_delete(&mp->m_perag_tree, index); spin_unlock(&mp->m_perag_lock); out_free_pag: - kmem_free(pag); + kfree(pag); out_unwind_new_pags: /* unwind any prior newly initialized pags */ xfs_free_unused_perag_range(mp, first_initialised, agcount); @@ -491,7 +493,7 @@ xfs_btroot_init( struct xfs_buf *bp, struct aghdr_init_data *id) { - xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno); + xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 0, id->agno); } /* Finish initializing a free space btree. */ @@ -549,7 +551,7 @@ xfs_freesp_init_recs( } /* - * Alloc btree root block init functions + * bnobt/cntbt btree root block init functions */ static void xfs_bnoroot_init( @@ -557,17 +559,7 @@ xfs_bnoroot_init( struct xfs_buf *bp, struct aghdr_init_data *id) { - xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 0, id->agno); - xfs_freesp_init_recs(mp, bp, id); -} - -static void -xfs_cntroot_init( - struct xfs_mount *mp, - struct xfs_buf *bp, - struct aghdr_init_data *id) -{ - xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 0, id->agno); + xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 0, id->agno); xfs_freesp_init_recs(mp, bp, id); } @@ -583,7 +575,7 @@ xfs_rmaproot_init( struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); struct xfs_rmap_rec *rrec; - xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno); + xfs_btree_init_buf(mp, bp, id->bc_ops, 0, 4, id->agno); /* * mark the AG header regions as static metadata The BNO @@ -678,14 +670,13 @@ xfs_agfblock_init( agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); agf->agf_seqno = cpu_to_be32(id->agno); agf->agf_length = cpu_to_be32(id->agsize); - agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp)); - agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp)); - agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1); - agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1); + agf->agf_bno_root = cpu_to_be32(XFS_BNO_BLOCK(mp)); + agf->agf_cnt_root = cpu_to_be32(XFS_CNT_BLOCK(mp)); + agf->agf_bno_level = cpu_to_be32(1); + agf->agf_cnt_level = cpu_to_be32(1); if (xfs_has_rmapbt(mp)) { - agf->agf_roots[XFS_BTNUM_RMAPi] = - cpu_to_be32(XFS_RMAP_BLOCK(mp)); - agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1); + agf->agf_rmap_root = cpu_to_be32(XFS_RMAP_BLOCK(mp)); + agf->agf_rmap_level = cpu_to_be32(1); agf->agf_rmap_blocks = cpu_to_be32(1); } @@ -796,7 +787,7 @@ struct xfs_aghdr_grow_data { size_t numblks; const struct xfs_buf_ops *ops; aghdr_init_work_f work; - xfs_btnum_t type; + const struct xfs_btree_ops *bc_ops; bool need_init; }; @@ -850,13 +841,15 @@ xfs_ag_init_headers( .numblks = BTOBB(mp->m_sb.sb_blocksize), .ops = &xfs_bnobt_buf_ops, .work = &xfs_bnoroot_init, + .bc_ops = &xfs_bnobt_ops, .need_init = true }, { /* CNT root block */ .daddr = XFS_AGB_TO_DADDR(mp, id->agno, XFS_CNT_BLOCK(mp)), .numblks = BTOBB(mp->m_sb.sb_blocksize), .ops = &xfs_cntbt_buf_ops, - .work = &xfs_cntroot_init, + .work = &xfs_bnoroot_init, + .bc_ops = &xfs_cntbt_ops, .need_init = true }, { /* INO root block */ @@ -864,7 +857,7 @@ xfs_ag_init_headers( .numblks = BTOBB(mp->m_sb.sb_blocksize), .ops = &xfs_inobt_buf_ops, .work = &xfs_btroot_init, - .type = XFS_BTNUM_INO, + .bc_ops = &xfs_inobt_ops, .need_init = true }, { /* FINO root block */ @@ -872,7 +865,7 @@ xfs_ag_init_headers( .numblks = BTOBB(mp->m_sb.sb_blocksize), .ops = &xfs_finobt_buf_ops, .work = &xfs_btroot_init, - .type = XFS_BTNUM_FINO, + .bc_ops = &xfs_finobt_ops, .need_init = xfs_has_finobt(mp) }, { /* RMAP root block */ @@ -880,6 +873,7 @@ xfs_ag_init_headers( .numblks = BTOBB(mp->m_sb.sb_blocksize), .ops = &xfs_rmapbt_buf_ops, .work = &xfs_rmaproot_init, + .bc_ops = &xfs_rmapbt_ops, .need_init = xfs_has_rmapbt(mp) }, { /* REFC root block */ @@ -887,7 +881,7 @@ xfs_ag_init_headers( .numblks = BTOBB(mp->m_sb.sb_blocksize), .ops = &xfs_refcountbt_buf_ops, .work = &xfs_btroot_init, - .type = XFS_BTNUM_REFC, + .bc_ops = &xfs_refcountbt_ops, .need_init = xfs_has_reflink(mp) }, { /* NULL terminating block */ @@ -905,7 +899,7 @@ xfs_ag_init_headers( id->daddr = dp->daddr; id->numblks = dp->numblks; - id->type = dp->type; + id->bc_ops = dp->bc_ops; error = xfs_ag_init_hdr(mp, id, dp->work, dp->ops); if (error) break; @@ -950,8 +944,10 @@ xfs_ag_shrink_space( agf = agfbp->b_addr; aglen = be32_to_cpu(agi->agi_length); /* some extra paranoid checks before we shrink the ag */ - if (XFS_IS_CORRUPT(mp, agf->agf_length != agi->agi_length)) + if (XFS_IS_CORRUPT(mp, agf->agf_length != agi->agi_length)) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF); return -EFSCORRUPTED; + } if (delta >= aglen) return -EINVAL; @@ -979,14 +975,23 @@ xfs_ag_shrink_space( if (error) { /* - * if extent allocation fails, need to roll the transaction to + * If extent allocation fails, need to roll the transaction to * ensure that the AGFL fixup has been committed anyway. + * + * We need to hold the AGF across the roll to ensure nothing can + * access the AG for allocation until the shrink is fully + * cleaned up. And due to the resetting of the AG block + * reservation space needing to lock the AGI, we also have to + * hold that so we don't get AGI/AGF lock order inversions in + * the error handling path. */ xfs_trans_bhold(*tpp, agfbp); + xfs_trans_bhold(*tpp, agibp); err2 = xfs_trans_roll(tpp); if (err2) return err2; xfs_trans_bjoin(*tpp, agfbp); + xfs_trans_bjoin(*tpp, agibp); goto resv_init_out; } diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 4b343c4fac..35de09a251 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -36,8 +36,9 @@ struct xfs_perag { atomic_t pag_active_ref; /* active reference count */ wait_queue_head_t pag_active_wq;/* woken active_ref falls to zero */ unsigned long pag_opstate; - uint8_t pagf_levels[XFS_BTNUM_AGF]; - /* # of levels in bno & cnt btree */ + uint8_t pagf_bno_level; /* # of levels in bno btree */ + uint8_t pagf_cnt_level; /* # of levels in cnt btree */ + uint8_t pagf_rmap_level;/* # of levels in rmap btree */ uint32_t pagf_flcount; /* count of blocks in freelist */ xfs_extlen_t pagf_freeblks; /* total free blocks */ xfs_extlen_t pagf_longest; /* longest free space */ @@ -86,8 +87,10 @@ struct xfs_perag { * Alternate btree heights so that online repair won't trip the write * verifiers while rebuilding the AG btrees. */ - uint8_t pagf_repair_levels[XFS_BTNUM_AGF]; + uint8_t pagf_repair_bno_level; + uint8_t pagf_repair_cnt_level; uint8_t pagf_repair_refcount_level; + uint8_t pagf_repair_rmap_level; #endif spinlock_t pag_state_lock; @@ -104,9 +107,7 @@ struct xfs_perag { int pag_ici_reclaimable; /* reclaimable inodes */ unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */ - /* buffer cache index */ - spinlock_t pag_buf_lock; /* lock for pag_buf_hash */ - struct rhashtable pag_buf_hash; + struct xfs_buf_cache pag_bcache; /* background prealloc block trimming */ struct delayed_work pag_blockgc_work; @@ -119,6 +120,9 @@ struct xfs_perag { * inconsistencies. */ struct xfs_defer_drain pag_intents_drain; + + /* Hook to feed rmapbt updates to an active online repair. */ + struct xfs_hooks pag_rmap_update_hooks; #endif /* __KERNEL__ */ }; @@ -331,7 +335,7 @@ struct aghdr_init_data { /* per header data */ xfs_daddr_t daddr; /* header location */ size_t numblks; /* size of header */ - xfs_btnum_t type; /* type of btree root block */ + const struct xfs_btree_ops *bc_ops; /* btree ops */ }; int xfs_ag_init_headers(struct xfs_mount *mp, struct aghdr_init_data *id); diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 3bd0a33fee..9da52e9217 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -26,6 +26,7 @@ #include "xfs_ag.h" #include "xfs_ag_resv.h" #include "xfs_bmap.h" +#include "xfs_health.h" struct kmem_cache *xfs_extfree_item_cache; @@ -150,23 +151,38 @@ xfs_alloc_ag_max_usable( return mp->m_sb.sb_agblocks - blocks; } + +static int +xfs_alloc_lookup( + struct xfs_btree_cur *cur, + xfs_lookup_t dir, + xfs_agblock_t bno, + xfs_extlen_t len, + int *stat) +{ + int error; + + cur->bc_rec.a.ar_startblock = bno; + cur->bc_rec.a.ar_blockcount = len; + error = xfs_btree_lookup(cur, dir, stat); + if (*stat == 1) + cur->bc_flags |= XFS_BTREE_ALLOCBT_ACTIVE; + else + cur->bc_flags &= ~XFS_BTREE_ALLOCBT_ACTIVE; + return error; +} + /* * Lookup the record equal to [bno, len] in the btree given by cur. */ -STATIC int /* error */ +static inline int /* error */ xfs_alloc_lookup_eq( struct xfs_btree_cur *cur, /* btree cursor */ xfs_agblock_t bno, /* starting block of extent */ xfs_extlen_t len, /* length of extent */ int *stat) /* success/failure */ { - int error; - - cur->bc_rec.a.ar_startblock = bno; - cur->bc_rec.a.ar_blockcount = len; - error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); - cur->bc_ag.abt.active = (*stat == 1); - return error; + return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, bno, len, stat); } /* @@ -180,13 +196,7 @@ xfs_alloc_lookup_ge( xfs_extlen_t len, /* length of extent */ int *stat) /* success/failure */ { - int error; - - cur->bc_rec.a.ar_startblock = bno; - cur->bc_rec.a.ar_blockcount = len; - error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); - cur->bc_ag.abt.active = (*stat == 1); - return error; + return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, bno, len, stat); } /* @@ -200,19 +210,14 @@ xfs_alloc_lookup_le( xfs_extlen_t len, /* length of extent */ int *stat) /* success/failure */ { - int error; - cur->bc_rec.a.ar_startblock = bno; - cur->bc_rec.a.ar_blockcount = len; - error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); - cur->bc_ag.abt.active = (*stat == 1); - return error; + return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, bno, len, stat); } static inline bool xfs_alloc_cur_active( struct xfs_btree_cur *cur) { - return cur && cur->bc_ag.abt.active; + return cur && (cur->bc_flags & XFS_BTREE_ALLOCBT_ACTIVE); } /* @@ -268,12 +273,12 @@ xfs_alloc_complain_bad_rec( struct xfs_mount *mp = cur->bc_mp; xfs_warn(mp, - "%s Freespace BTree record corruption in AG %d detected at %pS!", - cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", - cur->bc_ag.pag->pag_agno, fa); + "%sbt record corruption in AG %d detected at %pS!", + cur->bc_ops->name, cur->bc_ag.pag->pag_agno, fa); xfs_warn(mp, "start block 0x%x block count 0x%x", irec->ar_startblock, irec->ar_blockcount); + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } @@ -497,14 +502,18 @@ xfs_alloc_fixup_trees( if (XFS_IS_CORRUPT(mp, i != 1 || nfbno1 != fbno || - nflen1 != flen)) + nflen1 != flen)) { + xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; + } #endif } else { if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i))) return error; - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; + } } /* * Look up the record in the by-block tree if necessary. @@ -516,14 +525,18 @@ xfs_alloc_fixup_trees( if (XFS_IS_CORRUPT(mp, i != 1 || nfbno1 != fbno || - nflen1 != flen)) + nflen1 != flen)) { + xfs_btree_mark_sick(bno_cur); return -EFSCORRUPTED; + } #endif } else { if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i))) return error; - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bno_cur); return -EFSCORRUPTED; + } } #ifdef DEBUG @@ -536,8 +549,10 @@ xfs_alloc_fixup_trees( if (XFS_IS_CORRUPT(mp, bnoblock->bb_numrecs != - cntblock->bb_numrecs)) + cntblock->bb_numrecs)) { + xfs_btree_mark_sick(bno_cur); return -EFSCORRUPTED; + } } #endif @@ -567,30 +582,40 @@ xfs_alloc_fixup_trees( */ if ((error = xfs_btree_delete(cnt_cur, &i))) return error; - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; + } /* * Add new by-size btree entry(s). */ if (nfbno1 != NULLAGBLOCK) { if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) return error; - if (XFS_IS_CORRUPT(mp, i != 0)) + if (XFS_IS_CORRUPT(mp, i != 0)) { + xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; + } if ((error = xfs_btree_insert(cnt_cur, &i))) return error; - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; + } } if (nfbno2 != NULLAGBLOCK) { if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) return error; - if (XFS_IS_CORRUPT(mp, i != 0)) + if (XFS_IS_CORRUPT(mp, i != 0)) { + xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; + } if ((error = xfs_btree_insert(cnt_cur, &i))) return error; - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); return -EFSCORRUPTED; + } } /* * Fix up the by-block btree entry(s). @@ -601,8 +626,10 @@ xfs_alloc_fixup_trees( */ if ((error = xfs_btree_delete(bno_cur, &i))) return error; - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bno_cur); return -EFSCORRUPTED; + } } else { /* * Update the by-block entry to start later|be shorter. @@ -616,12 +643,16 @@ xfs_alloc_fixup_trees( */ if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) return error; - if (XFS_IS_CORRUPT(mp, i != 0)) + if (XFS_IS_CORRUPT(mp, i != 0)) { + xfs_btree_mark_sick(bno_cur); return -EFSCORRUPTED; + } if ((error = xfs_btree_insert(bno_cur, &i))) return error; - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bno_cur); return -EFSCORRUPTED; + } } return 0; } @@ -755,6 +786,8 @@ xfs_alloc_read_agfl( mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); + if (xfs_metadata_is_sick(error)) + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL); if (error) return error; xfs_buf_set_ref(bp, XFS_AGFL_REF); @@ -776,6 +809,7 @@ xfs_alloc_update_counters( if (unlikely(be32_to_cpu(agf->agf_freeblks) > be32_to_cpu(agf->agf_length))) { xfs_buf_mark_corrupt(agbp); + xfs_ag_mark_sick(agbp->b_pag, XFS_SICK_AG_AGF); return -EFSCORRUPTED; } @@ -828,8 +862,8 @@ xfs_alloc_cur_setup( * attempt a small allocation. */ if (!acur->cnt) - acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp, - args->agbp, args->pag, XFS_BTNUM_CNT); + acur->cnt = xfs_cntbt_init_cursor(args->mp, args->tp, + args->agbp, args->pag); error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i); if (error) return error; @@ -838,11 +872,11 @@ xfs_alloc_cur_setup( * Allocate the bnobt left and right search cursors. */ if (!acur->bnolt) - acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp, - args->agbp, args->pag, XFS_BTNUM_BNO); + acur->bnolt = xfs_bnobt_init_cursor(args->mp, args->tp, + args->agbp, args->pag); if (!acur->bnogt) - acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp, - args->agbp, args->pag, XFS_BTNUM_BNO); + acur->bnogt = xfs_bnobt_init_cursor(args->mp, args->tp, + args->agbp, args->pag); return i == 1 ? 0 : -ENOSPC; } @@ -884,15 +918,17 @@ xfs_alloc_cur_check( bool busy; unsigned busy_gen = 0; bool deactivate = false; - bool isbnobt = cur->bc_btnum == XFS_BTNUM_BNO; + bool isbnobt = xfs_btree_is_bno(cur->bc_ops); *new = 0; error = xfs_alloc_get_rec(cur, &bno, &len, &i); if (error) return error; - if (XFS_IS_CORRUPT(args->mp, i != 1)) + if (XFS_IS_CORRUPT(args->mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } /* * Check minlen and deactivate a cntbt cursor if out of acceptable size @@ -958,9 +994,8 @@ xfs_alloc_cur_check( deactivate = true; out: if (deactivate) - cur->bc_ag.abt.active = false; - trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff, - *new); + cur->bc_flags &= ~XFS_BTREE_ALLOCBT_ACTIVE; + trace_xfs_alloc_cur_check(cur, bno, len, diff, *new); return 0; } @@ -1098,6 +1133,7 @@ xfs_alloc_ag_vextent_small( if (error) goto error; if (XFS_IS_CORRUPT(args->mp, i != 1)) { + xfs_btree_mark_sick(ccur); error = -EFSCORRUPTED; goto error; } @@ -1132,6 +1168,7 @@ xfs_alloc_ag_vextent_small( *fbnop = args->agbno = fbno; *flenp = args->len = 1; if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) { + xfs_btree_mark_sick(ccur); error = -EFSCORRUPTED; goto error; } @@ -1197,8 +1234,8 @@ xfs_alloc_ag_vextent_exact( /* * Allocate/initialize a cursor for the by-number freespace btree. */ - bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->pag, XFS_BTNUM_BNO); + bno_cur = xfs_bnobt_init_cursor(args->mp, args->tp, args->agbp, + args->pag); /* * Lookup bno and minlen in the btree (minlen is irrelevant, really). @@ -1218,6 +1255,7 @@ xfs_alloc_ag_vextent_exact( if (error) goto error0; if (XFS_IS_CORRUPT(args->mp, i != 1)) { + xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } @@ -1257,8 +1295,8 @@ xfs_alloc_ag_vextent_exact( * We are allocating agbno for args->len * Allocate/initialize a cursor for the by-size btree. */ - cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->pag, XFS_BTNUM_CNT); + cnt_cur = xfs_cntbt_init_cursor(args->mp, args->tp, args->agbp, + args->pag); ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length)); error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, args->len, XFSA_FIXUP_BNO_OK); @@ -1330,7 +1368,7 @@ xfs_alloc_walk_iter( if (error) return error; if (i == 0) - cur->bc_ag.abt.active = false; + cur->bc_flags &= ~XFS_BTREE_ALLOCBT_ACTIVE; if (count > 0) count--; @@ -1444,7 +1482,7 @@ xfs_alloc_ag_vextent_locality( if (error) return error; if (i) { - acur->cnt->bc_ag.abt.active = true; + acur->cnt->bc_flags |= XFS_BTREE_ALLOCBT_ACTIVE; fbcur = acur->cnt; fbinc = false; } @@ -1497,8 +1535,10 @@ xfs_alloc_ag_vextent_lastblock( error = xfs_alloc_get_rec(acur->cnt, bno, len, &i); if (error) return error; - if (XFS_IS_CORRUPT(args->mp, i != 1)) + if (XFS_IS_CORRUPT(args->mp, i != 1)) { + xfs_btree_mark_sick(acur->cnt); return -EFSCORRUPTED; + } if (*len >= args->minlen) break; error = xfs_btree_increment(acur->cnt, 0, &i); @@ -1670,8 +1710,8 @@ restart: /* * Allocate and initialize a cursor for the by-size btree. */ - cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->pag, XFS_BTNUM_CNT); + cnt_cur = xfs_cntbt_init_cursor(args->mp, args->tp, args->agbp, + args->pag); bno_cur = NULL; /* @@ -1710,6 +1750,7 @@ restart: if (error) goto error0; if (XFS_IS_CORRUPT(args->mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } @@ -1756,6 +1797,7 @@ restart: rlen != 0 && (rlen > flen || rbno + rlen > fbno + flen))) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } @@ -1778,6 +1820,7 @@ restart: &i))) goto error0; if (XFS_IS_CORRUPT(args->mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } @@ -1790,6 +1833,7 @@ restart: rlen != 0 && (rlen > flen || rbno + rlen > fbno + flen))) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } @@ -1806,6 +1850,7 @@ restart: &i))) goto error0; if (XFS_IS_CORRUPT(args->mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } @@ -1844,14 +1889,15 @@ restart: rlen = args->len; if (XFS_IS_CORRUPT(args->mp, rlen > flen)) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } /* * Allocate and initialize a cursor for the by-block tree. */ - bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, - args->pag, XFS_BTNUM_BNO); + bno_cur = xfs_bnobt_init_cursor(args->mp, args->tp, args->agbp, + args->pag); if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, rbno, rlen, XFSA_FIXUP_CNT_OK))) goto error0; @@ -1863,6 +1909,7 @@ restart: if (XFS_IS_CORRUPT(args->mp, args->agbno + args->len > be32_to_cpu(agf->agf_length))) { + xfs_ag_mark_sick(args->pag, XFS_SICK_AG_BNOBT); error = -EFSCORRUPTED; goto error0; } @@ -1924,7 +1971,7 @@ xfs_free_ag_extent( /* * Allocate and initialize a cursor for the by-block btree. */ - bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO); + bno_cur = xfs_bnobt_init_cursor(mp, tp, agbp, pag); /* * Look for a neighboring block on the left (lower block numbers) * that is contiguous with this space. @@ -1938,6 +1985,7 @@ xfs_free_ag_extent( if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } @@ -1953,6 +2001,7 @@ xfs_free_ag_extent( * Very bad. */ if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) { + xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } @@ -1971,6 +2020,7 @@ xfs_free_ag_extent( if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } @@ -1986,6 +2036,7 @@ xfs_free_ag_extent( * Very bad. */ if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) { + xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } @@ -1994,7 +2045,7 @@ xfs_free_ag_extent( /* * Now allocate and initialize a cursor for the by-size tree. */ - cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT); + cnt_cur = xfs_cntbt_init_cursor(mp, tp, agbp, pag); /* * Have both left and right contiguous neighbors. * Merge all three into a single free block. @@ -2006,12 +2057,14 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } @@ -2021,12 +2074,14 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } @@ -2036,6 +2091,7 @@ xfs_free_ag_extent( if ((error = xfs_btree_delete(bno_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } @@ -2045,6 +2101,7 @@ xfs_free_ag_extent( if ((error = xfs_btree_decrement(bno_cur, 0, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } @@ -2064,6 +2121,7 @@ xfs_free_ag_extent( i != 1 || xxbno != ltbno || xxlen != ltlen)) { + xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } @@ -2088,12 +2146,14 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } @@ -2104,6 +2164,7 @@ xfs_free_ag_extent( if ((error = xfs_btree_decrement(bno_cur, 0, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } @@ -2123,12 +2184,14 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } @@ -2151,6 +2214,7 @@ xfs_free_ag_extent( if ((error = xfs_btree_insert(bno_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bno_cur); error = -EFSCORRUPTED; goto error0; } @@ -2163,12 +2227,14 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 0)) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } if ((error = xfs_btree_insert(cnt_cur, &i))) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto error0; } @@ -2267,8 +2333,9 @@ xfs_alloc_min_freelist( struct xfs_perag *pag) { /* AG btrees have at least 1 level. */ - static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1}; - const uint8_t *levels = pag ? pag->pagf_levels : fake_levels; + const unsigned int bno_level = pag ? pag->pagf_bno_level : 1; + const unsigned int cnt_level = pag ? pag->pagf_cnt_level : 1; + const unsigned int rmap_level = pag ? pag->pagf_rmap_level : 1; unsigned int min_free; ASSERT(mp->m_alloc_maxlevels > 0); @@ -2295,16 +2362,12 @@ xfs_alloc_min_freelist( */ /* space needed by-bno freespace btree */ - min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1, - mp->m_alloc_maxlevels) * 2 - 2; + min_free = min(bno_level + 1, mp->m_alloc_maxlevels) * 2 - 2; /* space needed by-size freespace btree */ - min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1, - mp->m_alloc_maxlevels) * 2 - 2; + min_free += min(cnt_level + 1, mp->m_alloc_maxlevels) * 2 - 2; /* space needed reverse mapping used space btree */ if (xfs_has_rmapbt(mp)) - min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1, - mp->m_rmap_maxlevels) * 2 - 2; - + min_free += min(rmap_level + 1, mp->m_rmap_maxlevels) * 2 - 2; return min_free; } @@ -2691,13 +2754,14 @@ xfs_exact_minlen_extent_available( xfs_extlen_t flen; int error = 0; - cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp, - args->pag, XFS_BTNUM_CNT); + cnt_cur = xfs_cntbt_init_cursor(args->mp, args->tp, agbp, + args->pag); error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat); if (error) goto out; if (*stat == 0) { + xfs_btree_mark_sick(cnt_cur); error = -EFSCORRUPTED; goto out; } @@ -2987,8 +3051,8 @@ xfs_alloc_log_agf( offsetof(xfs_agf_t, agf_versionnum), offsetof(xfs_agf_t, agf_seqno), offsetof(xfs_agf_t, agf_length), - offsetof(xfs_agf_t, agf_roots[0]), - offsetof(xfs_agf_t, agf_levels[0]), + offsetof(xfs_agf_t, agf_bno_root), /* also cnt/rmap root */ + offsetof(xfs_agf_t, agf_bno_level), /* also cnt/rmap levels */ offsetof(xfs_agf_t, agf_flfirst), offsetof(xfs_agf_t, agf_fllast), offsetof(xfs_agf_t, agf_flcount), @@ -3167,12 +3231,10 @@ xfs_agf_verify( be32_to_cpu(agf->agf_freeblks) > agf_length) return __this_address; - if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 || - be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 || - be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) > - mp->m_alloc_maxlevels || - be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > - mp->m_alloc_maxlevels) + if (be32_to_cpu(agf->agf_bno_level) < 1 || + be32_to_cpu(agf->agf_cnt_level) < 1 || + be32_to_cpu(agf->agf_bno_level) > mp->m_alloc_maxlevels || + be32_to_cpu(agf->agf_cnt_level) > mp->m_alloc_maxlevels) return __this_address; if (xfs_has_lazysbcount(mp) && @@ -3183,9 +3245,8 @@ xfs_agf_verify( if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length) return __this_address; - if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 || - be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > - mp->m_rmap_maxlevels) + if (be32_to_cpu(agf->agf_rmap_level) < 1 || + be32_to_cpu(agf->agf_rmap_level) > mp->m_rmap_maxlevels) return __this_address; } @@ -3268,6 +3329,8 @@ xfs_read_agf( error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops); + if (xfs_metadata_is_sick(error)) + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF); if (error) return error; @@ -3309,12 +3372,9 @@ xfs_alloc_read_agf( pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); pag->pagf_longest = be32_to_cpu(agf->agf_longest); - pag->pagf_levels[XFS_BTNUM_BNOi] = - be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); - pag->pagf_levels[XFS_BTNUM_CNTi] = - be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); - pag->pagf_levels[XFS_BTNUM_RMAPi] = - be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); + pag->pagf_bno_level = be32_to_cpu(agf->agf_bno_level); + pag->pagf_cnt_level = be32_to_cpu(agf->agf_cnt_level); + pag->pagf_rmap_level = be32_to_cpu(agf->agf_rmap_level); pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); if (xfs_agfl_needs_reset(pag->pag_mount, agf)) set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate); @@ -3343,10 +3403,8 @@ xfs_alloc_read_agf( ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); - ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == - be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi])); - ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] == - be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); + ASSERT(pag->pagf_bno_level == be32_to_cpu(agf->agf_bno_level)); + ASSERT(pag->pagf_cnt_level == be32_to_cpu(agf->agf_cnt_level)); } #endif if (agfbpp) @@ -3895,17 +3953,23 @@ __xfs_free_extent( return -EIO; error = xfs_free_extent_fix_freelist(tp, pag, &agbp); - if (error) + if (error) { + if (xfs_metadata_is_sick(error)) + xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT); return error; + } + agf = agbp->b_addr; if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT); error = -EFSCORRUPTED; goto err_release; } /* validate the extent size is legal now we have the agf locked */ if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT); error = -EFSCORRUPTED; goto err_release; } @@ -3962,7 +4026,7 @@ xfs_alloc_query_range( union xfs_btree_irec high_brec = { .a = *high_rec }; struct xfs_alloc_query_range_info query = { .priv = priv, .fn = fn }; - ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); + ASSERT(xfs_btree_is_bno(cur->bc_ops)); return xfs_btree_query_range(cur, &low_brec, &high_brec, xfs_alloc_query_range_helper, &query); } @@ -3976,7 +4040,7 @@ xfs_alloc_query_all( { struct xfs_alloc_query_range_info query; - ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); + ASSERT(xfs_btree_is_bno(cur->bc_ops)); query.priv = priv; query.fn = fn; return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query); diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index a7032bf0cd..6ef5ddd896 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -16,6 +16,7 @@ #include "xfs_alloc.h" #include "xfs_extent_busy.h" #include "xfs_error.h" +#include "xfs_health.h" #include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_ag.h" @@ -23,13 +24,22 @@ static struct kmem_cache *xfs_allocbt_cur_cache; STATIC struct xfs_btree_cur * -xfs_allocbt_dup_cursor( +xfs_bnobt_dup_cursor( struct xfs_btree_cur *cur) { - return xfs_allocbt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_ag.agbp, cur->bc_ag.pag, cur->bc_btnum); + return xfs_bnobt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp, + cur->bc_ag.pag); } +STATIC struct xfs_btree_cur * +xfs_cntbt_dup_cursor( + struct xfs_btree_cur *cur) +{ + return xfs_cntbt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp, + cur->bc_ag.pag); +} + + STATIC void xfs_allocbt_set_root( struct xfs_btree_cur *cur, @@ -38,13 +48,18 @@ xfs_allocbt_set_root( { struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; - int btnum = cur->bc_btnum; ASSERT(ptr->s != 0); - agf->agf_roots[btnum] = ptr->s; - be32_add_cpu(&agf->agf_levels[btnum], inc); - cur->bc_ag.pag->pagf_levels[btnum] += inc; + if (xfs_btree_is_bno(cur->bc_ops)) { + agf->agf_bno_root = ptr->s; + be32_add_cpu(&agf->agf_bno_level, inc); + cur->bc_ag.pag->pagf_bno_level += inc; + } else { + agf->agf_cnt_root = ptr->s; + be32_add_cpu(&agf->agf_cnt_level, inc); + cur->bc_ag.pag->pagf_cnt_level += inc; + } xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); } @@ -116,7 +131,7 @@ xfs_allocbt_update_lastrec( __be32 len; int numrecs; - ASSERT(cur->bc_btnum == XFS_BTNUM_CNT); + ASSERT(!xfs_btree_is_bno(cur->bc_ops)); switch (reason) { case LASTREC_UPDATE: @@ -226,7 +241,10 @@ xfs_allocbt_init_ptr_from_cur( ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno)); - ptr->s = agf->agf_roots[cur->bc_btnum]; + if (xfs_btree_is_bno(cur->bc_ops)) + ptr->s = agf->agf_bno_root; + else + ptr->s = agf->agf_cnt_root; } STATIC int64_t @@ -299,13 +317,12 @@ xfs_allocbt_verify( struct xfs_perag *pag = bp->b_pag; xfs_failaddr_t fa; unsigned int level; - xfs_btnum_t btnum = XFS_BTNUM_BNOi; if (!xfs_verify_magic(bp, block->bb_magic)) return __this_address; if (xfs_has_crc(mp)) { - fa = xfs_btree_sblock_v5hdr_verify(bp); + fa = xfs_btree_agblock_v5hdr_verify(bp); if (fa) return fa; } @@ -320,26 +337,32 @@ xfs_allocbt_verify( * against. */ level = be16_to_cpu(block->bb_level); - if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC)) - btnum = XFS_BTNUM_CNTi; if (pag && xfs_perag_initialised_agf(pag)) { - unsigned int maxlevel = pag->pagf_levels[btnum]; + unsigned int maxlevel, repair_maxlevel = 0; -#ifdef CONFIG_XFS_ONLINE_REPAIR /* * Online repair could be rewriting the free space btrees, so * we'll validate against the larger of either tree while this * is going on. */ - maxlevel = max_t(unsigned int, maxlevel, - pag->pagf_repair_levels[btnum]); + if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC)) { + maxlevel = pag->pagf_cnt_level; +#ifdef CONFIG_XFS_ONLINE_REPAIR + repair_maxlevel = pag->pagf_repair_cnt_level; +#endif + } else { + maxlevel = pag->pagf_bno_level; +#ifdef CONFIG_XFS_ONLINE_REPAIR + repair_maxlevel = pag->pagf_repair_bno_level; #endif - if (level >= maxlevel) + } + + if (level >= max(maxlevel, repair_maxlevel)) return __this_address; } else if (level >= mp->m_alloc_maxlevels) return __this_address; - return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]); + return xfs_btree_agblock_verify(bp, mp->m_alloc_mxr[level != 0]); } static void @@ -348,7 +371,7 @@ xfs_allocbt_read_verify( { xfs_failaddr_t fa; - if (!xfs_btree_sblock_verify_crc(bp)) + if (!xfs_btree_agblock_verify_crc(bp)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { fa = xfs_allocbt_verify(bp); @@ -372,7 +395,7 @@ xfs_allocbt_write_verify( xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } - xfs_btree_sblock_calc_crc(bp); + xfs_btree_agblock_calc_crc(bp); } @@ -454,11 +477,19 @@ xfs_allocbt_keys_contiguous( be32_to_cpu(key2->alloc.ar_startblock)); } -static const struct xfs_btree_ops xfs_bnobt_ops = { +const struct xfs_btree_ops xfs_bnobt_ops = { + .name = "bno", + .type = XFS_BTREE_TYPE_AG, + .rec_len = sizeof(xfs_alloc_rec_t), .key_len = sizeof(xfs_alloc_key_t), + .ptr_len = XFS_BTREE_SHORT_PTR_LEN, - .dup_cursor = xfs_allocbt_dup_cursor, + .lru_refs = XFS_ALLOC_BTREE_REF, + .statoff = XFS_STATS_CALC_INDEX(xs_abtb_2), + .sick_mask = XFS_SICK_AG_BNOBT, + + .dup_cursor = xfs_bnobt_dup_cursor, .set_root = xfs_allocbt_set_root, .alloc_block = xfs_allocbt_alloc_block, .free_block = xfs_allocbt_free_block, @@ -477,11 +508,20 @@ static const struct xfs_btree_ops xfs_bnobt_ops = { .keys_contiguous = xfs_allocbt_keys_contiguous, }; -static const struct xfs_btree_ops xfs_cntbt_ops = { +const struct xfs_btree_ops xfs_cntbt_ops = { + .name = "cnt", + .type = XFS_BTREE_TYPE_AG, + .geom_flags = XFS_BTGEO_LASTREC_UPDATE, + .rec_len = sizeof(xfs_alloc_rec_t), .key_len = sizeof(xfs_alloc_key_t), + .ptr_len = XFS_BTREE_SHORT_PTR_LEN, + + .lru_refs = XFS_ALLOC_BTREE_REF, + .statoff = XFS_STATS_CALC_INDEX(xs_abtc_2), + .sick_mask = XFS_SICK_AG_CNTBT, - .dup_cursor = xfs_allocbt_dup_cursor, + .dup_cursor = xfs_cntbt_dup_cursor, .set_root = xfs_allocbt_set_root, .alloc_block = xfs_allocbt_alloc_block, .free_block = xfs_allocbt_free_block, @@ -500,76 +540,55 @@ static const struct xfs_btree_ops xfs_cntbt_ops = { .keys_contiguous = NULL, /* not needed right now */ }; -/* Allocate most of a new allocation btree cursor. */ -STATIC struct xfs_btree_cur * -xfs_allocbt_init_common( +/* + * Allocate a new bnobt cursor. + * + * For staging cursors tp and agbp are NULL. + */ +struct xfs_btree_cur * +xfs_bnobt_init_cursor( struct xfs_mount *mp, struct xfs_trans *tp, - struct xfs_perag *pag, - xfs_btnum_t btnum) + struct xfs_buf *agbp, + struct xfs_perag *pag) { struct xfs_btree_cur *cur; - ASSERT(btnum == XFS_BTNUM_BNO || btnum == XFS_BTNUM_CNT); - - cur = xfs_btree_alloc_cursor(mp, tp, btnum, mp->m_alloc_maxlevels, - xfs_allocbt_cur_cache); - cur->bc_ag.abt.active = false; - - if (btnum == XFS_BTNUM_CNT) { - cur->bc_ops = &xfs_cntbt_ops; - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtc_2); - cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; - } else { - cur->bc_ops = &xfs_bnobt_ops; - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_abtb_2); - } - + cur = xfs_btree_alloc_cursor(mp, tp, &xfs_bnobt_ops, + mp->m_alloc_maxlevels, xfs_allocbt_cur_cache); cur->bc_ag.pag = xfs_perag_hold(pag); + cur->bc_ag.agbp = agbp; + if (agbp) { + struct xfs_agf *agf = agbp->b_addr; - if (xfs_has_crc(mp)) - cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - + cur->bc_nlevels = be32_to_cpu(agf->agf_bno_level); + } return cur; } /* - * Allocate a new allocation btree cursor. + * Allocate a new cntbt cursor. + * + * For staging cursors tp and agbp are NULL. */ -struct xfs_btree_cur * /* new alloc btree cursor */ -xfs_allocbt_init_cursor( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_buf *agbp, /* buffer for agf structure */ - struct xfs_perag *pag, - xfs_btnum_t btnum) /* btree identifier */ -{ - struct xfs_agf *agf = agbp->b_addr; - struct xfs_btree_cur *cur; - - cur = xfs_allocbt_init_common(mp, tp, pag, btnum); - if (btnum == XFS_BTNUM_CNT) - cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]); - else - cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); - - cur->bc_ag.agbp = agbp; - - return cur; -} - -/* Create a free space btree cursor with a fake root for staging. */ struct xfs_btree_cur * -xfs_allocbt_stage_cursor( +xfs_cntbt_init_cursor( struct xfs_mount *mp, - struct xbtree_afakeroot *afake, - struct xfs_perag *pag, - xfs_btnum_t btnum) + struct xfs_trans *tp, + struct xfs_buf *agbp, + struct xfs_perag *pag) { struct xfs_btree_cur *cur; - cur = xfs_allocbt_init_common(mp, NULL, pag, btnum); - xfs_btree_stage_afakeroot(cur, afake); + cur = xfs_btree_alloc_cursor(mp, tp, &xfs_cntbt_ops, + mp->m_alloc_maxlevels, xfs_allocbt_cur_cache); + cur->bc_ag.pag = xfs_perag_hold(pag); + cur->bc_ag.agbp = agbp; + if (agbp) { + struct xfs_agf *agf = agbp->b_addr; + + cur->bc_nlevels = be32_to_cpu(agf->agf_cnt_level); + } return cur; } @@ -588,16 +607,16 @@ xfs_allocbt_commit_staged_btree( ASSERT(cur->bc_flags & XFS_BTREE_STAGING); - agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root); - agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels); - xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); - - if (cur->bc_btnum == XFS_BTNUM_BNO) { - xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_bnobt_ops); + if (xfs_btree_is_bno(cur->bc_ops)) { + agf->agf_bno_root = cpu_to_be32(afake->af_root); + agf->agf_bno_level = cpu_to_be32(afake->af_levels); } else { - cur->bc_flags |= XFS_BTREE_LASTREC_UPDATE; - xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_cntbt_ops); + agf->agf_cnt_root = cpu_to_be32(afake->af_root); + agf->agf_cnt_level = cpu_to_be32(afake->af_levels); } + xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); + + xfs_btree_commit_afakeroot(cur, tp, agbp); } /* Calculate number of records in an alloc btree block. */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h index 45df893ef6..155b47f231 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.h +++ b/fs/xfs/libxfs/xfs_alloc_btree.h @@ -47,12 +47,12 @@ struct xbtree_afakeroot; (maxrecs) * sizeof(xfs_alloc_key_t) + \ ((index) - 1) * sizeof(xfs_alloc_ptr_t))) -extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *mp, +struct xfs_btree_cur *xfs_bnobt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *bp, - struct xfs_perag *pag, xfs_btnum_t btnum); -struct xfs_btree_cur *xfs_allocbt_stage_cursor(struct xfs_mount *mp, - struct xbtree_afakeroot *afake, struct xfs_perag *pag, - xfs_btnum_t btnum); + struct xfs_perag *pag); +struct xfs_btree_cur *xfs_cntbt_init_cursor(struct xfs_mount *mp, + struct xfs_trans *tp, struct xfs_buf *bp, + struct xfs_perag *pag); extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp, unsigned long long len); diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index e965a48e7d..673a4b6d2e 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -224,7 +224,7 @@ int xfs_attr_get_ilocked( struct xfs_da_args *args) { - ASSERT(xfs_isilocked(args->dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); + xfs_assert_ilocked(args->dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL); if (!xfs_inode_hasattr(args->dp)) return -ENOATTR; @@ -891,7 +891,8 @@ xfs_attr_defer_add( struct xfs_attr_intent *new; - new = kmem_cache_zalloc(xfs_attr_intent_cache, GFP_NOFS | __GFP_NOFAIL); + new = kmem_cache_zalloc(xfs_attr_intent_cache, + GFP_KERNEL | __GFP_NOFAIL); new->xattri_op_flags = op_flags; new->xattri_da_args = args; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 6374bf1072..ac904cc1a9 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -29,6 +29,7 @@ #include "xfs_log.h" #include "xfs_ag.h" #include "xfs_errortag.h" +#include "xfs_health.h" /* @@ -879,8 +880,7 @@ xfs_attr_shortform_to_leaf( trace_xfs_attr_sf_to_leaf(args); - tmpbuffer = kmem_alloc(size, 0); - ASSERT(tmpbuffer != NULL); + tmpbuffer = kmalloc(size, GFP_KERNEL | __GFP_NOFAIL); memcpy(tmpbuffer, ifp->if_data, size); sf = (struct xfs_attr_sf_hdr *)tmpbuffer; @@ -924,7 +924,7 @@ xfs_attr_shortform_to_leaf( } error = 0; out: - kmem_free(tmpbuffer); + kfree(tmpbuffer); return error; } @@ -1059,7 +1059,7 @@ xfs_attr3_leaf_to_shortform( trace_xfs_attr_leaf_to_sf(args); - tmpbuffer = kmem_alloc(args->geo->blksize, 0); + tmpbuffer = kmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL); if (!tmpbuffer) return -ENOMEM; @@ -1125,7 +1125,7 @@ xfs_attr3_leaf_to_shortform( error = 0; out: - kmem_free(tmpbuffer); + kfree(tmpbuffer); return error; } @@ -1533,7 +1533,7 @@ xfs_attr3_leaf_compact( trace_xfs_attr_leaf_compact(args); - tmpbuffer = kmem_alloc(args->geo->blksize, 0); + tmpbuffer = kmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL); memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); memset(bp->b_addr, 0, args->geo->blksize); leaf_src = (xfs_attr_leafblock_t *)tmpbuffer; @@ -1571,7 +1571,7 @@ xfs_attr3_leaf_compact( */ xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1); - kmem_free(tmpbuffer); + kfree(tmpbuffer); } /* @@ -2250,7 +2250,8 @@ xfs_attr3_leaf_unbalance( struct xfs_attr_leafblock *tmp_leaf; struct xfs_attr3_icleaf_hdr tmphdr; - tmp_leaf = kmem_zalloc(state->args->geo->blksize, 0); + tmp_leaf = kzalloc(state->args->geo->blksize, + GFP_KERNEL | __GFP_NOFAIL); /* * Copy the header into the temp leaf so that all the stuff @@ -2290,7 +2291,7 @@ xfs_attr3_leaf_unbalance( } memcpy(save_leaf, tmp_leaf, state->args->geo->blksize); savehdr = tmphdr; /* struct copy */ - kmem_free(tmp_leaf); + kfree(tmp_leaf); } xfs_attr3_leaf_hdr_to_disk(state->args->geo, save_leaf, &savehdr); @@ -2343,6 +2344,7 @@ xfs_attr3_leaf_lookup_int( entries = xfs_attr3_leaf_entryp(leaf); if (ichdr.count >= args->geo->blksize / 8) { xfs_buf_mark_corrupt(bp); + xfs_da_mark_sick(args); return -EFSCORRUPTED; } @@ -2362,10 +2364,12 @@ xfs_attr3_leaf_lookup_int( } if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) { xfs_buf_mark_corrupt(bp); + xfs_da_mark_sick(args); return -EFSCORRUPTED; } if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) { xfs_buf_mark_corrupt(bp); + xfs_da_mark_sick(args); return -EFSCORRUPTED; } diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index d440393b40..ff04128287 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -22,6 +22,7 @@ #include "xfs_attr_remote.h" #include "xfs_trace.h" #include "xfs_error.h" +#include "xfs_health.h" #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ @@ -276,17 +277,18 @@ xfs_attr3_rmt_hdr_set( */ STATIC int xfs_attr_rmtval_copyout( - struct xfs_mount *mp, - struct xfs_buf *bp, - xfs_ino_t ino, - int *offset, - int *valuelen, - uint8_t **dst) + struct xfs_mount *mp, + struct xfs_buf *bp, + struct xfs_inode *dp, + int *offset, + int *valuelen, + uint8_t **dst) { - char *src = bp->b_addr; - xfs_daddr_t bno = xfs_buf_daddr(bp); - int len = BBTOB(bp->b_length); - int blksize = mp->m_attr_geo->blksize; + char *src = bp->b_addr; + xfs_ino_t ino = dp->i_ino; + xfs_daddr_t bno = xfs_buf_daddr(bp); + int len = BBTOB(bp->b_length); + int blksize = mp->m_attr_geo->blksize; ASSERT(len >= blksize); @@ -302,6 +304,7 @@ xfs_attr_rmtval_copyout( xfs_alert(mp, "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", bno, *offset, byte_cnt, ino); + xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); return -EFSCORRUPTED; } hdr_size = sizeof(struct xfs_attr3_rmt_hdr); @@ -418,10 +421,12 @@ xfs_attr_rmtval_get( dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); error = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt, 0, &bp, &xfs_attr3_rmt_buf_ops); + if (xfs_metadata_is_sick(error)) + xfs_dirattr_mark_sick(args->dp, XFS_ATTR_FORK); if (error) return error; - error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, + error = xfs_attr_rmtval_copyout(mp, bp, args->dp, &offset, &valuelen, &dst); xfs_buf_relse(bp); @@ -545,11 +550,13 @@ xfs_attr_rmtval_stale( struct xfs_buf *bp; int error; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); if (XFS_IS_CORRUPT(mp, map->br_startblock == DELAYSTARTBLOCK) || - XFS_IS_CORRUPT(mp, map->br_startblock == HOLESTARTBLOCK)) + XFS_IS_CORRUPT(mp, map->br_startblock == HOLESTARTBLOCK)) { + xfs_bmap_mark_sick(ip, XFS_ATTR_FORK); return -EFSCORRUPTED; + } error = xfs_buf_incore(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, map->br_startblock), @@ -659,8 +666,10 @@ xfs_attr_rmtval_invalidate( blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK); if (error) return error; - if (XFS_IS_CORRUPT(args->dp->i_mount, nmap != 1)) + if (XFS_IS_CORRUPT(args->dp->i_mount, nmap != 1)) { + xfs_bmap_mark_sick(args->dp, XFS_ATTR_FORK); return -EFSCORRUPTED; + } error = xfs_attr_rmtval_stale(args->dp, &map, XBF_TRYLOCK); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index f362345467..656c95a22f 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -36,6 +36,9 @@ #include "xfs_refcount.h" #include "xfs_icache.h" #include "xfs_iomap.h" +#include "xfs_health.h" +#include "xfs_bmap_item.h" +#include "xfs_symlink_remote.h" struct kmem_cache *xfs_bmap_intent_cache; @@ -225,6 +228,28 @@ xfs_bmap_forkoff_reset( } } +static int +xfs_bmap_read_buf( + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_fsblock_t fsbno, /* file system block number */ + struct xfs_buf **bpp) /* buffer for fsbno */ +{ + struct xfs_buf *bp; /* return value */ + int error; + + if (!xfs_verify_fsbno(mp, fsbno)) + return -EFSCORRUPTED; + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + XFS_FSB_TO_DADDR(mp, fsbno), mp->m_bsize, 0, &bp, + &xfs_bmbt_buf_ops); + if (!error) { + xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); + *bpp = bp; + } + return error; +} + #ifdef DEBUG STATIC struct xfs_buf * xfs_bmap_get_bp( @@ -364,9 +389,9 @@ xfs_bmap_check_leaf_extents( bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); if (!bp) { bp_release = 1; - error = xfs_btree_read_bufl(mp, NULL, bno, &bp, - XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); + error = xfs_bmap_read_buf(mp, NULL, bno, &bp); + if (xfs_metadata_is_sick(error)) + xfs_btree_mark_sick(cur); if (error) goto error_norelse; } @@ -383,6 +408,7 @@ xfs_bmap_check_leaf_extents( pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); bno = be64_to_cpu(*pp); if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -450,9 +476,9 @@ xfs_bmap_check_leaf_extents( bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); if (!bp) { bp_release = 1; - error = xfs_btree_read_bufl(mp, NULL, bno, &bp, - XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); + error = xfs_bmap_read_buf(mp, NULL, bno, &bp); + if (xfs_metadata_is_sick(error)) + xfs_btree_mark_sick(cur); if (error) goto error_norelse; } @@ -562,11 +588,14 @@ xfs_bmap_btree_to_extents( pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes); cbno = be64_to_cpu(*pp); #ifdef DEBUG - if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_btree_check_lptr(cur, cbno, 1))) + if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_verify_fsbno(mp, cbno))) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } #endif - error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF, - &xfs_bmbt_buf_ops); + error = xfs_bmap_read_buf(mp, tp, cbno, &cbp); + if (xfs_metadata_is_sick(error)) + xfs_btree_mark_sick(cur); if (error) return error; cblock = XFS_BUF_TO_BLOCK(cbp); @@ -634,14 +663,13 @@ xfs_bmap_extents_to_btree( * Fill in the root. */ block = ifp->if_broot; - xfs_btree_init_block_int(mp, block, XFS_BUF_DADDR_NULL, - XFS_BTNUM_BMAP, 1, 1, ip->i_ino, - XFS_BTREE_LONG_PTRS); + xfs_bmbt_init_block(ip, block, NULL, 1, 1); /* * Need a cursor. Can't allocate until bb_level is filled in. */ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_ino.flags = wasdel ? XFS_BTCUR_BMBT_WASDEL : 0; + if (wasdel) + cur->bc_flags |= XFS_BTREE_BMBT_WASDEL; /* * Convert to a btree with two levels, one record in root. */ @@ -667,7 +695,7 @@ xfs_bmap_extents_to_btree( goto out_root_realloc; } - cur->bc_ino.allocated++; + cur->bc_bmap.allocated++; ip->i_nblocks++; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); error = xfs_trans_get_buf(tp, mp->m_ddev_targp, @@ -679,11 +707,8 @@ xfs_bmap_extents_to_btree( /* * Fill in the child block. */ - abp->b_ops = &xfs_bmbt_buf_ops; ablock = XFS_BUF_TO_BLOCK(abp); - xfs_btree_init_block_int(mp, ablock, xfs_buf_daddr(abp), - XFS_BTNUM_BMAP, 0, 0, ip->i_ino, - XFS_BTREE_LONG_PTRS); + xfs_bmbt_init_block(ip, ablock, abp, 0, 0); for_each_xfs_iext(ifp, &icur, &rec) { if (isnullstartblock(rec.br_startblock)) @@ -878,6 +903,7 @@ xfs_bmap_add_attrfork_btree( goto error0; /* must be at least one entry */ if (XFS_IS_CORRUPT(mp, stat != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -887,7 +913,7 @@ xfs_bmap_add_attrfork_btree( xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); return -ENOSPC; } - cur->bc_ino.allocated = 0; + cur->bc_bmap.allocated = 0; xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); } return 0; @@ -915,7 +941,7 @@ xfs_bmap_add_attrfork_extents( error = xfs_bmap_extents_to_btree(tp, ip, &cur, 0, flags, XFS_DATA_FORK); if (cur) { - cur->bc_ino.allocated = 0; + cur->bc_bmap.allocated = 0; xfs_btree_del_cursor(cur, error); } return error; @@ -960,6 +986,7 @@ xfs_bmap_add_attrfork_local( /* should only be called for types that support local format data */ ASSERT(0); + xfs_bmap_mark_sick(ip, XFS_ATTR_FORK); return -EFSCORRUPTED; } @@ -1143,6 +1170,7 @@ xfs_iread_bmbt_block( (unsigned long long)ip->i_ino); xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block, sizeof(*block), __this_address); + xfs_bmap_mark_sick(ip, whichfork); return -EFSCORRUPTED; } @@ -1158,6 +1186,7 @@ xfs_iread_bmbt_block( xfs_inode_verifier_error(ip, -EFSCORRUPTED, "xfs_iread_extents(2)", frp, sizeof(*frp), fa); + xfs_bmap_mark_sick(ip, whichfork); return xfs_bmap_complain_bad_rec(ip, whichfork, fa, &new); } @@ -1189,7 +1218,7 @@ xfs_iread_extents( if (!xfs_need_iread_extents(ifp)) return 0; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); ir.loaded = 0; xfs_iext_first(ifp, &ir.icur); @@ -1201,6 +1230,7 @@ xfs_iread_extents( goto out; if (XFS_IS_CORRUPT(mp, ir.loaded != ifp->if_nextents)) { + xfs_bmap_mark_sick(ip, whichfork); error = -EFSCORRUPTED; goto out; } @@ -1213,6 +1243,8 @@ xfs_iread_extents( smp_store_release(&ifp->if_needextents, 0); return 0; out: + if (xfs_metadata_is_sick(error)) + xfs_bmap_mark_sick(ip, whichfork); xfs_iext_destroy(ifp); return error; } @@ -1292,6 +1324,7 @@ xfs_bmap_last_before( break; default: ASSERT(0); + xfs_bmap_mark_sick(ip, whichfork); return -EFSCORRUPTED; } @@ -1388,8 +1421,10 @@ xfs_bmap_last_offset( if (ifp->if_format == XFS_DINODE_FMT_LOCAL) return 0; - if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp))) + if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ifp))) { + xfs_bmap_mark_sick(ip, whichfork); return -EFSCORRUPTED; + } error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty); if (error || is_empty) @@ -1429,8 +1464,7 @@ xfs_bmap_add_extent_delay_real( ASSERT(whichfork != XFS_ATTR_FORK); ASSERT(!isnullstartblock(new->br_startblock)); - ASSERT(!bma->cur || - (bma->cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL)); + ASSERT(!bma->cur || (bma->cur->bc_flags & XFS_BTREE_BMBT_WASDEL)); XFS_STATS_INC(mp, xs_add_exlist); @@ -1528,6 +1562,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1535,6 +1570,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1542,6 +1578,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1571,6 +1608,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1604,6 +1642,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1632,6 +1671,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 0)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1639,6 +1679,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1673,6 +1714,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1698,6 +1740,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 0)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1705,6 +1748,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1721,7 +1765,7 @@ xfs_bmap_add_extent_delay_real( temp = PREV.br_blockcount - new->br_blockcount; da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_ino.allocated : 0)); + (bma->cur ? bma->cur->bc_bmap.allocated : 0)); PREV.br_startoff = new_endoff; PREV.br_blockcount = temp; @@ -1749,6 +1793,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1785,6 +1830,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 0)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1792,6 +1838,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1808,7 +1855,7 @@ xfs_bmap_add_extent_delay_real( temp = PREV.br_blockcount - new->br_blockcount; da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp), startblockval(PREV.br_startblock) - - (bma->cur ? bma->cur->bc_ino.allocated : 0)); + (bma->cur ? bma->cur->bc_bmap.allocated : 0)); PREV.br_startblock = nullstartblock(da_new); PREV.br_blockcount = temp; @@ -1871,6 +1918,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 0)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1878,6 +1926,7 @@ xfs_bmap_add_extent_delay_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(bma->cur); error = -EFSCORRUPTED; goto done; } @@ -1929,8 +1978,8 @@ xfs_bmap_add_extent_delay_real( xfs_mod_delalloc(mp, (int64_t)da_new - da_old); if (bma->cur) { - da_new += bma->cur->bc_ino.allocated; - bma->cur->bc_ino.allocated = 0; + da_new += bma->cur->bc_bmap.allocated; + bma->cur->bc_bmap.allocated = 0; } /* adjust for changes in reserved delayed indirect blocks */ @@ -2074,30 +2123,35 @@ xfs_bmap_add_extent_unwritten_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if ((error = xfs_btree_delete(cur, &i))) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if ((error = xfs_btree_delete(cur, &i))) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2126,18 +2180,21 @@ xfs_bmap_add_extent_unwritten_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if ((error = xfs_btree_delete(cur, &i))) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2169,18 +2226,21 @@ xfs_bmap_add_extent_unwritten_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if ((error = xfs_btree_delete(cur, &i))) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if ((error = xfs_btree_decrement(cur, 0, &i))) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2207,6 +2267,7 @@ xfs_bmap_add_extent_unwritten_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2240,6 +2301,7 @@ xfs_bmap_add_extent_unwritten_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2277,6 +2339,7 @@ xfs_bmap_add_extent_unwritten_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2287,6 +2350,7 @@ xfs_bmap_add_extent_unwritten_real( if ((error = xfs_btree_insert(cur, &i))) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2317,6 +2381,7 @@ xfs_bmap_add_extent_unwritten_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2353,6 +2418,7 @@ xfs_bmap_add_extent_unwritten_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2363,12 +2429,14 @@ xfs_bmap_add_extent_unwritten_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 0)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if ((error = xfs_btree_insert(cur, &i))) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2405,6 +2473,7 @@ xfs_bmap_add_extent_unwritten_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2417,6 +2486,7 @@ xfs_bmap_add_extent_unwritten_real( if ((error = xfs_btree_insert(cur, &i))) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2429,6 +2499,7 @@ xfs_bmap_add_extent_unwritten_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 0)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2436,6 +2507,7 @@ xfs_bmap_add_extent_unwritten_real( if ((error = xfs_btree_insert(cur, &i))) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2472,7 +2544,7 @@ xfs_bmap_add_extent_unwritten_real( /* clear out the allocated field, done with it now in any case. */ if (cur) { - cur->bc_ino.allocated = 0; + cur->bc_bmap.allocated = 0; *curp = cur; } @@ -2651,7 +2723,7 @@ xfs_bmap_add_extent_hole_real( struct xfs_bmbt_irec old; ASSERT(!isnullstartblock(new->br_startblock)); - ASSERT(!cur || !(cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL)); + ASSERT(!cur || !(cur->bc_flags & XFS_BTREE_BMBT_WASDEL)); XFS_STATS_INC(mp, xs_add_exlist); @@ -2721,6 +2793,7 @@ xfs_bmap_add_extent_hole_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2728,6 +2801,7 @@ xfs_bmap_add_extent_hole_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2735,6 +2809,7 @@ xfs_bmap_add_extent_hole_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2764,6 +2839,7 @@ xfs_bmap_add_extent_hole_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2794,6 +2870,7 @@ xfs_bmap_add_extent_hole_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2820,6 +2897,7 @@ xfs_bmap_add_extent_hole_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 0)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2827,6 +2905,7 @@ xfs_bmap_add_extent_hole_real( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2853,7 +2932,7 @@ xfs_bmap_add_extent_hole_real( /* clear out the allocated field, done with it now in any case. */ if (cur) - cur->bc_ino.allocated = 0; + cur->bc_bmap.allocated = 0; xfs_bmap_check_leaf_extents(cur, ip, whichfork); done: @@ -3898,14 +3977,18 @@ xfs_bmapi_read( ASSERT(*nmap >= 1); ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_ENTIRE))); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)); + xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL); - if (WARN_ON_ONCE(!ifp)) + if (WARN_ON_ONCE(!ifp)) { + xfs_bmap_mark_sick(ip, whichfork); return -EFSCORRUPTED; + } if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || - XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) + XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { + xfs_bmap_mark_sick(ip, whichfork); return -EFSCORRUPTED; + } if (xfs_is_shutdown(mp)) return -EIO; @@ -4160,9 +4243,8 @@ xfs_bmapi_allocate( */ bma->nallocs++; - if (bma->cur) - bma->cur->bc_ino.flags = - bma->wasdel ? XFS_BTCUR_BMBT_WASDEL : 0; + if (bma->cur && bma->wasdel) + bma->cur->bc_flags |= XFS_BTREE_BMBT_WASDEL; bma->got.br_startoff = bma->offset; bma->got.br_startblock = bma->blkno; @@ -4369,7 +4451,7 @@ xfs_bmapi_write( ASSERT(tp != NULL); ASSERT(len > 0); ASSERT(ifp->if_format != XFS_DINODE_FMT_LOCAL); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); ASSERT(!(flags & XFS_BMAPI_REMAP)); /* zeroing is for currently only for data extents, not metadata */ @@ -4386,6 +4468,7 @@ xfs_bmapi_write( if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { + xfs_bmap_mark_sick(ip, whichfork); return -EFSCORRUPTED; } @@ -4613,9 +4696,11 @@ xfs_bmapi_convert_delalloc( error = -ENOSPC; if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK)) goto out_finish; - error = -EFSCORRUPTED; - if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock))) + if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock))) { + xfs_bmap_mark_sick(ip, whichfork); + error = -EFSCORRUPTED; goto out_finish; + } XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length)); XFS_STATS_INC(mp, xs_xstrat_quick); @@ -4666,7 +4751,7 @@ xfs_bmapi_remap( ifp = xfs_ifork_ptr(ip, whichfork); ASSERT(len > 0); ASSERT(len <= (xfs_filblks_t)XFS_MAX_BMBT_EXTLEN); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC | XFS_BMAPI_NORMAP))); ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) != @@ -4674,6 +4759,7 @@ xfs_bmapi_remap( if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { + xfs_bmap_mark_sick(ip, whichfork); return -EFSCORRUPTED; } @@ -4693,10 +4779,8 @@ xfs_bmapi_remap( ip->i_nblocks += len; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - if (ifp->if_format == XFS_DINODE_FMT_BTREE) { + if (ifp->if_format == XFS_DINODE_FMT_BTREE) cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_ino.flags = 0; - } got.br_startoff = bno; got.br_startblock = startblock; @@ -4831,7 +4915,7 @@ xfs_bmap_del_extent_delay( XFS_STATS_INC(mp, xs_del_exlist); - isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); + isrt = xfs_ifork_is_realtime(ip, whichfork); del_endoff = del->br_startoff + del->br_blockcount; got_endoff = got->br_startoff + got->br_blockcount; da_old = startblockval(got->br_startblock); @@ -5067,7 +5151,7 @@ xfs_bmap_del_extent_real( return -ENOSPC; *logflagsp = XFS_ILOG_CORE; - if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { + if (xfs_ifork_is_realtime(ip, whichfork)) { if (!(bflags & XFS_BMAPI_REMAP)) { error = xfs_rtfree_blocks(tp, del->br_startblock, del->br_blockcount); @@ -5088,8 +5172,10 @@ xfs_bmap_del_extent_real( error = xfs_bmbt_lookup_eq(cur, &got, &i); if (error) return error; - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } } if (got.br_startoff == del->br_startoff) @@ -5113,8 +5199,10 @@ xfs_bmap_del_extent_real( } if ((error = xfs_btree_delete(cur, &i))) return error; - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } break; case BMAP_LEFT_FILLING: /* @@ -5186,8 +5274,10 @@ xfs_bmap_del_extent_real( error = xfs_bmbt_lookup_eq(cur, &got, &i); if (error) return error; - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } /* * Update the btree record back * to the original value. @@ -5203,8 +5293,10 @@ xfs_bmap_del_extent_real( *logflagsp = 0; return -ENOSPC; } - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } } else *logflagsp |= xfs_ilog_fext(whichfork); @@ -5286,12 +5378,14 @@ __xfs_bunmapi( whichfork = xfs_bmapi_whichfork(flags); ASSERT(whichfork != XFS_COW_FORK); ifp = xfs_ifork_ptr(ip, whichfork); - if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp))) + if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp))) { + xfs_bmap_mark_sick(ip, whichfork); return -EFSCORRUPTED; + } if (xfs_is_shutdown(mp)) return -EIO; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); ASSERT(len > 0); ASSERT(nexts >= 0); @@ -5304,7 +5398,7 @@ __xfs_bunmapi( return 0; } XFS_STATS_INC(mp, xs_blk_unmap); - isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); + isrt = xfs_ifork_is_realtime(ip, whichfork); end = start + len; if (!xfs_iext_lookup_extent_before(ip, ifp, &end, &icur, &got)) { @@ -5317,7 +5411,6 @@ __xfs_bunmapi( if (ifp->if_format == XFS_DINODE_FMT_BTREE) { ASSERT(ifp->if_format == XFS_DINODE_FMT_BTREE); cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_ino.flags = 0; } else cur = NULL; @@ -5367,7 +5460,7 @@ __xfs_bunmapi( if (del.br_startoff + del.br_blockcount > end + 1) del.br_blockcount = end + 1 - del.br_startoff; - if (!isrt) + if (!isrt || (flags & XFS_BMAPI_REMAP)) goto delete; mod = xfs_rtb_to_rtxoff(mp, @@ -5385,7 +5478,7 @@ __xfs_bunmapi( * This piece is unwritten, or we're not * using unwritten extents. Skip over it. */ - ASSERT(end >= mod); + ASSERT((flags & XFS_BMAPI_REMAP) || end >= mod); end -= mod > del.br_blockcount ? del.br_blockcount : mod; if (end < got.br_startoff && @@ -5555,7 +5648,7 @@ error0: xfs_trans_log_inode(tp, ip, logflags); if (cur) { if (!error) - cur->bc_ino.allocated = 0; + cur->bc_bmap.allocated = 0; xfs_btree_del_cursor(cur, error); } return error; @@ -5635,8 +5728,7 @@ xfs_bmse_merge( blockcount = left->br_blockcount + got->br_blockcount; - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); ASSERT(xfs_bmse_can_merge(left, got, shift)); new = *left; @@ -5657,21 +5749,27 @@ xfs_bmse_merge( error = xfs_bmbt_lookup_eq(cur, got, &i); if (error) return error; - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } error = xfs_btree_delete(cur, &i); if (error) return error; - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } /* lookup and update size of the previous extent */ error = xfs_bmbt_lookup_eq(cur, left, &i); if (error) return error; - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } error = xfs_bmbt_update(cur, &new); if (error) @@ -5719,8 +5817,10 @@ xfs_bmap_shift_update_extent( error = xfs_bmbt_lookup_eq(cur, &prev, &i); if (error) return error; - if (XFS_IS_CORRUPT(mp, i != 1)) + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } error = xfs_bmbt_update(cur, got); if (error) @@ -5758,28 +5858,28 @@ xfs_bmap_collapse_extents( if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { + xfs_bmap_mark_sick(ip, whichfork); return -EFSCORRUPTED; } if (xfs_is_shutdown(mp)) return -EIO; - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL)); + xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); error = xfs_iread_extents(tp, ip, whichfork); if (error) return error; - if (ifp->if_format == XFS_DINODE_FMT_BTREE) { + if (ifp->if_format == XFS_DINODE_FMT_BTREE) cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_ino.flags = 0; - } if (!xfs_iext_lookup_extent(ip, ifp, *next_fsb, &icur, &got)) { *done = true; goto del_cursor; } if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) { + xfs_bmap_mark_sick(ip, whichfork); error = -EFSCORRUPTED; goto del_cursor; } @@ -5837,7 +5937,7 @@ xfs_bmap_can_insert_extents( int is_empty; int error = 0; - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); + xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL); if (xfs_is_shutdown(ip->i_mount)) return -EIO; @@ -5873,22 +5973,21 @@ xfs_bmap_insert_extents( if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { + xfs_bmap_mark_sick(ip, whichfork); return -EFSCORRUPTED; } if (xfs_is_shutdown(mp)) return -EIO; - ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL)); + xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL); error = xfs_iread_extents(tp, ip, whichfork); if (error) return error; - if (ifp->if_format == XFS_DINODE_FMT_BTREE) { + if (ifp->if_format == XFS_DINODE_FMT_BTREE) cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_ino.flags = 0; - } if (*next_fsb == NULLFSBLOCK) { xfs_iext_last(ifp, &icur); @@ -5904,11 +6003,13 @@ xfs_bmap_insert_extents( } } if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) { + xfs_bmap_mark_sick(ip, whichfork); error = -EFSCORRUPTED; goto del_cursor; } if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) { + xfs_bmap_mark_sick(ip, whichfork); error = -EFSCORRUPTED; goto del_cursor; } @@ -5976,6 +6077,7 @@ xfs_bmap_split_extent( if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ifp)) || XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) { + xfs_bmap_mark_sick(ip, whichfork); return -EFSCORRUPTED; } @@ -6002,11 +6104,11 @@ xfs_bmap_split_extent( if (ifp->if_format == XFS_DINODE_FMT_BTREE) { cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); - cur->bc_ino.flags = 0; error = xfs_bmbt_lookup_eq(cur, &got, &i); if (error) goto del_cursor; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto del_cursor; } @@ -6034,6 +6136,7 @@ xfs_bmap_split_extent( if (error) goto del_cursor; if (XFS_IS_CORRUPT(mp, i != 0)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto del_cursor; } @@ -6041,6 +6144,7 @@ xfs_bmap_split_extent( if (error) goto del_cursor; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto del_cursor; } @@ -6060,7 +6164,7 @@ xfs_bmap_split_extent( del_cursor: if (cur) { - cur->bc_ino.allocated = 0; + cur->bc_bmap.allocated = 0; xfs_btree_del_cursor(cur, error); } @@ -6069,17 +6173,8 @@ del_cursor: return error; } -/* Deferred mapping is only for real extents in the data fork. */ -static bool -xfs_bmap_is_update_needed( - struct xfs_bmbt_irec *bmap) -{ - return bmap->br_startblock != HOLESTARTBLOCK && - bmap->br_startblock != DELAYSTARTBLOCK; -} - /* Record a bmap intent. */ -static int +static inline void __xfs_bmap_add( struct xfs_trans *tp, enum xfs_bmap_intent_type type, @@ -6089,25 +6184,19 @@ __xfs_bmap_add( { struct xfs_bmap_intent *bi; - trace_xfs_bmap_defer(tp->t_mountp, - XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock), - type, - XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock), - ip->i_ino, whichfork, - bmap->br_startoff, - bmap->br_blockcount, - bmap->br_state); + if ((whichfork != XFS_DATA_FORK && whichfork != XFS_ATTR_FORK) || + bmap->br_startblock == HOLESTARTBLOCK || + bmap->br_startblock == DELAYSTARTBLOCK) + return; - bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_NOFS | __GFP_NOFAIL); + bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL); INIT_LIST_HEAD(&bi->bi_list); bi->bi_type = type; bi->bi_owner = ip; bi->bi_whichfork = whichfork; bi->bi_bmap = *bmap; - xfs_bmap_update_get_group(tp->t_mountp, bi); - xfs_defer_add(tp, &bi->bi_list, &xfs_bmap_update_defer_type); - return 0; + xfs_bmap_defer_add(tp, bi); } /* Map an extent into a file. */ @@ -6115,12 +6204,10 @@ void xfs_bmap_map_extent( struct xfs_trans *tp, struct xfs_inode *ip, + int whichfork, struct xfs_bmbt_irec *PREV) { - if (!xfs_bmap_is_update_needed(PREV)) - return; - - __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV); + __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, whichfork, PREV); } /* Unmap an extent out of a file. */ @@ -6128,12 +6215,10 @@ void xfs_bmap_unmap_extent( struct xfs_trans *tp, struct xfs_inode *ip, + int whichfork, struct xfs_bmbt_irec *PREV) { - if (!xfs_bmap_is_update_needed(PREV)) - return; - - __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV); + __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, whichfork, PREV); } /* @@ -6147,36 +6232,35 @@ xfs_bmap_finish_one( { struct xfs_bmbt_irec *bmap = &bi->bi_bmap; int error = 0; + int flags = 0; - ASSERT(tp->t_highest_agno == NULLAGNUMBER); + if (bi->bi_whichfork == XFS_ATTR_FORK) + flags |= XFS_BMAPI_ATTRFORK; - trace_xfs_bmap_deferred(tp->t_mountp, - XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock), - bi->bi_type, - XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock), - bi->bi_owner->i_ino, bi->bi_whichfork, - bmap->br_startoff, bmap->br_blockcount, - bmap->br_state); + ASSERT(tp->t_highest_agno == NULLAGNUMBER); - if (WARN_ON_ONCE(bi->bi_whichfork != XFS_DATA_FORK)) - return -EFSCORRUPTED; + trace_xfs_bmap_deferred(bi); - if (XFS_TEST_ERROR(false, tp->t_mountp, - XFS_ERRTAG_BMAP_FINISH_ONE)) + if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE)) return -EIO; switch (bi->bi_type) { case XFS_BMAP_MAP: + if (bi->bi_bmap.br_state == XFS_EXT_UNWRITTEN) + flags |= XFS_BMAPI_PREALLOC; error = xfs_bmapi_remap(tp, bi->bi_owner, bmap->br_startoff, - bmap->br_blockcount, bmap->br_startblock, 0); + bmap->br_blockcount, bmap->br_startblock, + flags); bmap->br_blockcount = 0; break; case XFS_BMAP_UNMAP: error = __xfs_bunmapi(tp, bi->bi_owner, bmap->br_startoff, - &bmap->br_blockcount, XFS_BMAPI_REMAP, 1); + &bmap->br_blockcount, flags | XFS_BMAPI_REMAP, + 1); break; default: ASSERT(0); + xfs_bmap_mark_sick(bi->bi_owner, bi->bi_whichfork); error = -EFSCORRUPTED; } @@ -6257,7 +6341,7 @@ xfs_bunmapi_range( xfs_filblks_t unmap_len = endoff - startoff + 1; int error = 0; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); while (unmap_len > 0) { ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER); @@ -6274,3 +6358,46 @@ xfs_bunmapi_range( out: return error; } + +struct xfs_bmap_query_range { + xfs_bmap_query_range_fn fn; + void *priv; +}; + +/* Format btree record and pass to our callback. */ +STATIC int +xfs_bmap_query_range_helper( + struct xfs_btree_cur *cur, + const union xfs_btree_rec *rec, + void *priv) +{ + struct xfs_bmap_query_range *query = priv; + struct xfs_bmbt_irec irec; + xfs_failaddr_t fa; + + xfs_bmbt_disk_get_all(&rec->bmbt, &irec); + fa = xfs_bmap_validate_extent(cur->bc_ino.ip, cur->bc_ino.whichfork, + &irec); + if (fa) { + xfs_btree_mark_sick(cur); + return xfs_bmap_complain_bad_rec(cur->bc_ino.ip, + cur->bc_ino.whichfork, fa, &irec); + } + + return query->fn(cur, &irec, query->priv); +} + +/* Find all bmaps. */ +int +xfs_bmap_query_all( + struct xfs_btree_cur *cur, + xfs_bmap_query_range_fn fn, + void *priv) +{ + struct xfs_bmap_query_range query = { + .priv = priv, + .fn = fn, + }; + + return xfs_btree_query_all(cur, xfs_bmap_query_range_helper, &query); +} diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index f6b73f1bad..f766259530 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -232,6 +232,10 @@ enum xfs_bmap_intent_type { XFS_BMAP_UNMAP, }; +#define XFS_BMAP_INTENT_STRINGS \ + { XFS_BMAP_MAP, "map" }, \ + { XFS_BMAP_UNMAP, "unmap" } + struct xfs_bmap_intent { struct list_head bi_list; enum xfs_bmap_intent_type bi_type; @@ -241,14 +245,11 @@ struct xfs_bmap_intent { struct xfs_bmbt_irec bi_bmap; }; -void xfs_bmap_update_get_group(struct xfs_mount *mp, - struct xfs_bmap_intent *bi); - int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_bmap_intent *bi); void xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, - struct xfs_bmbt_irec *imap); + int whichfork, struct xfs_bmbt_irec *imap); void xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, - struct xfs_bmbt_irec *imap); + int whichfork, struct xfs_bmbt_irec *imap); static inline uint32_t xfs_bmap_fork_to_state(int whichfork) { @@ -280,4 +281,12 @@ extern struct kmem_cache *xfs_bmap_intent_cache; int __init xfs_bmap_intent_init_cache(void); void xfs_bmap_intent_destroy_cache(void); +typedef int (*xfs_bmap_query_range_fn)( + struct xfs_btree_cur *cur, + struct xfs_bmbt_irec *rec, + void *priv); + +int xfs_bmap_query_all(struct xfs_btree_cur *cur, xfs_bmap_query_range_fn fn, + void *priv); + #endif /* __XFS_BMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 71f2d50f78..f5d84dcb58 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -26,6 +26,22 @@ static struct kmem_cache *xfs_bmbt_cur_cache; +void +xfs_bmbt_init_block( + struct xfs_inode *ip, + struct xfs_btree_block *buf, + struct xfs_buf *bp, + __u16 level, + __u16 numrecs) +{ + if (bp) + xfs_btree_init_buf(ip->i_mount, bp, &xfs_bmbt_ops, level, + numrecs, ip->i_ino); + else + xfs_btree_init_block(ip->i_mount, buf, &xfs_bmbt_ops, level, + numrecs, ip->i_ino); +} + /* * Convert on-disk form of btree root to in-memory form. */ @@ -44,9 +60,7 @@ xfs_bmdr_to_bmbt( xfs_bmbt_key_t *tkp; __be64 *tpp; - xfs_btree_init_block_int(mp, rblock, XFS_BUF_DADDR_NULL, - XFS_BTNUM_BMAP, 0, 0, ip->i_ino, - XFS_BTREE_LONG_PTRS); + xfs_bmbt_init_block(ip, rblock, NULL, 0, 0); rblock->bb_level = dblock->bb_level; ASSERT(be16_to_cpu(rblock->bb_level) > 0); rblock->bb_numrecs = dblock->bb_numrecs; @@ -171,13 +185,8 @@ xfs_bmbt_dup_cursor( new = xfs_bmbt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ino.ip, cur->bc_ino.whichfork); - - /* - * Copy the firstblock, dfops, and flags values, - * since init cursor doesn't get them. - */ - new->bc_ino.flags = cur->bc_ino.flags; - + new->bc_flags |= (cur->bc_flags & + (XFS_BTREE_BMBT_INVALID_OWNER | XFS_BTREE_BMBT_WASDEL)); return new; } @@ -189,10 +198,10 @@ xfs_bmbt_update_cursor( ASSERT((dst->bc_tp->t_highest_agno != NULLAGNUMBER) || (dst->bc_ino.ip->i_diflags & XFS_DIFLAG_REALTIME)); - dst->bc_ino.allocated += src->bc_ino.allocated; + dst->bc_bmap.allocated += src->bc_bmap.allocated; dst->bc_tp->t_highest_agno = src->bc_tp->t_highest_agno; - src->bc_ino.allocated = 0; + src->bc_bmap.allocated = 0; } STATIC int @@ -211,7 +220,7 @@ xfs_bmbt_alloc_block( xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_ino.ip->i_ino, cur->bc_ino.whichfork); args.minlen = args.maxlen = args.prod = 1; - args.wasdel = cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL; + args.wasdel = cur->bc_flags & XFS_BTREE_BMBT_WASDEL; if (!args.wasdel && args.tp->t_blk_res == 0) return -ENOSPC; @@ -247,7 +256,7 @@ xfs_bmbt_alloc_block( } ASSERT(args.len == 1); - cur->bc_ino.allocated++; + cur->bc_bmap.allocated++; cur->bc_ino.ip->i_nblocks++; xfs_trans_log_inode(args.tp, cur->bc_ino.ip, XFS_ILOG_CORE); xfs_trans_mod_dquot_byino(args.tp, cur->bc_ino.ip, @@ -360,14 +369,6 @@ xfs_bmbt_init_rec_from_cur( xfs_bmbt_disk_set_all(&rec->bmbt, &cur->bc_rec.b); } -STATIC void -xfs_bmbt_init_ptr_from_cur( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr) -{ - ptr->l = 0; -} - STATIC int64_t xfs_bmbt_key_diff( struct xfs_btree_cur *cur, @@ -419,7 +420,7 @@ xfs_bmbt_verify( * XXX: need a better way of verifying the owner here. Right now * just make sure there has been one set. */ - fa = xfs_btree_lblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN); + fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN); if (fa) return fa; } @@ -435,7 +436,7 @@ xfs_bmbt_verify( if (level > max(mp->m_bm_maxlevels[0], mp->m_bm_maxlevels[1])) return __this_address; - return xfs_btree_lblock_verify(bp, mp->m_bmap_dmxr[level != 0]); + return xfs_btree_fsblock_verify(bp, mp->m_bmap_dmxr[level != 0]); } static void @@ -444,7 +445,7 @@ xfs_bmbt_read_verify( { xfs_failaddr_t fa; - if (!xfs_btree_lblock_verify_crc(bp)) + if (!xfs_btree_fsblock_verify_crc(bp)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { fa = xfs_bmbt_verify(bp); @@ -468,7 +469,7 @@ xfs_bmbt_write_verify( xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } - xfs_btree_lblock_calc_crc(bp); + xfs_btree_fsblock_calc_crc(bp); } const struct xfs_buf_ops xfs_bmbt_buf_ops = { @@ -515,9 +516,16 @@ xfs_bmbt_keys_contiguous( be64_to_cpu(key2->bmbt.br_startoff)); } -static const struct xfs_btree_ops xfs_bmbt_ops = { +const struct xfs_btree_ops xfs_bmbt_ops = { + .name = "bmap", + .type = XFS_BTREE_TYPE_INODE, + .rec_len = sizeof(xfs_bmbt_rec_t), .key_len = sizeof(xfs_bmbt_key_t), + .ptr_len = XFS_BTREE_LONG_PTR_LEN, + + .lru_refs = XFS_BMAP_BTREE_REF, + .statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2), .dup_cursor = xfs_bmbt_dup_cursor, .update_cursor = xfs_bmbt_update_cursor, @@ -529,7 +537,6 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .init_key_from_rec = xfs_bmbt_init_key_from_rec, .init_high_key_from_rec = xfs_bmbt_init_high_key_from_rec, .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, - .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, .key_diff = xfs_bmbt_key_diff, .diff_two_keys = xfs_bmbt_diff_two_keys, .buf_ops = &xfs_bmbt_buf_ops, @@ -538,35 +545,10 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .keys_contiguous = xfs_bmbt_keys_contiguous, }; -static struct xfs_btree_cur * -xfs_bmbt_init_common( - struct xfs_mount *mp, - struct xfs_trans *tp, - struct xfs_inode *ip, - int whichfork) -{ - struct xfs_btree_cur *cur; - - ASSERT(whichfork != XFS_COW_FORK); - - cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_BMAP, - mp->m_bm_maxlevels[whichfork], xfs_bmbt_cur_cache); - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2); - - cur->bc_ops = &xfs_bmbt_ops; - cur->bc_flags = XFS_BTREE_LONG_PTRS | XFS_BTREE_ROOT_IN_INODE; - if (xfs_has_crc(mp)) - cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - - cur->bc_ino.ip = ip; - cur->bc_ino.allocated = 0; - cur->bc_ino.flags = 0; - - return cur; -} - /* - * Allocate a new bmap btree cursor. + * Create a new bmap btree cursor. + * + * For staging cursors -1 in passed in whichfork. */ struct xfs_btree_cur * xfs_bmbt_init_cursor( @@ -575,15 +557,34 @@ xfs_bmbt_init_cursor( struct xfs_inode *ip, int whichfork) { - struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); struct xfs_btree_cur *cur; + unsigned int maxlevels; - cur = xfs_bmbt_init_common(mp, tp, ip, whichfork); + ASSERT(whichfork != XFS_COW_FORK); - cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; - cur->bc_ino.forksize = xfs_inode_fork_size(ip, whichfork); + /* + * The Data fork always has larger maxlevel, so use that for staging + * cursors. + */ + switch (whichfork) { + case XFS_STAGING_FORK: + maxlevels = mp->m_bm_maxlevels[XFS_DATA_FORK]; + break; + default: + maxlevels = mp->m_bm_maxlevels[whichfork]; + break; + } + cur = xfs_btree_alloc_cursor(mp, tp, &xfs_bmbt_ops, maxlevels, + xfs_bmbt_cur_cache); + cur->bc_ino.ip = ip; cur->bc_ino.whichfork = whichfork; + cur->bc_bmap.allocated = 0; + if (whichfork != XFS_STAGING_FORK) { + struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); + cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; + cur->bc_ino.forksize = xfs_inode_fork_size(ip, whichfork); + } return cur; } @@ -599,33 +600,6 @@ xfs_bmbt_block_maxrecs( } /* - * Allocate a new bmap btree cursor for reloading an inode block mapping data - * structure. Note that callers can use the staged cursor to reload extents - * format inode forks if they rebuild the iext tree and commit the staged - * cursor immediately. - */ -struct xfs_btree_cur * -xfs_bmbt_stage_cursor( - struct xfs_mount *mp, - struct xfs_inode *ip, - struct xbtree_ifakeroot *ifake) -{ - struct xfs_btree_cur *cur; - struct xfs_btree_ops *ops; - - /* data fork always has larger maxheight */ - cur = xfs_bmbt_init_common(mp, NULL, ip, XFS_DATA_FORK); - cur->bc_nlevels = ifake->if_levels; - cur->bc_ino.forksize = ifake->if_fork_size; - - /* Don't let anyone think we're attached to the real fork yet. */ - cur->bc_ino.whichfork = -1; - xfs_btree_stage_ifakeroot(cur, ifake, &ops); - ops->update_cursor = NULL; - return cur; -} - -/* * Swap in the new inode fork root. Once we pass this point the newly rebuilt * mappings are in place and we have to kill off any old btree blocks. */ @@ -665,7 +639,7 @@ xfs_bmbt_commit_staged_btree( break; } xfs_trans_log_inode(tp, cur->bc_ino.ip, flags); - xfs_btree_commit_ifakeroot(cur, tp, whichfork, &xfs_bmbt_ops); + xfs_btree_commit_ifakeroot(cur, tp, whichfork); } /* @@ -751,7 +725,7 @@ xfs_bmbt_change_owner( ASSERT(xfs_ifork_ptr(ip, whichfork)->if_format == XFS_DINODE_FMT_BTREE); cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); - cur->bc_ino.flags |= XFS_BTCUR_BMBT_INVALID_OWNER; + cur->bc_flags |= XFS_BTREE_BMBT_INVALID_OWNER; error = xfs_btree_change_owner(cur, new_owner, buffer_list); xfs_btree_del_cursor(cur, error); diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index 151b8491f6..de1b73f122 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h @@ -107,8 +107,6 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); -struct xfs_btree_cur *xfs_bmbt_stage_cursor(struct xfs_mount *mp, - struct xfs_inode *ip, struct xbtree_ifakeroot *ifake); void xfs_bmbt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, int whichfork); @@ -120,4 +118,7 @@ unsigned int xfs_bmbt_maxlevels_ondisk(void); int __init xfs_bmbt_init_cur_cache(void); void xfs_bmbt_destroy_cur_cache(void); +void xfs_bmbt_init_block(struct xfs_inode *ip, struct xfs_btree_block *buf, + struct xfs_buf *bp, __u16 level, __u16 numrecs); + #endif /* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index ea8d3659df..d29547572a 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -27,28 +27,24 @@ #include "xfs_bmap_btree.h" #include "xfs_rmap_btree.h" #include "xfs_refcount_btree.h" +#include "xfs_health.h" +#include "xfs_buf_mem.h" +#include "xfs_btree_mem.h" /* * Btree magic numbers. */ -static const uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { - { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, - XFS_FIBT_MAGIC, 0 }, - { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC, - XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC, - XFS_REFC_CRC_MAGIC } -}; - uint32_t xfs_btree_magic( - int crc, - xfs_btnum_t btnum) + struct xfs_mount *mp, + const struct xfs_btree_ops *ops) { - uint32_t magic = xfs_magics[crc][btnum]; + int idx = xfs_has_crc(mp) ? 1 : 0; + __be32 magic = ops->buf_ops->magic[idx]; /* Ensure we asked for crc for crc-only magics. */ ASSERT(magic != 0); - return magic; + return be32_to_cpu(magic); } /* @@ -63,10 +59,8 @@ xfs_btree_magic( * bytes. */ static inline xfs_failaddr_t -xfs_btree_check_lblock_siblings( +xfs_btree_check_fsblock_siblings( struct xfs_mount *mp, - struct xfs_btree_cur *cur, - int level, xfs_fsblock_t fsb, __be64 dsibling) { @@ -78,22 +72,33 @@ xfs_btree_check_lblock_siblings( sibling = be64_to_cpu(dsibling); if (sibling == fsb) return __this_address; - if (level >= 0) { - if (!xfs_btree_check_lptr(cur, sibling, level + 1)) - return __this_address; - } else { - if (!xfs_verify_fsbno(mp, sibling)) - return __this_address; - } + if (!xfs_verify_fsbno(mp, sibling)) + return __this_address; + return NULL; +} +static inline xfs_failaddr_t +xfs_btree_check_memblock_siblings( + struct xfs_buftarg *btp, + xfbno_t bno, + __be64 dsibling) +{ + xfbno_t sibling; + + if (dsibling == cpu_to_be64(NULLFSBLOCK)) + return NULL; + + sibling = be64_to_cpu(dsibling); + if (sibling == bno) + return __this_address; + if (!xmbuf_verify_daddr(btp, xfbno_to_daddr(sibling))) + return __this_address; return NULL; } static inline xfs_failaddr_t -xfs_btree_check_sblock_siblings( +xfs_btree_check_agblock_siblings( struct xfs_perag *pag, - struct xfs_btree_cur *cur, - int level, xfs_agblock_t agbno, __be32 dsibling) { @@ -105,34 +110,21 @@ xfs_btree_check_sblock_siblings( sibling = be32_to_cpu(dsibling); if (sibling == agbno) return __this_address; - if (level >= 0) { - if (!xfs_btree_check_sptr(cur, sibling, level + 1)) - return __this_address; - } else { - if (!xfs_verify_agbno(pag, sibling)) - return __this_address; - } + if (!xfs_verify_agbno(pag, sibling)) + return __this_address; return NULL; } -/* - * Check a long btree block header. Return the address of the failing check, - * or NULL if everything is ok. - */ -xfs_failaddr_t -__xfs_btree_check_lblock( +static xfs_failaddr_t +__xfs_btree_check_lblock_hdr( struct xfs_btree_cur *cur, struct xfs_btree_block *block, int level, struct xfs_buf *bp) { struct xfs_mount *mp = cur->bc_mp; - xfs_btnum_t btnum = cur->bc_btnum; - int crc = xfs_has_crc(mp); - xfs_failaddr_t fa; - xfs_fsblock_t fsb = NULLFSBLOCK; - if (crc) { + if (xfs_has_crc(mp)) { if (!uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; if (block->bb_u.l.bb_blkno != @@ -142,7 +134,7 @@ __xfs_btree_check_lblock( return __this_address; } - if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum)) + if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops)) return __this_address; if (be16_to_cpu(block->bb_level) != level) return __this_address; @@ -150,44 +142,83 @@ __xfs_btree_check_lblock( cur->bc_ops->get_maxrecs(cur, level)) return __this_address; - if (bp) - fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); + return NULL; +} + +/* + * Check a long btree block header. Return the address of the failing check, + * or NULL if everything is ok. + */ +static xfs_failaddr_t +__xfs_btree_check_fsblock( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block, + int level, + struct xfs_buf *bp) +{ + struct xfs_mount *mp = cur->bc_mp; + xfs_failaddr_t fa; + xfs_fsblock_t fsb; + + fa = __xfs_btree_check_lblock_hdr(cur, block, level, bp); + if (fa) + return fa; - fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb, + /* + * For inode-rooted btrees, the root block sits in the inode fork. In + * that case bp is NULL, and the block must not have any siblings. + */ + if (!bp) { + if (block->bb_u.l.bb_leftsib != cpu_to_be64(NULLFSBLOCK)) + return __this_address; + if (block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK)) + return __this_address; + return NULL; + } + + fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); + fa = xfs_btree_check_fsblock_siblings(mp, fsb, block->bb_u.l.bb_leftsib); if (!fa) - fa = xfs_btree_check_lblock_siblings(mp, cur, level, fsb, + fa = xfs_btree_check_fsblock_siblings(mp, fsb, block->bb_u.l.bb_rightsib); return fa; } -/* Check a long btree block header. */ -static int -xfs_btree_check_lblock( +/* + * Check an in-memory btree block header. Return the address of the failing + * check, or NULL if everything is ok. + */ +static xfs_failaddr_t +__xfs_btree_check_memblock( struct xfs_btree_cur *cur, struct xfs_btree_block *block, int level, struct xfs_buf *bp) { - struct xfs_mount *mp = cur->bc_mp; + struct xfs_buftarg *btp = cur->bc_mem.xfbtree->target; xfs_failaddr_t fa; + xfbno_t bno; - fa = __xfs_btree_check_lblock(cur, block, level, bp); - if (XFS_IS_CORRUPT(mp, fa != NULL) || - XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) { - if (bp) - trace_xfs_btree_corrupt(bp, _RET_IP_); - return -EFSCORRUPTED; - } - return 0; + fa = __xfs_btree_check_lblock_hdr(cur, block, level, bp); + if (fa) + return fa; + + bno = xfs_daddr_to_xfbno(xfs_buf_daddr(bp)); + fa = xfs_btree_check_memblock_siblings(btp, bno, + block->bb_u.l.bb_leftsib); + if (!fa) + fa = xfs_btree_check_memblock_siblings(btp, bno, + block->bb_u.l.bb_rightsib); + return fa; } /* * Check a short btree block header. Return the address of the failing check, * or NULL if everything is ok. */ -xfs_failaddr_t -__xfs_btree_check_sblock( +static xfs_failaddr_t +__xfs_btree_check_agblock( struct xfs_btree_cur *cur, struct xfs_btree_block *block, int level, @@ -195,20 +226,17 @@ __xfs_btree_check_sblock( { struct xfs_mount *mp = cur->bc_mp; struct xfs_perag *pag = cur->bc_ag.pag; - xfs_btnum_t btnum = cur->bc_btnum; - int crc = xfs_has_crc(mp); xfs_failaddr_t fa; - xfs_agblock_t agbno = NULLAGBLOCK; + xfs_agblock_t agbno; - if (crc) { + if (xfs_has_crc(mp)) { if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) return __this_address; - if (block->bb_u.s.bb_blkno != - cpu_to_be64(bp ? xfs_buf_daddr(bp) : XFS_BUF_DADDR_NULL)) + if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp))) return __this_address; } - if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(crc, btnum)) + if (be32_to_cpu(block->bb_magic) != xfs_btree_magic(mp, cur->bc_ops)) return __this_address; if (be16_to_cpu(block->bb_level) != level) return __this_address; @@ -216,36 +244,45 @@ __xfs_btree_check_sblock( cur->bc_ops->get_maxrecs(cur, level)) return __this_address; - if (bp) - agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp)); - - fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno, + agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp)); + fa = xfs_btree_check_agblock_siblings(pag, agbno, block->bb_u.s.bb_leftsib); if (!fa) - fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno, + fa = xfs_btree_check_agblock_siblings(pag, agbno, block->bb_u.s.bb_rightsib); return fa; } -/* Check a short btree block header. */ -STATIC int -xfs_btree_check_sblock( +/* + * Internal btree block check. + * + * Return NULL if the block is ok or the address of the failed check otherwise. + */ +xfs_failaddr_t +__xfs_btree_check_block( struct xfs_btree_cur *cur, struct xfs_btree_block *block, int level, struct xfs_buf *bp) { - struct xfs_mount *mp = cur->bc_mp; - xfs_failaddr_t fa; - - fa = __xfs_btree_check_sblock(cur, block, level, bp); - if (XFS_IS_CORRUPT(mp, fa != NULL) || - XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) { - if (bp) - trace_xfs_btree_corrupt(bp, _RET_IP_); - return -EFSCORRUPTED; + switch (cur->bc_ops->type) { + case XFS_BTREE_TYPE_MEM: + return __xfs_btree_check_memblock(cur, block, level, bp); + case XFS_BTREE_TYPE_AG: + return __xfs_btree_check_agblock(cur, block, level, bp); + case XFS_BTREE_TYPE_INODE: + return __xfs_btree_check_fsblock(cur, block, level, bp); + default: + ASSERT(0); + return __this_address; } - return 0; +} + +static inline unsigned int xfs_btree_block_errtag(struct xfs_btree_cur *cur) +{ + if (cur->bc_ops->ptr_len == XFS_BTREE_SHORT_PTR_LEN) + return XFS_ERRTAG_BTREE_CHECK_SBLOCK; + return XFS_ERRTAG_BTREE_CHECK_LBLOCK; } /* @@ -258,34 +295,49 @@ xfs_btree_check_block( int level, /* level of the btree block */ struct xfs_buf *bp) /* buffer containing block, if any */ { - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return xfs_btree_check_lblock(cur, block, level, bp); - else - return xfs_btree_check_sblock(cur, block, level, bp); -} + struct xfs_mount *mp = cur->bc_mp; + xfs_failaddr_t fa; -/* Check that this long pointer is valid and points within the fs. */ -bool -xfs_btree_check_lptr( - struct xfs_btree_cur *cur, - xfs_fsblock_t fsbno, - int level) -{ - if (level <= 0) - return false; - return xfs_verify_fsbno(cur->bc_mp, fsbno); + fa = __xfs_btree_check_block(cur, block, level, bp); + if (XFS_IS_CORRUPT(mp, fa != NULL) || + XFS_TEST_ERROR(false, mp, xfs_btree_block_errtag(cur))) { + if (bp) + trace_xfs_btree_corrupt(bp, _RET_IP_); + xfs_btree_mark_sick(cur); + return -EFSCORRUPTED; + } + return 0; } -/* Check that this short pointer is valid and points within the AG. */ -bool -xfs_btree_check_sptr( - struct xfs_btree_cur *cur, - xfs_agblock_t agbno, - int level) +int +__xfs_btree_check_ptr( + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr, + int index, + int level) { if (level <= 0) - return false; - return xfs_verify_agbno(cur->bc_ag.pag, agbno); + return -EFSCORRUPTED; + + switch (cur->bc_ops->type) { + case XFS_BTREE_TYPE_MEM: + if (!xfbtree_verify_bno(cur->bc_mem.xfbtree, + be64_to_cpu((&ptr->l)[index]))) + return -EFSCORRUPTED; + break; + case XFS_BTREE_TYPE_INODE: + if (!xfs_verify_fsbno(cur->bc_mp, + be64_to_cpu((&ptr->l)[index]))) + return -EFSCORRUPTED; + break; + case XFS_BTREE_TYPE_AG: + if (!xfs_verify_agbno(cur->bc_ag.pag, + be32_to_cpu((&ptr->s)[index]))) + return -EFSCORRUPTED; + break; + } + + return 0; } /* @@ -299,26 +351,35 @@ xfs_btree_check_ptr( int index, int level) { - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (xfs_btree_check_lptr(cur, be64_to_cpu((&ptr->l)[index]), - level)) - return 0; - xfs_err(cur->bc_mp, -"Inode %llu fork %d: Corrupt btree %d pointer at level %d index %d.", + int error; + + error = __xfs_btree_check_ptr(cur, ptr, index, level); + if (error) { + switch (cur->bc_ops->type) { + case XFS_BTREE_TYPE_MEM: + xfs_err(cur->bc_mp, +"In-memory: Corrupt %sbt flags 0x%x pointer at level %d index %d fa %pS.", + cur->bc_ops->name, cur->bc_flags, level, index, + __this_address); + break; + case XFS_BTREE_TYPE_INODE: + xfs_err(cur->bc_mp, +"Inode %llu fork %d: Corrupt %sbt pointer at level %d index %d.", cur->bc_ino.ip->i_ino, - cur->bc_ino.whichfork, cur->bc_btnum, + cur->bc_ino.whichfork, cur->bc_ops->name, level, index); - } else { - if (xfs_btree_check_sptr(cur, be32_to_cpu((&ptr->s)[index]), - level)) - return 0; - xfs_err(cur->bc_mp, -"AG %u: Corrupt btree %d pointer at level %d index %d.", - cur->bc_ag.pag->pag_agno, cur->bc_btnum, + break; + case XFS_BTREE_TYPE_AG: + xfs_err(cur->bc_mp, +"AG %u: Corrupt %sbt pointer at level %d index %d.", + cur->bc_ag.pag->pag_agno, cur->bc_ops->name, level, index); + break; + } + xfs_btree_mark_sick(cur); } - return -EFSCORRUPTED; + return error; } #ifdef DEBUG @@ -336,7 +397,7 @@ xfs_btree_check_ptr( * it to disk. */ void -xfs_btree_lblock_calc_crc( +xfs_btree_fsblock_calc_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); @@ -350,7 +411,7 @@ xfs_btree_lblock_calc_crc( } bool -xfs_btree_lblock_verify_crc( +xfs_btree_fsblock_verify_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); @@ -374,7 +435,7 @@ xfs_btree_lblock_verify_crc( * it to disk. */ void -xfs_btree_sblock_calc_crc( +xfs_btree_agblock_calc_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); @@ -388,7 +449,7 @@ xfs_btree_sblock_calc_crc( } bool -xfs_btree_sblock_verify_crc( +xfs_btree_agblock_verify_crc( struct xfs_buf *bp) { struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); @@ -410,6 +471,17 @@ xfs_btree_free_block( { int error; + trace_xfs_btree_free_block(cur, bp); + + /* + * Don't allow block freeing for a staging cursor, because staging + * cursors do not support regular btree modifications. + */ + if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) { + ASSERT(0); + return -EFSCORRUPTED; + } + error = cur->bc_ops->free_block(cur, bp); if (!error) { xfs_trans_binval(cur->bc_tp, bp); @@ -448,33 +520,70 @@ xfs_btree_del_cursor( * zero, then we should be shut down or on our way to shutdown due to * cancelling a dirty transaction on error. */ - ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 || + ASSERT(!xfs_btree_is_bmap(cur->bc_ops) || cur->bc_bmap.allocated == 0 || xfs_is_shutdown(cur->bc_mp) || error != 0); - if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) - kmem_free(cur->bc_ops); - if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag) - xfs_perag_put(cur->bc_ag.pag); + + switch (cur->bc_ops->type) { + case XFS_BTREE_TYPE_AG: + if (cur->bc_ag.pag) + xfs_perag_put(cur->bc_ag.pag); + break; + case XFS_BTREE_TYPE_INODE: + /* nothing to do */ + break; + case XFS_BTREE_TYPE_MEM: + if (cur->bc_mem.pag) + xfs_perag_put(cur->bc_mem.pag); + break; + } + kmem_cache_free(cur->bc_cache, cur); } +/* Return the buffer target for this btree's buffer. */ +static inline struct xfs_buftarg * +xfs_btree_buftarg( + struct xfs_btree_cur *cur) +{ + if (cur->bc_ops->type == XFS_BTREE_TYPE_MEM) + return cur->bc_mem.xfbtree->target; + return cur->bc_mp->m_ddev_targp; +} + +/* Return the block size (in units of 512b sectors) for this btree. */ +static inline unsigned int +xfs_btree_bbsize( + struct xfs_btree_cur *cur) +{ + if (cur->bc_ops->type == XFS_BTREE_TYPE_MEM) + return XFBNO_BBSIZE; + return cur->bc_mp->m_bsize; +} + /* * Duplicate the btree cursor. * Allocate a new one, copy the record, re-get the buffers. */ -int /* error */ +int /* error */ xfs_btree_dup_cursor( - struct xfs_btree_cur *cur, /* input cursor */ - struct xfs_btree_cur **ncur) /* output cursor */ + struct xfs_btree_cur *cur, /* input cursor */ + struct xfs_btree_cur **ncur) /* output cursor */ { - struct xfs_buf *bp; /* btree block's buffer pointer */ - int error; /* error return value */ - int i; /* level number of btree block */ - xfs_mount_t *mp; /* mount structure for filesystem */ - struct xfs_btree_cur *new; /* new cursor value */ - xfs_trans_t *tp; /* transaction pointer, can be NULL */ + struct xfs_mount *mp = cur->bc_mp; + struct xfs_trans *tp = cur->bc_tp; + struct xfs_buf *bp; + struct xfs_btree_cur *new; + int error; + int i; - tp = cur->bc_tp; - mp = cur->bc_mp; + /* + * Don't allow staging cursors to be duplicated because they're supposed + * to be kept private to a single thread. + */ + if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) { + ASSERT(0); + return -EFSCORRUPTED; + } /* * Allocate a new cursor like the old one. @@ -494,10 +603,13 @@ xfs_btree_dup_cursor( new->bc_levels[i].ra = cur->bc_levels[i].ra; bp = cur->bc_levels[i].bp; if (bp) { - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - xfs_buf_daddr(bp), mp->m_bsize, - 0, &bp, - cur->bc_ops->buf_ops); + error = xfs_trans_read_buf(mp, tp, + xfs_btree_buftarg(cur), + xfs_buf_daddr(bp), + xfs_btree_bbsize(cur), 0, &bp, + cur->bc_ops->buf_ops); + if (xfs_metadata_is_sick(error)) + xfs_btree_mark_sick(new); if (error) { xfs_btree_del_cursor(new, error); *ncur = NULL; @@ -539,7 +651,7 @@ xfs_btree_dup_cursor( * record, key or pointer (xfs_btree_*_addr). Note that all addressing * inside the btree block is done using indices starting at one, not zero! * - * If XFS_BTREE_OVERLAPPING is set, then this btree supports keys containing + * If XFS_BTGEO_OVERLAPPING is set, then this btree supports keys containing * overlapping intervals. In such a tree, records are still sorted lowest to * highest and indexed by the smallest key value that refers to the record. * However, nodes are different: each pointer has two associated keys -- one @@ -589,26 +701,17 @@ xfs_btree_dup_cursor( */ static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) { - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) + if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { + if (xfs_has_crc(cur->bc_mp)) return XFS_BTREE_LBLOCK_CRC_LEN; return XFS_BTREE_LBLOCK_LEN; } - if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) + if (xfs_has_crc(cur->bc_mp)) return XFS_BTREE_SBLOCK_CRC_LEN; return XFS_BTREE_SBLOCK_LEN; } /* - * Return size of btree block pointers for this btree instance. - */ -static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur) -{ - return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? - sizeof(__be64) : sizeof(__be32); -} - -/* * Calculate offset of the n-th record in a btree block. */ STATIC size_t @@ -655,7 +758,7 @@ xfs_btree_ptr_offset( { return xfs_btree_block_len(cur) + cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len + - (n - 1) * xfs_btree_ptr_len(cur); + (n - 1) * cur->bc_ops->ptr_len; } /* @@ -718,7 +821,7 @@ struct xfs_ifork * xfs_btree_ifork_ptr( struct xfs_btree_cur *cur) { - ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE); if (cur->bc_flags & XFS_BTREE_STAGING) return cur->bc_ino.ifake->if_fork; @@ -750,8 +853,7 @@ xfs_btree_get_block( int level, /* level in btree */ struct xfs_buf **bpp) /* buffer containing the block */ { - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - (level == cur->bc_nlevels - 1)) { + if (xfs_btree_at_iroot(cur, level)) { *bpp = NULL; return xfs_btree_get_iroot(cur); } @@ -856,95 +958,52 @@ xfs_btree_offsets( } } -/* - * Get a buffer for the block, return it read in. - * Long-form addressing. - */ -int -xfs_btree_read_bufl( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_fsblock_t fsbno, /* file system block number */ - struct xfs_buf **bpp, /* buffer for fsbno */ - int refval, /* ref count value for buffer */ - const struct xfs_buf_ops *ops) -{ - struct xfs_buf *bp; /* return value */ - xfs_daddr_t d; /* real disk block address */ - int error; - - if (!xfs_verify_fsbno(mp, fsbno)) - return -EFSCORRUPTED; - d = XFS_FSB_TO_DADDR(mp, fsbno); - error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, - mp->m_bsize, 0, &bp, ops); - if (error) - return error; - if (bp) - xfs_buf_set_ref(bp, refval); - *bpp = bp; - return 0; -} - -/* - * Read-ahead the block, don't wait for it, don't return a buffer. - * Long-form addressing. - */ -/* ARGSUSED */ -void -xfs_btree_reada_bufl( - struct xfs_mount *mp, /* file system mount point */ - xfs_fsblock_t fsbno, /* file system block number */ - xfs_extlen_t count, /* count of filesystem blocks */ - const struct xfs_buf_ops *ops) +STATIC int +xfs_btree_readahead_fsblock( + struct xfs_btree_cur *cur, + int lr, + struct xfs_btree_block *block) { - xfs_daddr_t d; + struct xfs_mount *mp = cur->bc_mp; + xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); + xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); + int rval = 0; - ASSERT(fsbno != NULLFSBLOCK); - d = XFS_FSB_TO_DADDR(mp, fsbno); - xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); -} + if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) { + xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, left), + mp->m_bsize, cur->bc_ops->buf_ops); + rval++; + } -/* - * Read-ahead the block, don't wait for it, don't return a buffer. - * Short-form addressing. - */ -/* ARGSUSED */ -void -xfs_btree_reada_bufs( - struct xfs_mount *mp, /* file system mount point */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* allocation group block number */ - xfs_extlen_t count, /* count of filesystem blocks */ - const struct xfs_buf_ops *ops) -{ - xfs_daddr_t d; + if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) { + xfs_buf_readahead(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, right), + mp->m_bsize, cur->bc_ops->buf_ops); + rval++; + } - ASSERT(agno != NULLAGNUMBER); - ASSERT(agbno != NULLAGBLOCK); - d = XFS_AGB_TO_DADDR(mp, agno, agbno); - xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); + return rval; } STATIC int -xfs_btree_readahead_lblock( +xfs_btree_readahead_memblock( struct xfs_btree_cur *cur, int lr, struct xfs_btree_block *block) { + struct xfs_buftarg *btp = cur->bc_mem.xfbtree->target; + xfbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); + xfbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); int rval = 0; - xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); - xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); if ((lr & XFS_BTCUR_LEFTRA) && left != NULLFSBLOCK) { - xfs_btree_reada_bufl(cur->bc_mp, left, 1, - cur->bc_ops->buf_ops); + xfs_buf_readahead(btp, xfbno_to_daddr(left), XFBNO_BBSIZE, + cur->bc_ops->buf_ops); rval++; } if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLFSBLOCK) { - xfs_btree_reada_bufl(cur->bc_mp, right, 1, - cur->bc_ops->buf_ops); + xfs_buf_readahead(btp, xfbno_to_daddr(right), XFBNO_BBSIZE, + cur->bc_ops->buf_ops); rval++; } @@ -952,25 +1011,28 @@ xfs_btree_readahead_lblock( } STATIC int -xfs_btree_readahead_sblock( +xfs_btree_readahead_agblock( struct xfs_btree_cur *cur, int lr, - struct xfs_btree_block *block) + struct xfs_btree_block *block) { - int rval = 0; + struct xfs_mount *mp = cur->bc_mp; + xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno; xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib); xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib); - + int rval = 0; if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno, - left, 1, cur->bc_ops->buf_ops); + xfs_buf_readahead(mp->m_ddev_targp, + XFS_AGB_TO_DADDR(mp, agno, left), + mp->m_bsize, cur->bc_ops->buf_ops); rval++; } if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_ag.pag->pag_agno, - right, 1, cur->bc_ops->buf_ops); + xfs_buf_readahead(mp->m_ddev_targp, + XFS_AGB_TO_DADDR(mp, agno, right), + mp->m_bsize, cur->bc_ops->buf_ops); rval++; } @@ -993,8 +1055,7 @@ xfs_btree_readahead( * No readahead needed if we are at the root level and the * btree root is stored in the inode. */ - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - (lev == cur->bc_nlevels - 1)) + if (xfs_btree_at_iroot(cur, lev)) return 0; if ((cur->bc_levels[lev].ra | lr) == cur->bc_levels[lev].ra) @@ -1003,9 +1064,17 @@ xfs_btree_readahead( cur->bc_levels[lev].ra |= lr; block = XFS_BUF_TO_BLOCK(cur->bc_levels[lev].bp); - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return xfs_btree_readahead_lblock(cur, lr, block); - return xfs_btree_readahead_sblock(cur, lr, block); + switch (cur->bc_ops->type) { + case XFS_BTREE_TYPE_AG: + return xfs_btree_readahead_agblock(cur, lr, block); + case XFS_BTREE_TYPE_INODE: + return xfs_btree_readahead_fsblock(cur, lr, block); + case XFS_BTREE_TYPE_MEM: + return xfs_btree_readahead_memblock(cur, lr, block); + default: + ASSERT(0); + return 0; + } } STATIC int @@ -1014,23 +1083,24 @@ xfs_btree_ptr_to_daddr( const union xfs_btree_ptr *ptr, xfs_daddr_t *daddr) { - xfs_fsblock_t fsbno; - xfs_agblock_t agbno; int error; error = xfs_btree_check_ptr(cur, ptr, 0, 1); if (error) return error; - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - fsbno = be64_to_cpu(ptr->l); - *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, fsbno); - } else { - agbno = be32_to_cpu(ptr->s); + switch (cur->bc_ops->type) { + case XFS_BTREE_TYPE_AG: *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno, - agbno); + be32_to_cpu(ptr->s)); + break; + case XFS_BTREE_TYPE_INODE: + *daddr = XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); + break; + case XFS_BTREE_TYPE_MEM: + *daddr = xfbno_to_daddr(be64_to_cpu(ptr->l)); + break; } - return 0; } @@ -1050,8 +1120,9 @@ xfs_btree_readahead_ptr( if (xfs_btree_ptr_to_daddr(cur, ptr, &daddr)) return; - xfs_buf_readahead(cur->bc_mp->m_ddev_targp, daddr, - cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops); + xfs_buf_readahead(xfs_btree_buftarg(cur), daddr, + xfs_btree_bbsize(cur) * count, + cur->bc_ops->buf_ops); } /* @@ -1072,7 +1143,7 @@ xfs_btree_setbuf( cur->bc_levels[lev].ra = 0; b = XFS_BUF_TO_BLOCK(bp); - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK)) cur->bc_levels[lev].ra |= XFS_BTCUR_LEFTRA; if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK)) @@ -1090,7 +1161,7 @@ xfs_btree_ptr_is_null( struct xfs_btree_cur *cur, const union xfs_btree_ptr *ptr) { - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) return ptr->l == cpu_to_be64(NULLFSBLOCK); else return ptr->s == cpu_to_be32(NULLAGBLOCK); @@ -1101,12 +1172,23 @@ xfs_btree_set_ptr_null( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr) { - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) ptr->l = cpu_to_be64(NULLFSBLOCK); else ptr->s = cpu_to_be32(NULLAGBLOCK); } +static inline bool +xfs_btree_ptrs_equal( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr1, + union xfs_btree_ptr *ptr2) +{ + if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) + return ptr1->l == ptr2->l; + return ptr1->s == ptr2->s; +} + /* * Get/set/init sibling pointers */ @@ -1119,7 +1201,7 @@ xfs_btree_get_sibling( { ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { if (lr == XFS_BB_RIGHTSIB) ptr->l = block->bb_u.l.bb_rightsib; else @@ -1141,7 +1223,7 @@ xfs_btree_set_sibling( { ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB); - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { if (lr == XFS_BB_RIGHTSIB) block->bb_u.l.bb_rightsib = ptr->l; else @@ -1154,25 +1236,24 @@ xfs_btree_set_sibling( } } -void -xfs_btree_init_block_int( +static void +__xfs_btree_init_block( struct xfs_mount *mp, struct xfs_btree_block *buf, + const struct xfs_btree_ops *ops, xfs_daddr_t blkno, - xfs_btnum_t btnum, __u16 level, __u16 numrecs, - __u64 owner, - unsigned int flags) + __u64 owner) { - int crc = xfs_has_crc(mp); - __u32 magic = xfs_btree_magic(crc, btnum); + bool crc = xfs_has_crc(mp); + __u32 magic = xfs_btree_magic(mp, ops); buf->bb_magic = cpu_to_be32(magic); buf->bb_level = cpu_to_be16(level); buf->bb_numrecs = cpu_to_be16(numrecs); - if (flags & XFS_BTREE_LONG_PTRS) { + if (ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK); buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK); if (crc) { @@ -1183,14 +1264,12 @@ xfs_btree_init_block_int( buf->bb_u.l.bb_lsn = 0; } } else { - /* owner is a 32 bit value on short blocks */ - __u32 __owner = (__u32)owner; - buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); if (crc) { buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); - buf->bb_u.s.bb_owner = cpu_to_be32(__owner); + /* owner is a 32 bit value on short blocks */ + buf->bb_u.s.bb_owner = cpu_to_be32((__u32)owner); uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid); buf->bb_u.s.bb_lsn = 0; } @@ -1199,15 +1278,46 @@ xfs_btree_init_block_int( void xfs_btree_init_block( - struct xfs_mount *mp, - struct xfs_buf *bp, - xfs_btnum_t btnum, - __u16 level, - __u16 numrecs, - __u64 owner) + struct xfs_mount *mp, + struct xfs_btree_block *block, + const struct xfs_btree_ops *ops, + __u16 level, + __u16 numrecs, + __u64 owner) +{ + __xfs_btree_init_block(mp, block, ops, XFS_BUF_DADDR_NULL, level, + numrecs, owner); +} + +void +xfs_btree_init_buf( + struct xfs_mount *mp, + struct xfs_buf *bp, + const struct xfs_btree_ops *ops, + __u16 level, + __u16 numrecs, + __u64 owner) +{ + __xfs_btree_init_block(mp, XFS_BUF_TO_BLOCK(bp), ops, + xfs_buf_daddr(bp), level, numrecs, owner); + bp->b_ops = ops->buf_ops; +} + +static inline __u64 +xfs_btree_owner( + struct xfs_btree_cur *cur) { - xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), xfs_buf_daddr(bp), - btnum, level, numrecs, owner, 0); + switch (cur->bc_ops->type) { + case XFS_BTREE_TYPE_MEM: + return cur->bc_mem.xfbtree->owner; + case XFS_BTREE_TYPE_INODE: + return cur->bc_ino.ip->i_ino; + case XFS_BTREE_TYPE_AG: + return cur->bc_ag.pag->pag_agno; + default: + ASSERT(0); + return 0; + } } void @@ -1217,22 +1327,8 @@ xfs_btree_init_block_cur( int level, int numrecs) { - __u64 owner; - - /* - * we can pull the owner from the cursor right now as the different - * owners align directly with the pointer size of the btree. This may - * change in future, but is safe for current users of the generic btree - * code. - */ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - owner = cur->bc_ino.ip->i_ino; - else - owner = cur->bc_ag.pag->pag_agno; - - xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), - xfs_buf_daddr(bp), cur->bc_btnum, level, - numrecs, owner, cur->bc_flags); + xfs_btree_init_buf(cur->bc_mp, bp, cur->bc_ops, level, numrecs, + xfs_btree_owner(cur)); } /* @@ -1250,7 +1346,7 @@ xfs_btree_is_lastrec( if (level > 0) return 0; - if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE)) + if (!(cur->bc_ops->geom_flags & XFS_BTGEO_LASTREC_UPDATE)) return 0; xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB); @@ -1265,41 +1361,27 @@ xfs_btree_buf_to_ptr( struct xfs_buf *bp, union xfs_btree_ptr *ptr) { - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, - xfs_buf_daddr(bp))); - else { + switch (cur->bc_ops->type) { + case XFS_BTREE_TYPE_AG: ptr->s = cpu_to_be32(xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp))); + break; + case XFS_BTREE_TYPE_INODE: + ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp, + xfs_buf_daddr(bp))); + break; + case XFS_BTREE_TYPE_MEM: + ptr->l = cpu_to_be64(xfs_daddr_to_xfbno(xfs_buf_daddr(bp))); + break; } } -STATIC void +static inline void xfs_btree_set_refs( struct xfs_btree_cur *cur, struct xfs_buf *bp) { - switch (cur->bc_btnum) { - case XFS_BTNUM_BNO: - case XFS_BTNUM_CNT: - xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); - break; - case XFS_BTNUM_INO: - case XFS_BTNUM_FINO: - xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); - break; - case XFS_BTNUM_BMAP: - xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); - break; - case XFS_BTNUM_RMAP: - xfs_buf_set_ref(bp, XFS_RMAP_BTREE_REF); - break; - case XFS_BTNUM_REFC: - xfs_buf_set_ref(bp, XFS_REFC_BTREE_REF); - break; - default: - ASSERT(0); - } + xfs_buf_set_ref(bp, cur->bc_ops->lru_refs); } int @@ -1309,15 +1391,14 @@ xfs_btree_get_buf_block( struct xfs_btree_block **block, struct xfs_buf **bpp) { - struct xfs_mount *mp = cur->bc_mp; - xfs_daddr_t d; - int error; + xfs_daddr_t d; + int error; error = xfs_btree_ptr_to_daddr(cur, ptr, &d); if (error) return error; - error = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize, - 0, bpp); + error = xfs_trans_get_buf(cur->bc_tp, xfs_btree_buftarg(cur), d, + xfs_btree_bbsize(cur), 0, bpp); if (error) return error; @@ -1348,9 +1429,11 @@ xfs_btree_read_buf_block( error = xfs_btree_ptr_to_daddr(cur, ptr, &d); if (error) return error; - error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, - mp->m_bsize, flags, bpp, - cur->bc_ops->buf_ops); + error = xfs_trans_read_buf(mp, cur->bc_tp, xfs_btree_buftarg(cur), d, + xfs_btree_bbsize(cur), flags, bpp, + cur->bc_ops->buf_ops); + if (xfs_metadata_is_sick(error)) + xfs_btree_mark_sick(cur); if (error) return error; @@ -1398,7 +1481,7 @@ xfs_btree_copy_ptrs( int numptrs) { ASSERT(numptrs >= 0); - memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur)); + memcpy(dst_ptr, src_ptr, numptrs * cur->bc_ops->ptr_len); } /* @@ -1454,8 +1537,8 @@ xfs_btree_shift_ptrs( ASSERT(numptrs >= 0); ASSERT(dir == 1 || dir == -1); - dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur)); - memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur)); + dst_ptr = (char *)ptr + (dir * cur->bc_ops->ptr_len); + memmove(dst_ptr, ptr, numptrs * cur->bc_ops->ptr_len); } /* @@ -1566,7 +1649,7 @@ xfs_btree_log_block( if (bp) { int nbits; - if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { + if (xfs_has_crc(cur->bc_mp)) { /* * We don't log the CRC when updating a btree * block but instead recreate it during log @@ -1581,7 +1664,7 @@ xfs_btree_log_block( nbits = XFS_BB_NUM_BITS; } xfs_btree_offsets(fields, - (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? + (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) ? loffsets : soffsets, nbits, &first, &last); xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); @@ -1658,9 +1741,10 @@ xfs_btree_increment( * confused or have the tree root in an inode. */ if (lev == cur->bc_nlevels) { - if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) goto out0; ASSERT(0); + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1751,9 +1835,10 @@ xfs_btree_decrement( * or the root of the tree is in an inode. */ if (lev == cur->bc_nlevels) { - if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) goto out0; ASSERT(0); + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1786,6 +1871,33 @@ error0: return error; } +/* + * Check the btree block owner now that we have the context to know who the + * real owner is. + */ +static inline xfs_failaddr_t +xfs_btree_check_block_owner( + struct xfs_btree_cur *cur, + struct xfs_btree_block *block) +{ + __u64 owner; + + if (!xfs_has_crc(cur->bc_mp) || + (cur->bc_flags & XFS_BTREE_BMBT_INVALID_OWNER)) + return NULL; + + owner = xfs_btree_owner(cur); + if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { + if (be64_to_cpu(block->bb_u.l.bb_owner) != owner) + return __this_address; + } else { + if (be32_to_cpu(block->bb_u.s.bb_owner) != owner) + return __this_address; + } + + return NULL; +} + int xfs_btree_lookup_get_block( struct xfs_btree_cur *cur, /* btree cursor */ @@ -1798,8 +1910,7 @@ xfs_btree_lookup_get_block( int error = 0; /* special case the root block if in an inode */ - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - (level == cur->bc_nlevels - 1)) { + if (xfs_btree_at_iroot(cur, level)) { *blkp = xfs_btree_get_iroot(cur); return 0; } @@ -1824,11 +1935,7 @@ xfs_btree_lookup_get_block( return error; /* Check the inode owner since the verifiers don't. */ - if (xfs_has_crc(cur->bc_mp) && - !(cur->bc_ino.flags & XFS_BTCUR_BMBT_INVALID_OWNER) && - (cur->bc_flags & XFS_BTREE_LONG_PTRS) && - be64_to_cpu((*blkp)->bb_u.l.bb_owner) != - cur->bc_ino.ip->i_ino) + if (xfs_btree_check_block_owner(cur, *blkp) != NULL) goto out_bad; /* Did we get the level we were looking for? */ @@ -1846,6 +1953,7 @@ out_bad: *blkp = NULL; xfs_buf_mark_corrupt(bp); xfs_trans_brelse(cur->bc_tp, bp); + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } @@ -1872,6 +1980,27 @@ xfs_lookup_get_search_key( } /* + * Initialize a pointer to the root block. + */ +void +xfs_btree_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) { + /* + * Inode-rooted btrees call xfs_btree_get_iroot to find the root + * in xfs_btree_lookup_get_block and don't need a pointer here. + */ + ptr->l = 0; + } else if (cur->bc_flags & XFS_BTREE_STAGING) { + ptr->s = cpu_to_be32(cur->bc_ag.afake->af_root); + } else { + cur->bc_ops->init_ptr_from_cur(cur, ptr); + } +} + +/* * Lookup the record. The cursor is made to point to it, based on dir. * stat is set to 0 if can't find any such record, 1 for success. */ @@ -1892,14 +2021,16 @@ xfs_btree_lookup( XFS_BTREE_STATS_INC(cur, lookup); /* No such thing as a zero-level tree. */ - if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) + if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } block = NULL; keyno = 0; /* initialise start pointer from cursor */ - cur->bc_ops->init_ptr_from_cur(cur, &ptr); + xfs_btree_init_ptr_from_cur(cur, &ptr); pp = &ptr; /* @@ -1936,6 +2067,7 @@ xfs_btree_lookup( XFS_ERRLEVEL_LOW, cur->bc_mp, block, sizeof(*block)); + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } @@ -2012,8 +2144,10 @@ xfs_btree_lookup( error = xfs_btree_increment(cur, 0, &i); if (error) goto error0; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } *stat = 1; return 0; } @@ -2040,7 +2174,7 @@ xfs_btree_high_key_from_key( struct xfs_btree_cur *cur, union xfs_btree_key *key) { - ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING); + ASSERT(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING); return (union xfs_btree_key *)((char *)key + (cur->bc_ops->key_len / 2)); } @@ -2061,7 +2195,7 @@ xfs_btree_get_leaf_keys( rec = xfs_btree_rec_addr(cur, 1, block); cur->bc_ops->init_key_from_rec(key, rec); - if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { + if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) { cur->bc_ops->init_high_key_from_rec(&max_hkey, rec); for (n = 2; n <= xfs_btree_get_numrecs(block); n++) { @@ -2088,7 +2222,7 @@ xfs_btree_get_node_keys( union xfs_btree_key *high; int n; - if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { + if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) { memcpy(key, xfs_btree_key_addr(cur, 1, block), cur->bc_ops->key_len / 2); @@ -2132,7 +2266,7 @@ xfs_btree_needs_key_update( struct xfs_btree_cur *cur, int ptr) { - return (cur->bc_flags & XFS_BTREE_OVERLAPPING) || ptr == 1; + return (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) || ptr == 1; } /* @@ -2156,7 +2290,7 @@ __xfs_btree_updkeys( struct xfs_buf *bp; int ptr; - ASSERT(cur->bc_flags & XFS_BTREE_OVERLAPPING); + ASSERT(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING); /* Exit if there aren't any parent levels to update. */ if (level + 1 >= cur->bc_nlevels) @@ -2225,7 +2359,7 @@ xfs_btree_update_keys( ASSERT(level >= 0); block = xfs_btree_get_block(cur, level, &bp); - if (cur->bc_flags & XFS_BTREE_OVERLAPPING) + if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) return __xfs_btree_updkeys(cur, level, block, bp, false); /* @@ -2332,8 +2466,7 @@ xfs_btree_lshift( int error; /* error return value */ int i; - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - level == cur->bc_nlevels - 1) + if (xfs_btree_at_iroot(cur, level)) goto out0; /* Set up variables for this block as "right". */ @@ -2460,12 +2593,13 @@ xfs_btree_lshift( * Using a temporary cursor, update the parent key values of the * block on the left. */ - if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { + if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) { error = xfs_btree_dup_cursor(cur, &tcur); if (error) goto error0; i = xfs_btree_firstrec(tcur, level); if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -2527,8 +2661,7 @@ xfs_btree_rshift( int error; /* error return value */ int i; /* loop counter */ - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - (level == cur->bc_nlevels - 1)) + if (xfs_btree_at_iroot(cur, level)) goto out0; /* Set up variables for this block as "left". */ @@ -2636,6 +2769,7 @@ xfs_btree_rshift( goto error0; i = xfs_btree_lastrec(tcur, level); if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -2645,7 +2779,7 @@ xfs_btree_rshift( goto error1; /* Update the parent high keys of the left block, if needed. */ - if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { + if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) { error = xfs_btree_update_keys(cur, level); if (error) goto error1; @@ -2673,6 +2807,32 @@ error1: return error; } +static inline int +xfs_btree_alloc_block( + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *hint_block, + union xfs_btree_ptr *new_block, + int *stat) +{ + int error; + + /* + * Don't allow block allocation for a staging cursor, because staging + * cursors do not support regular btree modifications. + * + * Bulk loading uses a separate callback to obtain new blocks from a + * preallocated list, which prevents ENOSPC failures during loading. + */ + if (unlikely(cur->bc_flags & XFS_BTREE_STAGING)) { + ASSERT(0); + return -EFSCORRUPTED; + } + + error = cur->bc_ops->alloc_block(cur, hint_block, new_block, stat); + trace_xfs_btree_alloc_block(cur, new_block, *stat, error); + return error; +} + /* * Split cur/level block in half. * Return new block number and the key to its first @@ -2716,7 +2876,7 @@ __xfs_btree_split( xfs_btree_buf_to_ptr(cur, lbp, &lptr); /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat); + error = xfs_btree_alloc_block(cur, &lptr, &rptr, stat); if (error) goto error0; if (*stat == 0) @@ -2823,7 +2983,7 @@ __xfs_btree_split( } /* Update the parent high keys of the left block, if needed. */ - if (cur->bc_flags & XFS_BTREE_OVERLAPPING) { + if (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING) { error = xfs_btree_update_keys(cur, level); if (error) goto error0; @@ -2941,7 +3101,7 @@ xfs_btree_split( struct xfs_btree_split_args args; DECLARE_COMPLETION_ONSTACK(done); - if (cur->bc_btnum != XFS_BTNUM_BMAP || + if (!xfs_btree_is_bmap(cur->bc_ops) || cur->bc_tp->t_highest_agno == NULLAGNUMBER) return __xfs_btree_split(cur, level, ptrp, key, curp, stat); @@ -2963,7 +3123,6 @@ xfs_btree_split( #define xfs_btree_split __xfs_btree_split #endif /* __KERNEL__ */ - /* * Copy the old inode root contents into a real block and make the * broot point to it. @@ -2988,7 +3147,7 @@ xfs_btree_new_iroot( XFS_BTREE_STATS_INC(cur, newroot); - ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE); level = cur->bc_nlevels - 1; @@ -2996,7 +3155,7 @@ xfs_btree_new_iroot( pp = xfs_btree_ptr_addr(cur, 1, block); /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); + error = xfs_btree_alloc_block(cur, pp, &nptr, stat); if (error) goto error0; if (*stat == 0) @@ -3014,9 +3173,9 @@ xfs_btree_new_iroot( * In that case have to also ensure the blkno remains correct */ memcpy(cblock, block, xfs_btree_block_len(cur)); - if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { + if (xfs_has_crc(cur->bc_mp)) { __be64 bno = cpu_to_be64(xfs_buf_daddr(cbp)); - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) cblock->bb_u.l.bb_blkno = bno; else cblock->bb_u.s.bb_blkno = bno; @@ -3069,6 +3228,21 @@ error0: return error; } +static void +xfs_btree_set_root( + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr, + int inc) +{ + if (cur->bc_flags & XFS_BTREE_STAGING) { + /* Update the btree root information for a per-AG fake root. */ + cur->bc_ag.afake->af_root = be32_to_cpu(ptr->s); + cur->bc_ag.afake->af_levels += inc; + } else { + cur->bc_ops->set_root(cur, ptr, inc); + } +} + /* * Allocate a new root block, fill it in. */ @@ -3093,10 +3267,10 @@ xfs_btree_new_root( XFS_BTREE_STATS_INC(cur, newroot); /* initialise our start point from the cursor */ - cur->bc_ops->init_ptr_from_cur(cur, &rptr); + xfs_btree_init_ptr_from_cur(cur, &rptr); /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat); + error = xfs_btree_alloc_block(cur, &rptr, &lptr, stat); if (error) goto error0; if (*stat == 0) @@ -3109,7 +3283,7 @@ xfs_btree_new_root( goto error0; /* Set the root in the holding structure increasing the level by 1. */ - cur->bc_ops->set_root(cur, &lptr, 1); + xfs_btree_set_root(cur, &lptr, 1); /* * At the previous root level there are now two blocks: the old root, @@ -3213,8 +3387,7 @@ xfs_btree_make_block_unfull( { int error = 0; - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - level == cur->bc_nlevels - 1) { + if (xfs_btree_at_iroot(cur, level)) { struct xfs_inode *ip = cur->bc_ino.ip; if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { @@ -3299,8 +3472,8 @@ xfs_btree_insrec( * If we have an external root pointer, and we've made it to the * root level, allocate a new root block and we're done. */ - if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - (level >= cur->bc_nlevels)) { + if (cur->bc_ops->type != XFS_BTREE_TYPE_INODE && + level >= cur->bc_nlevels) { error = xfs_btree_new_root(cur, stat); xfs_btree_set_ptr_null(cur, ptrp); @@ -3524,6 +3697,7 @@ xfs_btree_insert( } if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -3537,7 +3711,8 @@ xfs_btree_insert( if (pcur != cur && (ncur || xfs_btree_ptr_is_null(cur, &nptr))) { /* Save the state from the cursor before we trash it */ - if (cur->bc_ops->update_cursor) + if (cur->bc_ops->update_cursor && + !(cur->bc_flags & XFS_BTREE_STAGING)) cur->bc_ops->update_cursor(pcur, cur); cur->bc_nlevels = pcur->bc_nlevels; xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); @@ -3586,7 +3761,7 @@ xfs_btree_kill_iroot( #endif int i; - ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE); ASSERT(cur->bc_nlevels > 1); /* @@ -3680,7 +3855,7 @@ xfs_btree_kill_root( * Update the root pointer, decreasing the level by 1 and then * free the old root. */ - cur->bc_ops->set_root(cur, newroot, -1); + xfs_btree_set_root(cur, newroot, -1); error = xfs_btree_free_block(cur, bp); if (error) @@ -3822,27 +3997,25 @@ xfs_btree_delrec( * Try to get rid of the next level down. If we can't then there's * nothing left to do. */ - if (level == cur->bc_nlevels - 1) { - if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { - xfs_iroot_realloc(cur->bc_ino.ip, -1, - cur->bc_ino.whichfork); + if (xfs_btree_at_iroot(cur, level)) { + xfs_iroot_realloc(cur->bc_ino.ip, -1, cur->bc_ino.whichfork); - error = xfs_btree_kill_iroot(cur); - if (error) - goto error0; + error = xfs_btree_kill_iroot(cur); + if (error) + goto error0; - error = xfs_btree_dec_cursor(cur, level, stat); - if (error) - goto error0; - *stat = 1; - return 0; - } + error = xfs_btree_dec_cursor(cur, level, stat); + if (error) + goto error0; + *stat = 1; + return 0; + } - /* - * If this is the root level, and there's only one entry left, - * and it's NOT the leaf level, then we can get rid of this - * level. - */ + /* + * If this is the root level, and there's only one entry left, and it's + * NOT the leaf level, then we can get rid of this level. + */ + if (level == cur->bc_nlevels - 1) { if (numrecs == 1 && level > 0) { union xfs_btree_ptr *pp; /* @@ -3891,7 +4064,7 @@ xfs_btree_delrec( xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB); - if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) { /* * One child of root, need to get a chance to copy its contents * into the root and delete it. Can't go up to next level, @@ -3931,6 +4104,7 @@ xfs_btree_delrec( */ i = xfs_btree_lastrec(tcur, level); if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -3939,12 +4113,14 @@ xfs_btree_delrec( if (error) goto error0; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } i = xfs_btree_lastrec(tcur, level); if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -3992,6 +4168,7 @@ xfs_btree_delrec( if (!xfs_btree_ptr_is_null(cur, &lptr)) { i = xfs_btree_firstrec(tcur, level); if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -4000,6 +4177,7 @@ xfs_btree_delrec( if (error) goto error0; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -4017,6 +4195,7 @@ xfs_btree_delrec( */ i = xfs_btree_firstrec(tcur, level); if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -4026,6 +4205,7 @@ xfs_btree_delrec( goto error0; i = xfs_btree_firstrec(tcur, level); if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -4201,8 +4381,8 @@ xfs_btree_delrec( * If we joined with the right neighbor and there's a level above * us, increment the cursor at that level. */ - else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || - (level + 1 < cur->bc_nlevels)) { + else if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE || + level + 1 < cur->bc_nlevels) { error = xfs_btree_increment(cur, level + 1, &i); if (error) goto error0; @@ -4270,7 +4450,7 @@ xfs_btree_delete( * If we combined blocks as part of deleting the record, delrec won't * have updated the parent high keys so we have to do that here. */ - if (joined && (cur->bc_flags & XFS_BTREE_OVERLAPPING)) { + if (joined && (cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) { error = xfs_btree_updkeys_force(cur, 0); if (error) goto error0; @@ -4344,7 +4524,7 @@ xfs_btree_visit_block( { struct xfs_btree_block *block; struct xfs_buf *bp; - union xfs_btree_ptr rptr; + union xfs_btree_ptr rptr, bufptr; int error; /* do right sibling readahead */ @@ -4367,15 +4547,12 @@ xfs_btree_visit_block( * return the same block without checking if the right sibling points * back to us and creates a cyclic reference in the btree. */ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (be64_to_cpu(rptr.l) == XFS_DADDR_TO_FSB(cur->bc_mp, - xfs_buf_daddr(bp))) - return -EFSCORRUPTED; - } else { - if (be32_to_cpu(rptr.s) == xfs_daddr_to_agbno(cur->bc_mp, - xfs_buf_daddr(bp))) - return -EFSCORRUPTED; + xfs_btree_buf_to_ptr(cur, bp, &bufptr); + if (xfs_btree_ptrs_equal(cur, &rptr, &bufptr)) { + xfs_btree_mark_sick(cur); + return -EFSCORRUPTED; } + return xfs_btree_lookup_get_block(cur, level, &rptr, &block); } @@ -4393,7 +4570,7 @@ xfs_btree_visit_blocks( struct xfs_btree_block *block = NULL; int error = 0; - cur->bc_ops->init_ptr_from_cur(cur, &lptr); + xfs_btree_init_ptr_from_cur(cur, &lptr); /* for each level */ for (level = cur->bc_nlevels - 1; level >= 0; level--) { @@ -4471,7 +4648,7 @@ xfs_btree_block_change_owner( /* modify the owner */ block = xfs_btree_get_block(cur, level, &bp); - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) { if (block->bb_u.l.bb_owner == cpu_to_be64(bbcoi->new_owner)) return 0; block->bb_u.l.bb_owner = cpu_to_be64(bbcoi->new_owner); @@ -4489,7 +4666,7 @@ xfs_btree_block_change_owner( * though, so everything is consistent in memory. */ if (!bp) { - ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE); ASSERT(level == cur->bc_nlevels - 1); return 0; } @@ -4523,7 +4700,7 @@ xfs_btree_change_owner( /* Verify the v5 fields of a long-format btree block. */ xfs_failaddr_t -xfs_btree_lblock_v5hdr_verify( +xfs_btree_fsblock_v5hdr_verify( struct xfs_buf *bp, uint64_t owner) { @@ -4544,7 +4721,7 @@ xfs_btree_lblock_v5hdr_verify( /* Verify a long-format btree block. */ xfs_failaddr_t -xfs_btree_lblock_verify( +xfs_btree_fsblock_verify( struct xfs_buf *bp, unsigned int max_recs) { @@ -4553,28 +4730,60 @@ xfs_btree_lblock_verify( xfs_fsblock_t fsb; xfs_failaddr_t fa; + ASSERT(!xfs_buftarg_is_mem(bp->b_target)); + /* numrecs verification */ if (be16_to_cpu(block->bb_numrecs) > max_recs) return __this_address; /* sibling pointer verification */ fsb = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); - fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb, + fa = xfs_btree_check_fsblock_siblings(mp, fsb, block->bb_u.l.bb_leftsib); if (!fa) - fa = xfs_btree_check_lblock_siblings(mp, NULL, -1, fsb, + fa = xfs_btree_check_fsblock_siblings(mp, fsb, block->bb_u.l.bb_rightsib); return fa; } +/* Verify an in-memory btree block. */ +xfs_failaddr_t +xfs_btree_memblock_verify( + struct xfs_buf *bp, + unsigned int max_recs) +{ + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_buftarg *btp = bp->b_target; + xfs_failaddr_t fa; + xfbno_t bno; + + ASSERT(xfs_buftarg_is_mem(bp->b_target)); + + /* numrecs verification */ + if (be16_to_cpu(block->bb_numrecs) > max_recs) + return __this_address; + + /* sibling pointer verification */ + bno = xfs_daddr_to_xfbno(xfs_buf_daddr(bp)); + fa = xfs_btree_check_memblock_siblings(btp, bno, + block->bb_u.l.bb_leftsib); + if (fa) + return fa; + fa = xfs_btree_check_memblock_siblings(btp, bno, + block->bb_u.l.bb_rightsib); + if (fa) + return fa; + + return NULL; +} /** - * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format + * xfs_btree_agblock_v5hdr_verify() -- verify the v5 fields of a short-format * btree block * * @bp: buffer containing the btree block */ xfs_failaddr_t -xfs_btree_sblock_v5hdr_verify( +xfs_btree_agblock_v5hdr_verify( struct xfs_buf *bp) { struct xfs_mount *mp = bp->b_mount; @@ -4593,13 +4802,13 @@ xfs_btree_sblock_v5hdr_verify( } /** - * xfs_btree_sblock_verify() -- verify a short-format btree block + * xfs_btree_agblock_verify() -- verify a short-format btree block * * @bp: buffer containing the btree block * @max_recs: maximum records allowed in this btree node */ xfs_failaddr_t -xfs_btree_sblock_verify( +xfs_btree_agblock_verify( struct xfs_buf *bp, unsigned int max_recs) { @@ -4608,16 +4817,18 @@ xfs_btree_sblock_verify( xfs_agblock_t agbno; xfs_failaddr_t fa; + ASSERT(!xfs_buftarg_is_mem(bp->b_target)); + /* numrecs verification */ if (be16_to_cpu(block->bb_numrecs) > max_recs) return __this_address; /* sibling pointer verification */ agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp)); - fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno, + fa = xfs_btree_check_agblock_siblings(bp->b_pag, agbno, block->bb_u.s.bb_leftsib); if (!fa) - fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno, + fa = xfs_btree_check_agblock_siblings(bp->b_pag, agbno, block->bb_u.s.bb_rightsib); return fa; } @@ -4815,7 +5026,7 @@ xfs_btree_overlapped_query_range( /* Load the root of the btree. */ level = cur->bc_nlevels - 1; - cur->bc_ops->init_ptr_from_cur(cur, &ptr); + xfs_btree_init_ptr_from_cur(cur, &ptr); error = xfs_btree_lookup_get_block(cur, level, &ptr, &block); if (error) return error; @@ -4966,7 +5177,7 @@ xfs_btree_query_range( if (!xfs_btree_keycmp_le(cur, &low_key, &high_key)) return -EINVAL; - if (!(cur->bc_flags & XFS_BTREE_OVERLAPPING)) + if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) return xfs_btree_simple_query_range(cur, &low_key, &high_key, fn, priv); return xfs_btree_overlapped_query_range(cur, &low_key, &high_key, @@ -5020,7 +5231,7 @@ xfs_btree_diff_two_ptrs( const union xfs_btree_ptr *a, const union xfs_btree_ptr *b) { - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) return (int64_t)be64_to_cpu(a->l) - be64_to_cpu(b->l); return (int64_t)be32_to_cpu(a->s) - be32_to_cpu(b->s); } @@ -5074,7 +5285,7 @@ xfs_btree_has_records_helper( key_contig = cur->bc_ops->keys_contiguous(cur, &info->high_key, &rec_key, info->key_mask); if (key_contig == XBTREE_KEY_OVERLAP && - !(cur->bc_flags & XFS_BTREE_OVERLAPPING)) + !(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) return -EFSCORRUPTED; if (key_contig == XBTREE_KEY_GAP) return -ECANCELED; @@ -5168,7 +5379,7 @@ xfs_btree_has_more_records( return true; /* There are more record blocks. */ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) return block->bb_u.l.bb_rightsib != cpu_to_be64(NULLFSBLOCK); else return block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK); @@ -5233,6 +5444,7 @@ xfs_btree_goto_left_edge( return error; if (stat != 0) { ASSERT(0); + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index d906324e25..f93374278a 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -55,15 +55,8 @@ union xfs_btree_rec { #define XFS_LOOKUP_LE ((xfs_lookup_t)XFS_LOOKUP_LEi) #define XFS_LOOKUP_GE ((xfs_lookup_t)XFS_LOOKUP_GEi) -#define XFS_BTNUM_BNO ((xfs_btnum_t)XFS_BTNUM_BNOi) -#define XFS_BTNUM_CNT ((xfs_btnum_t)XFS_BTNUM_CNTi) -#define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) -#define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) -#define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi) -#define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi) -#define XFS_BTNUM_REFC ((xfs_btnum_t)XFS_BTNUM_REFCi) - -uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum); +struct xfs_btree_ops; +uint32_t xfs_btree_magic(struct xfs_mount *mp, const struct xfs_btree_ops *ops); /* * For logging record fields. @@ -86,9 +79,11 @@ uint32_t xfs_btree_magic(int crc, xfs_btnum_t btnum); * Generic stats interface */ #define XFS_BTREE_STATS_INC(cur, stat) \ - XFS_STATS_INC_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat) + XFS_STATS_INC_OFF((cur)->bc_mp, \ + (cur)->bc_ops->statoff + __XBTS_ ## stat) #define XFS_BTREE_STATS_ADD(cur, stat, val) \ - XFS_STATS_ADD_OFF((cur)->bc_mp, (cur)->bc_statoff + __XBTS_ ## stat, val) + XFS_STATS_ADD_OFF((cur)->bc_mp, \ + (cur)->bc_ops->statoff + __XBTS_ ## stat, val) enum xbtree_key_contig { XBTREE_KEY_GAP = 0, @@ -111,10 +106,37 @@ static inline enum xbtree_key_contig xbtree_key_contig(uint64_t x, uint64_t y) return XBTREE_KEY_OVERLAP; } +#define XFS_BTREE_LONG_PTR_LEN (sizeof(__be64)) +#define XFS_BTREE_SHORT_PTR_LEN (sizeof(__be32)) + +enum xfs_btree_type { + XFS_BTREE_TYPE_AG, + XFS_BTREE_TYPE_INODE, + XFS_BTREE_TYPE_MEM, +}; + struct xfs_btree_ops { - /* size of the key and record structures */ - size_t key_len; - size_t rec_len; + const char *name; + + /* Type of btree - AG-rooted or inode-rooted */ + enum xfs_btree_type type; + + /* XFS_BTGEO_* flags that determine the geometry of the btree */ + unsigned int geom_flags; + + /* size of the key, pointer, and record structures */ + size_t key_len; + size_t ptr_len; + size_t rec_len; + + /* LRU refcount to set on each btree buffer created */ + unsigned int lru_refs; + + /* offset of btree stats array */ + unsigned int statoff; + + /* sick mask for health reporting (only for XFS_BTREE_TYPE_AG) */ + unsigned int sick_mask; /* cursor operations */ struct xfs_btree_cur *(*dup_cursor)(struct xfs_btree_cur *); @@ -199,6 +221,10 @@ struct xfs_btree_ops { const union xfs_btree_key *mask); }; +/* btree geometry flags */ +#define XFS_BTGEO_LASTREC_UPDATE (1U << 0) /* track last rec externally */ +#define XFS_BTGEO_OVERLAPPING (1U << 1) /* overlapping intervals */ + /* * Reasons for the update_lastrec method to be called. */ @@ -215,39 +241,6 @@ union xfs_btree_irec { struct xfs_refcount_irec rc; }; -/* Per-AG btree information. */ -struct xfs_btree_cur_ag { - struct xfs_perag *pag; - union { - struct xfs_buf *agbp; - struct xbtree_afakeroot *afake; /* for staging cursor */ - }; - union { - struct { - unsigned int nr_ops; /* # record updates */ - unsigned int shape_changes; /* # of extent splits */ - } refc; - struct { - bool active; /* allocation cursor state */ - } abt; - }; -}; - -/* Btree-in-inode cursor information */ -struct xfs_btree_cur_ino { - struct xfs_inode *ip; - struct xbtree_ifakeroot *ifake; /* for staging cursor */ - int allocated; - short forksize; - char whichfork; - char flags; -/* We are converting a delalloc reservation */ -#define XFS_BTCUR_BMBT_WASDEL (1 << 0) - -/* For extent swap, ignore owner check in verifier */ -#define XFS_BTCUR_BMBT_INVALID_OWNER (1 << 1) -}; - struct xfs_btree_level { /* buffer pointer */ struct xfs_buf *bp; @@ -272,21 +265,38 @@ struct xfs_btree_cur const struct xfs_btree_ops *bc_ops; struct kmem_cache *bc_cache; /* cursor cache */ unsigned int bc_flags; /* btree features - below */ - xfs_btnum_t bc_btnum; /* identifies which btree type */ union xfs_btree_irec bc_rec; /* current insert/search record value */ uint8_t bc_nlevels; /* number of levels in the tree */ uint8_t bc_maxlevels; /* maximum levels for this btree type */ - int bc_statoff; /* offset of btree stats array */ - /* - * Short btree pointers need an agno to be able to turn the pointers - * into physical addresses for IO, so the btree cursor switches between - * bc_ino and bc_ag based on whether XFS_BTREE_LONG_PTRS is set for the - * cursor. - */ + /* per-type information */ union { - struct xfs_btree_cur_ag bc_ag; - struct xfs_btree_cur_ino bc_ino; + struct { + struct xfs_inode *ip; + short forksize; + char whichfork; + struct xbtree_ifakeroot *ifake; /* for staging cursor */ + } bc_ino; + struct { + struct xfs_perag *pag; + struct xfs_buf *agbp; + struct xbtree_afakeroot *afake; /* for staging cursor */ + } bc_ag; + struct { + struct xfbtree *xfbtree; + struct xfs_perag *pag; + } bc_mem; + }; + + /* per-format private data */ + union { + struct { + int allocated; + } bc_bmap; /* bmapbt */ + struct { + unsigned int nr_ops; /* # record updates */ + unsigned int shape_changes; /* # of extent splits */ + } bc_refc; /* refcountbt */ }; /* Must be at the end of the struct! */ @@ -304,18 +314,22 @@ xfs_btree_cur_sizeof(unsigned int nlevels) return struct_size_t(struct xfs_btree_cur, bc_levels, nlevels); } -/* cursor flags */ -#define XFS_BTREE_LONG_PTRS (1<<0) /* pointers are 64bits long */ -#define XFS_BTREE_ROOT_IN_INODE (1<<1) /* root may be variable size */ -#define XFS_BTREE_LASTREC_UPDATE (1<<2) /* track last rec externally */ -#define XFS_BTREE_CRC_BLOCKS (1<<3) /* uses extended btree blocks */ -#define XFS_BTREE_OVERLAPPING (1<<4) /* overlapping intervals */ +/* cursor state flags */ /* * The root of this btree is a fakeroot structure so that we can stage a btree * rebuild without leaving it accessible via primary metadata. The ops struct * is dynamically allocated and must be freed when the cursor is deleted. */ -#define XFS_BTREE_STAGING (1<<5) +#define XFS_BTREE_STAGING (1U << 0) + +/* We are converting a delalloc reservation (only for bmbt btrees) */ +#define XFS_BTREE_BMBT_WASDEL (1U << 1) + +/* For extent swap, ignore owner check in verifier (only for bmbt btrees) */ +#define XFS_BTREE_BMBT_INVALID_OWNER (1U << 2) + +/* Cursor is active (only for allocbt btrees) */ +#define XFS_BTREE_ALLOCBT_ACTIVE (1U << 3) #define XFS_BTREE_NOERROR 0 #define XFS_BTREE_ERROR 1 @@ -325,14 +339,10 @@ xfs_btree_cur_sizeof(unsigned int nlevels) */ #define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)((bp)->b_addr)) -/* - * Internal long and short btree block checks. They return NULL if the - * block is ok or the address of the failed check otherwise. - */ -xfs_failaddr_t __xfs_btree_check_lblock(struct xfs_btree_cur *cur, - struct xfs_btree_block *block, int level, struct xfs_buf *bp); -xfs_failaddr_t __xfs_btree_check_sblock(struct xfs_btree_cur *cur, +xfs_failaddr_t __xfs_btree_check_block(struct xfs_btree_cur *cur, struct xfs_btree_block *block, int level, struct xfs_buf *bp); +int __xfs_btree_check_ptr(struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr, int index, int level); /* * Check that block header is ok. @@ -345,24 +355,6 @@ xfs_btree_check_block( struct xfs_buf *bp); /* buffer containing block, if any */ /* - * Check that (long) pointer is ok. - */ -bool /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_lptr( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_fsblock_t fsbno, /* btree block disk address */ - int level); /* btree block level */ - -/* - * Check that (short) pointer is ok. - */ -bool /* error (0 or EFSCORRUPTED) */ -xfs_btree_check_sptr( - struct xfs_btree_cur *cur, /* btree cursor */ - xfs_agblock_t agbno, /* btree block disk address */ - int level); /* btree block level */ - -/* * Delete the btree cursor. */ void @@ -392,63 +384,14 @@ xfs_btree_offsets( int *last); /* output: last byte offset */ /* - * Get a buffer for the block, return it read in. - * Long-form addressing. - */ -int /* error */ -xfs_btree_read_bufl( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_fsblock_t fsbno, /* file system block number */ - struct xfs_buf **bpp, /* buffer for fsbno */ - int refval, /* ref count value for buffer */ - const struct xfs_buf_ops *ops); - -/* - * Read-ahead the block, don't wait for it, don't return a buffer. - * Long-form addressing. - */ -void /* error */ -xfs_btree_reada_bufl( - struct xfs_mount *mp, /* file system mount point */ - xfs_fsblock_t fsbno, /* file system block number */ - xfs_extlen_t count, /* count of filesystem blocks */ - const struct xfs_buf_ops *ops); - -/* - * Read-ahead the block, don't wait for it, don't return a buffer. - * Short-form addressing. - */ -void /* error */ -xfs_btree_reada_bufs( - struct xfs_mount *mp, /* file system mount point */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* allocation group block number */ - xfs_extlen_t count, /* count of filesystem blocks */ - const struct xfs_buf_ops *ops); - -/* * Initialise a new btree block header */ -void -xfs_btree_init_block( - struct xfs_mount *mp, - struct xfs_buf *bp, - xfs_btnum_t btnum, - __u16 level, - __u16 numrecs, - __u64 owner); - -void -xfs_btree_init_block_int( - struct xfs_mount *mp, - struct xfs_btree_block *buf, - xfs_daddr_t blkno, - xfs_btnum_t btnum, - __u16 level, - __u16 numrecs, - __u64 owner, - unsigned int flags); +void xfs_btree_init_buf(struct xfs_mount *mp, struct xfs_buf *bp, + const struct xfs_btree_ops *ops, __u16 level, __u16 numrecs, + __u64 owner); +void xfs_btree_init_block(struct xfs_mount *mp, + struct xfs_btree_block *buf, const struct xfs_btree_ops *ops, + __u16 level, __u16 numrecs, __u64 owner); /* * Common btree core entry points. @@ -467,10 +410,10 @@ int xfs_btree_change_owner(struct xfs_btree_cur *cur, uint64_t new_owner, /* * btree block CRC helpers */ -void xfs_btree_lblock_calc_crc(struct xfs_buf *); -bool xfs_btree_lblock_verify_crc(struct xfs_buf *); -void xfs_btree_sblock_calc_crc(struct xfs_buf *); -bool xfs_btree_sblock_verify_crc(struct xfs_buf *); +void xfs_btree_fsblock_calc_crc(struct xfs_buf *); +bool xfs_btree_fsblock_verify_crc(struct xfs_buf *); +void xfs_btree_agblock_calc_crc(struct xfs_buf *); +bool xfs_btree_agblock_verify_crc(struct xfs_buf *); /* * Internal btree helpers also used by xfs_bmap.c. @@ -510,12 +453,14 @@ static inline int xfs_btree_get_level(const struct xfs_btree_block *block) #define XFS_FILBLKS_MIN(a,b) min_t(xfs_filblks_t, (a), (b)) #define XFS_FILBLKS_MAX(a,b) max_t(xfs_filblks_t, (a), (b)) -xfs_failaddr_t xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp); -xfs_failaddr_t xfs_btree_sblock_verify(struct xfs_buf *bp, +xfs_failaddr_t xfs_btree_agblock_v5hdr_verify(struct xfs_buf *bp); +xfs_failaddr_t xfs_btree_agblock_verify(struct xfs_buf *bp, unsigned int max_recs); -xfs_failaddr_t xfs_btree_lblock_v5hdr_verify(struct xfs_buf *bp, +xfs_failaddr_t xfs_btree_fsblock_v5hdr_verify(struct xfs_buf *bp, uint64_t owner); -xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp, +xfs_failaddr_t xfs_btree_fsblock_verify(struct xfs_buf *bp, + unsigned int max_recs); +xfs_failaddr_t xfs_btree_memblock_verify(struct xfs_buf *bp, unsigned int max_recs); unsigned int xfs_btree_compute_maxlevels(const unsigned int *limits, @@ -690,7 +635,7 @@ xfs_btree_islastblock( block = xfs_btree_get_block(cur, level, &bp); - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN) return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK); return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); } @@ -714,21 +659,28 @@ void xfs_btree_copy_ptrs(struct xfs_btree_cur *cur, void xfs_btree_copy_keys(struct xfs_btree_cur *cur, union xfs_btree_key *dst_key, const union xfs_btree_key *src_key, int numkeys); +void xfs_btree_init_ptr_from_cur(struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr); static inline struct xfs_btree_cur * xfs_btree_alloc_cursor( struct xfs_mount *mp, struct xfs_trans *tp, - xfs_btnum_t btnum, + const struct xfs_btree_ops *ops, uint8_t maxlevels, struct kmem_cache *cache) { struct xfs_btree_cur *cur; - cur = kmem_cache_zalloc(cache, GFP_NOFS | __GFP_NOFAIL); + ASSERT(ops->ptr_len == XFS_BTREE_LONG_PTR_LEN || + ops->ptr_len == XFS_BTREE_SHORT_PTR_LEN); + + /* BMBT allocations can come through from non-transactional context. */ + cur = kmem_cache_zalloc(cache, + GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); + cur->bc_ops = ops; cur->bc_tp = tp; cur->bc_mp = mp; - cur->bc_btnum = btnum; cur->bc_maxlevels = maxlevels; cur->bc_cache = cache; @@ -740,4 +692,14 @@ void xfs_btree_destroy_cur_caches(void); int xfs_btree_goto_left_edge(struct xfs_btree_cur *cur); +/* Does this level of the cursor point to the inode root (and not a block)? */ +static inline bool +xfs_btree_at_iroot( + const struct xfs_btree_cur *cur, + int level) +{ + return cur->bc_ops->type == XFS_BTREE_TYPE_INODE && + level == cur->bc_nlevels - 1; +} + #endif /* __XFS_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree_mem.c b/fs/xfs/libxfs/xfs_btree_mem.c new file mode 100644 index 0000000000..036061fe32 --- /dev/null +++ b/fs/xfs/libxfs/xfs_btree_mem.c @@ -0,0 +1,347 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2021-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_trans.h" +#include "xfs_btree.h" +#include "xfs_error.h" +#include "xfs_buf_mem.h" +#include "xfs_btree_mem.h" +#include "xfs_ag.h" +#include "xfs_buf_item.h" +#include "xfs_trace.h" + +/* Set the root of an in-memory btree. */ +void +xfbtree_set_root( + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr, + int inc) +{ + ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM); + + cur->bc_mem.xfbtree->root = *ptr; + cur->bc_mem.xfbtree->nlevels += inc; +} + +/* Initialize a pointer from the in-memory btree header. */ +void +xfbtree_init_ptr_from_cur( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM); + + *ptr = cur->bc_mem.xfbtree->root; +} + +/* Duplicate an in-memory btree cursor. */ +struct xfs_btree_cur * +xfbtree_dup_cursor( + struct xfs_btree_cur *cur) +{ + struct xfs_btree_cur *ncur; + + ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM); + + ncur = xfs_btree_alloc_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ops, + cur->bc_maxlevels, cur->bc_cache); + ncur->bc_flags = cur->bc_flags; + ncur->bc_nlevels = cur->bc_nlevels; + ncur->bc_mem.xfbtree = cur->bc_mem.xfbtree; + + if (cur->bc_mem.pag) + ncur->bc_mem.pag = xfs_perag_hold(cur->bc_mem.pag); + + return ncur; +} + +/* Close the btree xfile and release all resources. */ +void +xfbtree_destroy( + struct xfbtree *xfbt) +{ + xfs_buftarg_drain(xfbt->target); +} + +/* Compute the number of bytes available for records. */ +static inline unsigned int +xfbtree_rec_bytes( + struct xfs_mount *mp, + const struct xfs_btree_ops *ops) +{ + return XMBUF_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN; +} + +/* Initialize an empty leaf block as the btree root. */ +STATIC int +xfbtree_init_leaf_block( + struct xfs_mount *mp, + struct xfbtree *xfbt, + const struct xfs_btree_ops *ops) +{ + struct xfs_buf *bp; + xfbno_t bno = xfbt->highest_bno++; + int error; + + error = xfs_buf_get(xfbt->target, xfbno_to_daddr(bno), XFBNO_BBSIZE, + &bp); + if (error) + return error; + + trace_xfbtree_create_root_buf(xfbt, bp); + + bp->b_ops = ops->buf_ops; + xfs_btree_init_buf(mp, bp, ops, 0, 0, xfbt->owner); + xfs_buf_relse(bp); + + xfbt->root.l = cpu_to_be64(bno); + return 0; +} + +/* + * Create an in-memory btree root that can be used with the given xmbuf. + * Callers must set xfbt->owner. + */ +int +xfbtree_init( + struct xfs_mount *mp, + struct xfbtree *xfbt, + struct xfs_buftarg *btp, + const struct xfs_btree_ops *ops) +{ + unsigned int blocklen = xfbtree_rec_bytes(mp, ops); + unsigned int keyptr_len; + int error; + + /* Requires a long-format CRC-format btree */ + if (!xfs_has_crc(mp)) { + ASSERT(xfs_has_crc(mp)); + return -EINVAL; + } + if (ops->ptr_len != XFS_BTREE_LONG_PTR_LEN) { + ASSERT(ops->ptr_len == XFS_BTREE_LONG_PTR_LEN); + return -EINVAL; + } + + memset(xfbt, 0, sizeof(*xfbt)); + xfbt->target = btp; + + /* Set up min/maxrecs for this btree. */ + keyptr_len = ops->key_len + sizeof(__be64); + xfbt->maxrecs[0] = blocklen / ops->rec_len; + xfbt->maxrecs[1] = blocklen / keyptr_len; + xfbt->minrecs[0] = xfbt->maxrecs[0] / 2; + xfbt->minrecs[1] = xfbt->maxrecs[1] / 2; + xfbt->highest_bno = 0; + xfbt->nlevels = 1; + + /* Initialize the empty btree. */ + error = xfbtree_init_leaf_block(mp, xfbt, ops); + if (error) + goto err_freesp; + + trace_xfbtree_init(mp, xfbt, ops); + + return 0; + +err_freesp: + xfs_buftarg_drain(xfbt->target); + return error; +} + +/* Allocate a block to our in-memory btree. */ +int +xfbtree_alloc_block( + struct xfs_btree_cur *cur, + const union xfs_btree_ptr *start, + union xfs_btree_ptr *new, + int *stat) +{ + struct xfbtree *xfbt = cur->bc_mem.xfbtree; + xfbno_t bno = xfbt->highest_bno++; + + ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM); + + trace_xfbtree_alloc_block(xfbt, cur, bno); + + /* Fail if the block address exceeds the maximum for the buftarg. */ + if (!xfbtree_verify_bno(xfbt, bno)) { + ASSERT(xfbtree_verify_bno(xfbt, bno)); + *stat = 0; + return 0; + } + + new->l = cpu_to_be64(bno); + *stat = 1; + return 0; +} + +/* Free a block from our in-memory btree. */ +int +xfbtree_free_block( + struct xfs_btree_cur *cur, + struct xfs_buf *bp) +{ + struct xfbtree *xfbt = cur->bc_mem.xfbtree; + xfs_daddr_t daddr = xfs_buf_daddr(bp); + xfbno_t bno = xfs_daddr_to_xfbno(daddr); + + ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_MEM); + + trace_xfbtree_free_block(xfbt, cur, bno); + + if (bno + 1 == xfbt->highest_bno) + xfbt->highest_bno--; + + return 0; +} + +/* Return the minimum number of records for a btree block. */ +int +xfbtree_get_minrecs( + struct xfs_btree_cur *cur, + int level) +{ + struct xfbtree *xfbt = cur->bc_mem.xfbtree; + + return xfbt->minrecs[level != 0]; +} + +/* Return the maximum number of records for a btree block. */ +int +xfbtree_get_maxrecs( + struct xfs_btree_cur *cur, + int level) +{ + struct xfbtree *xfbt = cur->bc_mem.xfbtree; + + return xfbt->maxrecs[level != 0]; +} + +/* If this log item is a buffer item that came from the xfbtree, return it. */ +static inline struct xfs_buf * +xfbtree_buf_match( + struct xfbtree *xfbt, + const struct xfs_log_item *lip) +{ + const struct xfs_buf_log_item *bli; + struct xfs_buf *bp; + + if (lip->li_type != XFS_LI_BUF) + return NULL; + + bli = container_of(lip, struct xfs_buf_log_item, bli_item); + bp = bli->bli_buf; + if (bp->b_target != xfbt->target) + return NULL; + + return bp; +} + +/* + * Commit changes to the incore btree immediately by writing all dirty xfbtree + * buffers to the backing xfile. This detaches all xfbtree buffers from the + * transaction, even on failure. The buffer locks are dropped between the + * delwri queue and submit, so the caller must synchronize btree access. + * + * Normally we'd let the buffers commit with the transaction and get written to + * the xfile via the log, but online repair stages ephemeral btrees in memory + * and uses the btree_staging functions to write new btrees to disk atomically. + * The in-memory btree (and its backing store) are discarded at the end of the + * repair phase, which means that xfbtree buffers cannot commit with the rest + * of a transaction. + * + * In other words, online repair only needs the transaction to collect buffer + * pointers and to avoid buffer deadlocks, not to guarantee consistency of + * updates. + */ +int +xfbtree_trans_commit( + struct xfbtree *xfbt, + struct xfs_trans *tp) +{ + struct xfs_log_item *lip, *n; + bool tp_dirty = false; + int error = 0; + + /* + * For each xfbtree buffer attached to the transaction, write the dirty + * buffers to the xfile and release them. + */ + list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) { + struct xfs_buf *bp = xfbtree_buf_match(xfbt, lip); + + if (!bp) { + if (test_bit(XFS_LI_DIRTY, &lip->li_flags)) + tp_dirty |= true; + continue; + } + + trace_xfbtree_trans_commit_buf(xfbt, bp); + + xmbuf_trans_bdetach(tp, bp); + + /* + * If the buffer fails verification, note the failure but + * continue walking the transaction items so that we remove all + * ephemeral btree buffers. + */ + if (!error) + error = xmbuf_finalize(bp); + + xfs_buf_relse(bp); + } + + /* + * Reset the transaction's dirty flag to reflect the dirty state of the + * log items that are still attached. + */ + tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) | + (tp_dirty ? XFS_TRANS_DIRTY : 0); + + return error; +} + +/* + * Cancel changes to the incore btree by detaching all the xfbtree buffers. + * Changes are not undone, so callers must not access the btree ever again. + */ +void +xfbtree_trans_cancel( + struct xfbtree *xfbt, + struct xfs_trans *tp) +{ + struct xfs_log_item *lip, *n; + bool tp_dirty = false; + + list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) { + struct xfs_buf *bp = xfbtree_buf_match(xfbt, lip); + + if (!bp) { + if (test_bit(XFS_LI_DIRTY, &lip->li_flags)) + tp_dirty |= true; + continue; + } + + trace_xfbtree_trans_cancel_buf(xfbt, bp); + + xmbuf_trans_bdetach(tp, bp); + xfs_buf_relse(bp); + } + + /* + * Reset the transaction's dirty flag to reflect the dirty state of the + * log items that are still attached. + */ + tp->t_flags = (tp->t_flags & ~XFS_TRANS_DIRTY) | + (tp_dirty ? XFS_TRANS_DIRTY : 0); +} diff --git a/fs/xfs/libxfs/xfs_btree_mem.h b/fs/xfs/libxfs/xfs_btree_mem.h new file mode 100644 index 0000000000..1c3825786e --- /dev/null +++ b/fs/xfs/libxfs/xfs_btree_mem.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2021-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_BTREE_MEM_H__ +#define __XFS_BTREE_MEM_H__ + +typedef uint64_t xfbno_t; + +#define XFBNO_BLOCKSIZE (XMBUF_BLOCKSIZE) +#define XFBNO_BBSHIFT (XMBUF_BLOCKSHIFT - BBSHIFT) +#define XFBNO_BBSIZE (XFBNO_BLOCKSIZE >> BBSHIFT) + +static inline xfs_daddr_t xfbno_to_daddr(xfbno_t blkno) +{ + return blkno << XFBNO_BBSHIFT; +} + +static inline xfbno_t xfs_daddr_to_xfbno(xfs_daddr_t daddr) +{ + return daddr >> XFBNO_BBSHIFT; +} + +struct xfbtree { + /* buffer cache target for this in-memory btree */ + struct xfs_buftarg *target; + + /* Highest block number that has been written to. */ + xfbno_t highest_bno; + + /* Owner of this btree. */ + unsigned long long owner; + + /* Btree header */ + union xfs_btree_ptr root; + unsigned int nlevels; + + /* Minimum and maximum records per block. */ + unsigned int maxrecs[2]; + unsigned int minrecs[2]; +}; + +#ifdef CONFIG_XFS_BTREE_IN_MEM +static inline bool xfbtree_verify_bno(struct xfbtree *xfbt, xfbno_t bno) +{ + return xmbuf_verify_daddr(xfbt->target, xfbno_to_daddr(bno)); +} + +void xfbtree_set_root(struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr, int inc); +void xfbtree_init_ptr_from_cur(struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr); +struct xfs_btree_cur *xfbtree_dup_cursor(struct xfs_btree_cur *cur); + +int xfbtree_get_minrecs(struct xfs_btree_cur *cur, int level); +int xfbtree_get_maxrecs(struct xfs_btree_cur *cur, int level); + +int xfbtree_alloc_block(struct xfs_btree_cur *cur, + const union xfs_btree_ptr *start, union xfs_btree_ptr *ptr, + int *stat); +int xfbtree_free_block(struct xfs_btree_cur *cur, struct xfs_buf *bp); + +/* Callers must set xfbt->target and xfbt->owner before calling this */ +int xfbtree_init(struct xfs_mount *mp, struct xfbtree *xfbt, + struct xfs_buftarg *btp, const struct xfs_btree_ops *ops); +void xfbtree_destroy(struct xfbtree *xfbt); + +int xfbtree_trans_commit(struct xfbtree *xfbt, struct xfs_trans *tp); +void xfbtree_trans_cancel(struct xfbtree *xfbt, struct xfs_trans *tp); +#else +# define xfbtree_verify_bno(...) (false) +#endif /* CONFIG_XFS_BTREE_IN_MEM */ + +#endif /* __XFS_BTREE_MEM_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c index e276eba87c..6949297031 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.c +++ b/fs/xfs/libxfs/xfs_btree_staging.c @@ -39,63 +39,6 @@ */ /* - * Don't allow staging cursors to be duplicated because they're supposed to be - * kept private to a single thread. - */ -STATIC struct xfs_btree_cur * -xfs_btree_fakeroot_dup_cursor( - struct xfs_btree_cur *cur) -{ - ASSERT(0); - return NULL; -} - -/* - * Don't allow block allocation for a staging cursor, because staging cursors - * do not support regular btree modifications. - * - * Bulk loading uses a separate callback to obtain new blocks from a - * preallocated list, which prevents ENOSPC failures during loading. - */ -STATIC int -xfs_btree_fakeroot_alloc_block( - struct xfs_btree_cur *cur, - const union xfs_btree_ptr *start_bno, - union xfs_btree_ptr *new_bno, - int *stat) -{ - ASSERT(0); - return -EFSCORRUPTED; -} - -/* - * Don't allow block freeing for a staging cursor, because staging cursors - * do not support regular btree modifications. - */ -STATIC int -xfs_btree_fakeroot_free_block( - struct xfs_btree_cur *cur, - struct xfs_buf *bp) -{ - ASSERT(0); - return -EFSCORRUPTED; -} - -/* Initialize a pointer to the root block from the fakeroot. */ -STATIC void -xfs_btree_fakeroot_init_ptr_from_cur( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr) -{ - struct xbtree_afakeroot *afake; - - ASSERT(cur->bc_flags & XFS_BTREE_STAGING); - - afake = cur->bc_ag.afake; - ptr->s = cpu_to_be32(afake->af_root); -} - -/* * Bulk Loading for AG Btrees * ========================== * @@ -109,47 +52,20 @@ xfs_btree_fakeroot_init_ptr_from_cur( * cursor into a regular btree cursor. */ -/* Update the btree root information for a per-AG fake root. */ -STATIC void -xfs_btree_afakeroot_set_root( - struct xfs_btree_cur *cur, - const union xfs_btree_ptr *ptr, - int inc) -{ - struct xbtree_afakeroot *afake = cur->bc_ag.afake; - - ASSERT(cur->bc_flags & XFS_BTREE_STAGING); - afake->af_root = be32_to_cpu(ptr->s); - afake->af_levels += inc; -} - /* * Initialize a AG-rooted btree cursor with the given AG btree fake root. - * The btree cursor's bc_ops will be overridden as needed to make the staging - * functionality work. */ void xfs_btree_stage_afakeroot( struct xfs_btree_cur *cur, struct xbtree_afakeroot *afake) { - struct xfs_btree_ops *nops; - ASSERT(!(cur->bc_flags & XFS_BTREE_STAGING)); - ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)); + ASSERT(cur->bc_ops->type != XFS_BTREE_TYPE_INODE); ASSERT(cur->bc_tp == NULL); - nops = kmem_alloc(sizeof(struct xfs_btree_ops), KM_NOFS); - memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops)); - nops->alloc_block = xfs_btree_fakeroot_alloc_block; - nops->free_block = xfs_btree_fakeroot_free_block; - nops->init_ptr_from_cur = xfs_btree_fakeroot_init_ptr_from_cur; - nops->set_root = xfs_btree_afakeroot_set_root; - nops->dup_cursor = xfs_btree_fakeroot_dup_cursor; - cur->bc_ag.afake = afake; cur->bc_nlevels = afake->af_levels; - cur->bc_ops = nops; cur->bc_flags |= XFS_BTREE_STAGING; } @@ -163,17 +79,15 @@ void xfs_btree_commit_afakeroot( struct xfs_btree_cur *cur, struct xfs_trans *tp, - struct xfs_buf *agbp, - const struct xfs_btree_ops *ops) + struct xfs_buf *agbp) { ASSERT(cur->bc_flags & XFS_BTREE_STAGING); ASSERT(cur->bc_tp == NULL); trace_xfs_btree_commit_afakeroot(cur); - kmem_free((void *)cur->bc_ops); + cur->bc_ag.afake = NULL; cur->bc_ag.agbp = agbp; - cur->bc_ops = ops; cur->bc_flags &= ~XFS_BTREE_STAGING; cur->bc_tp = tp; } @@ -211,29 +125,16 @@ xfs_btree_commit_afakeroot( void xfs_btree_stage_ifakeroot( struct xfs_btree_cur *cur, - struct xbtree_ifakeroot *ifake, - struct xfs_btree_ops **new_ops) + struct xbtree_ifakeroot *ifake) { - struct xfs_btree_ops *nops; - ASSERT(!(cur->bc_flags & XFS_BTREE_STAGING)); - ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + ASSERT(cur->bc_ops->type == XFS_BTREE_TYPE_INODE); ASSERT(cur->bc_tp == NULL); - nops = kmem_alloc(sizeof(struct xfs_btree_ops), KM_NOFS); - memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops)); - nops->alloc_block = xfs_btree_fakeroot_alloc_block; - nops->free_block = xfs_btree_fakeroot_free_block; - nops->init_ptr_from_cur = xfs_btree_fakeroot_init_ptr_from_cur; - nops->dup_cursor = xfs_btree_fakeroot_dup_cursor; - cur->bc_ino.ifake = ifake; cur->bc_nlevels = ifake->if_levels; - cur->bc_ops = nops; + cur->bc_ino.forksize = ifake->if_fork_size; cur->bc_flags |= XFS_BTREE_STAGING; - - if (new_ops) - *new_ops = nops; } /* @@ -246,18 +147,15 @@ void xfs_btree_commit_ifakeroot( struct xfs_btree_cur *cur, struct xfs_trans *tp, - int whichfork, - const struct xfs_btree_ops *ops) + int whichfork) { ASSERT(cur->bc_flags & XFS_BTREE_STAGING); ASSERT(cur->bc_tp == NULL); trace_xfs_btree_commit_ifakeroot(cur); - kmem_free((void *)cur->bc_ops); cur->bc_ino.ifake = NULL; cur->bc_ino.whichfork = whichfork; - cur->bc_ops = ops; cur->bc_flags &= ~XFS_BTREE_STAGING; cur->bc_tp = tp; } @@ -397,8 +295,7 @@ xfs_btree_bload_prep_block( struct xfs_btree_block *new_block; int ret; - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && - level == cur->bc_nlevels - 1) { + if (xfs_btree_at_iroot(cur, level)) { struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); size_t new_size; @@ -406,14 +303,12 @@ xfs_btree_bload_prep_block( /* Allocate a new incore btree root block. */ new_size = bbl->iroot_size(cur, level, nr_this_block, priv); - ifp->if_broot = kmem_zalloc(new_size, 0); + ifp->if_broot = kzalloc(new_size, GFP_KERNEL | __GFP_NOFAIL); ifp->if_broot_bytes = (int)new_size; /* Initialize it and send it out. */ - xfs_btree_init_block_int(cur->bc_mp, ifp->if_broot, - XFS_BUF_DADDR_NULL, cur->bc_btnum, level, - nr_this_block, cur->bc_ino.ip->i_ino, - cur->bc_flags); + xfs_btree_init_block(cur->bc_mp, ifp->if_broot, cur->bc_ops, + level, nr_this_block, cur->bc_ino.ip->i_ino); *bpp = NULL; *blockp = ifp->if_broot; @@ -704,7 +599,7 @@ xfs_btree_bload_compute_geometry( xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level, &avg_per_block, &level_blocks, &dontcare64); - if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) { /* * If all the items we want to store at this level * would fit in the inode root block, then we have our @@ -763,7 +658,7 @@ xfs_btree_bload_compute_geometry( return -EOVERFLOW; bbl->btree_height = cur->bc_nlevels; - if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) + if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) bbl->nr_blocks = nr_blocks - 1; else bbl->nr_blocks = nr_blocks; @@ -890,7 +785,7 @@ xfs_btree_bload( } /* Initialize the new root. */ - if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) { + if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) { ASSERT(xfs_btree_ptr_is_null(cur, &ptr)); cur->bc_ino.ifake->if_levels = cur->bc_nlevels; cur->bc_ino.ifake->if_blocks = total_blocks - 1; diff --git a/fs/xfs/libxfs/xfs_btree_staging.h b/fs/xfs/libxfs/xfs_btree_staging.h index 055ea43b1e..0c9c2ffb12 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.h +++ b/fs/xfs/libxfs/xfs_btree_staging.h @@ -22,7 +22,7 @@ struct xbtree_afakeroot { void xfs_btree_stage_afakeroot(struct xfs_btree_cur *cur, struct xbtree_afakeroot *afake); void xfs_btree_commit_afakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp, - struct xfs_buf *agbp, const struct xfs_btree_ops *ops); + struct xfs_buf *agbp); /* Fake root for an inode-rooted btree. */ struct xbtree_ifakeroot { @@ -41,10 +41,9 @@ struct xbtree_ifakeroot { /* Cursor interactions with fake roots for inode-rooted btrees. */ void xfs_btree_stage_ifakeroot(struct xfs_btree_cur *cur, - struct xbtree_ifakeroot *ifake, - struct xfs_btree_ops **new_ops); + struct xbtree_ifakeroot *ifake); void xfs_btree_commit_ifakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp, - int whichfork, const struct xfs_btree_ops *ops); + int whichfork); /* Bulk loading of staged btrees. */ typedef int (*xfs_btree_bload_get_records_fn)(struct xfs_btree_cur *cur, @@ -76,8 +75,7 @@ struct xfs_btree_bload { /* * This function should return the size of the in-core btree root - * block. It is only necessary for XFS_BTREE_ROOT_IN_INODE btree - * types. + * block. It is only necessary for XFS_BTREE_TYPE_INODE btrees. */ xfs_btree_bload_iroot_size_fn iroot_size; diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 5457188bb4..718d071bb2 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -23,6 +23,7 @@ #include "xfs_buf_item.h" #include "xfs_log.h" #include "xfs_errortag.h" +#include "xfs_health.h" /* * xfs_da_btree.c @@ -85,7 +86,8 @@ xfs_da_state_alloc( { struct xfs_da_state *state; - state = kmem_cache_zalloc(xfs_da_state_cache, GFP_NOFS | __GFP_NOFAIL); + state = kmem_cache_zalloc(xfs_da_state_cache, + GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); state->args = args; state->mp = args->dp->i_mount; return state; @@ -352,6 +354,8 @@ const struct xfs_buf_ops xfs_da3_node_buf_ops = { static int xfs_da3_node_set_type( struct xfs_trans *tp, + struct xfs_inode *dp, + int whichfork, struct xfs_buf *bp) { struct xfs_da_blkinfo *info = bp->b_addr; @@ -373,6 +377,7 @@ xfs_da3_node_set_type( XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, tp->t_mountp, info, sizeof(*info)); xfs_trans_brelse(tp, bp); + xfs_dirattr_mark_sick(dp, whichfork); return -EFSCORRUPTED; } } @@ -391,7 +396,7 @@ xfs_da3_node_read( &xfs_da3_node_buf_ops); if (error || !*bpp || !tp) return error; - return xfs_da3_node_set_type(tp, *bpp); + return xfs_da3_node_set_type(tp, dp, whichfork, *bpp); } int @@ -408,6 +413,8 @@ xfs_da3_node_read_mapped( error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, mappedbno, XFS_FSB_TO_BB(mp, xfs_dabuf_nfsb(mp, whichfork)), 0, bpp, &xfs_da3_node_buf_ops); + if (xfs_metadata_is_sick(error)) + xfs_dirattr_mark_sick(dp, whichfork); if (error || !*bpp) return error; @@ -418,7 +425,7 @@ xfs_da3_node_read_mapped( if (!tp) return 0; - return xfs_da3_node_set_type(tp, *bpp); + return xfs_da3_node_set_type(tp, dp, whichfork, *bpp); } /* @@ -631,6 +638,7 @@ xfs_da3_split( if (node->hdr.info.forw) { if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) { xfs_buf_mark_corrupt(oldblk->bp); + xfs_da_mark_sick(state->args); error = -EFSCORRUPTED; goto out; } @@ -644,6 +652,7 @@ xfs_da3_split( if (node->hdr.info.back) { if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) { xfs_buf_mark_corrupt(oldblk->bp); + xfs_da_mark_sick(state->args); error = -EFSCORRUPTED; goto out; } @@ -1635,6 +1644,7 @@ xfs_da3_node_lookup_int( if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) { xfs_buf_mark_corrupt(blk->bp); + xfs_da_mark_sick(args); return -EFSCORRUPTED; } @@ -1650,6 +1660,7 @@ xfs_da3_node_lookup_int( /* Tree taller than we can handle; bail out! */ if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { xfs_buf_mark_corrupt(blk->bp); + xfs_da_mark_sick(args); return -EFSCORRUPTED; } @@ -1658,6 +1669,7 @@ xfs_da3_node_lookup_int( expected_level = nodehdr.level - 1; else if (expected_level != nodehdr.level) { xfs_buf_mark_corrupt(blk->bp); + xfs_da_mark_sick(args); return -EFSCORRUPTED; } else expected_level--; @@ -1709,12 +1721,16 @@ xfs_da3_node_lookup_int( } /* We can't point back to the root. */ - if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) + if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk)) { + xfs_da_mark_sick(args); return -EFSCORRUPTED; + } } - if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) + if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0)) { + xfs_da_mark_sick(args); return -EFSCORRUPTED; + } /* * A leaf block that ends in the hashval that we are interested in @@ -1732,6 +1748,7 @@ xfs_da3_node_lookup_int( args->blkno = blk->blkno; } else { ASSERT(0); + xfs_da_mark_sick(args); return -EFSCORRUPTED; } if (((retval == -ENOENT) || (retval == -ENOATTR)) && @@ -2182,7 +2199,8 @@ xfs_da_grow_inode_int( * If we didn't get it and the block might work if fragmented, * try without the CONTIG flag. Loop until we get it all. */ - mapp = kmem_alloc(sizeof(*mapp) * count, 0); + mapp = kmalloc(sizeof(*mapp) * count, + GFP_KERNEL | __GFP_NOFAIL); for (b = *bno, mapi = 0; b < *bno + count; ) { c = (int)(*bno + count - b); nmap = min(XFS_BMAP_MAX_NMAP, c); @@ -2219,7 +2237,7 @@ xfs_da_grow_inode_int( out_free_map: if (mapp != &map) - kmem_free(mapp); + kfree(mapp); return error; } @@ -2297,8 +2315,10 @@ xfs_da3_swap_lastblock( error = xfs_bmap_last_before(tp, dp, &lastoff, w); if (error) return error; - if (XFS_IS_CORRUPT(mp, lastoff == 0)) + if (XFS_IS_CORRUPT(mp, lastoff == 0)) { + xfs_da_mark_sick(args); return -EFSCORRUPTED; + } /* * Read the last block in the btree space. */ @@ -2348,6 +2368,7 @@ xfs_da3_swap_lastblock( if (XFS_IS_CORRUPT(mp, be32_to_cpu(sib_info->forw) != last_blkno || sib_info->magic != dead_info->magic)) { + xfs_da_mark_sick(args); error = -EFSCORRUPTED; goto done; } @@ -2368,6 +2389,7 @@ xfs_da3_swap_lastblock( if (XFS_IS_CORRUPT(mp, be32_to_cpu(sib_info->back) != last_blkno || sib_info->magic != dead_info->magic)) { + xfs_da_mark_sick(args); error = -EFSCORRUPTED; goto done; } @@ -2390,6 +2412,7 @@ xfs_da3_swap_lastblock( xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node); if (XFS_IS_CORRUPT(mp, level >= 0 && level != par_hdr.level + 1)) { + xfs_da_mark_sick(args); error = -EFSCORRUPTED; goto done; } @@ -2401,6 +2424,7 @@ xfs_da3_swap_lastblock( entno++) continue; if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) { + xfs_da_mark_sick(args); error = -EFSCORRUPTED; goto done; } @@ -2426,6 +2450,7 @@ xfs_da3_swap_lastblock( xfs_trans_brelse(tp, par_buf); par_buf = NULL; if (XFS_IS_CORRUPT(mp, par_blkno == 0)) { + xfs_da_mark_sick(args); error = -EFSCORRUPTED; goto done; } @@ -2435,6 +2460,7 @@ xfs_da3_swap_lastblock( par_node = par_buf->b_addr; xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node); if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) { + xfs_da_mark_sick(args); error = -EFSCORRUPTED; goto done; } @@ -2518,7 +2544,8 @@ xfs_dabuf_map( int error = 0, nirecs, i; if (nfsb > 1) - irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_NOFS); + irecs = kzalloc(sizeof(irec) * nfsb, + GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); nirecs = nfsb; error = xfs_bmapi_read(dp, bno, nfsb, irecs, &nirecs, @@ -2531,7 +2558,8 @@ xfs_dabuf_map( * larger one that needs to be free by the caller. */ if (nirecs > 1) { - map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_NOFS); + map = kzalloc(nirecs * sizeof(struct xfs_buf_map), + GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); if (!map) { error = -ENOMEM; goto out_free_irecs; @@ -2557,12 +2585,13 @@ xfs_dabuf_map( *nmaps = nirecs; out_free_irecs: if (irecs != &irec) - kmem_free(irecs); + kfree(irecs); return error; invalid_mapping: /* Caller ok with no mapping. */ if (XFS_IS_CORRUPT(mp, !(flags & XFS_DABUF_MAP_HOLE_OK))) { + xfs_dirattr_mark_sick(dp, whichfork); error = -EFSCORRUPTED; if (xfs_error_level >= XFS_ERRLEVEL_LOW) { xfs_alert(mp, "%s: bno %u inode %llu", @@ -2613,7 +2642,7 @@ xfs_da_get_buf( out_free: if (mapp != &map) - kmem_free(mapp); + kfree(mapp); return error; } @@ -2644,6 +2673,8 @@ xfs_da_read_buf( error = xfs_trans_read_buf_map(mp, tp, mp->m_ddev_targp, mapp, nmap, 0, &bp, ops); + if (xfs_metadata_is_sick(error)) + xfs_dirattr_mark_sick(dp, whichfork); if (error) goto out_free; @@ -2654,7 +2685,7 @@ xfs_da_read_buf( *bpp = bp; out_free: if (mapp != &map) - kmem_free(mapp); + kfree(mapp); return error; } @@ -2685,7 +2716,7 @@ xfs_da_reada_buf( out_free: if (mapp != &map) - kmem_free(mapp); + kfree(mapp); return error; } diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 24f9d1461f..060e5c96b7 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -159,6 +159,17 @@ struct xfs_da3_intnode { #define XFS_DIR3_FT_MAX 9 +#define XFS_DIR3_FTYPE_STR \ + { XFS_DIR3_FT_UNKNOWN, "unknown" }, \ + { XFS_DIR3_FT_REG_FILE, "file" }, \ + { XFS_DIR3_FT_DIR, "directory" }, \ + { XFS_DIR3_FT_CHRDEV, "char" }, \ + { XFS_DIR3_FT_BLKDEV, "block" }, \ + { XFS_DIR3_FT_FIFO, "fifo" }, \ + { XFS_DIR3_FT_SOCK, "sock" }, \ + { XFS_DIR3_FT_SYMLINK, "symlink" }, \ + { XFS_DIR3_FT_WHT, "whiteout" } + /* * Byte offset in data block and shortform entry. */ diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index 66a17910d0..c13276095c 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -819,16 +819,16 @@ xfs_defer_can_append( /* Create a new pending item at the end of the transaction list. */ static inline struct xfs_defer_pending * xfs_defer_alloc( - struct xfs_trans *tp, + struct list_head *dfops, const struct xfs_defer_op_type *ops) { struct xfs_defer_pending *dfp; dfp = kmem_cache_zalloc(xfs_defer_pending_cache, - GFP_NOFS | __GFP_NOFAIL); + GFP_KERNEL | __GFP_NOFAIL); dfp->dfp_ops = ops; INIT_LIST_HEAD(&dfp->dfp_work); - list_add_tail(&dfp->dfp_list, &tp->t_dfops); + list_add_tail(&dfp->dfp_list, dfops); return dfp; } @@ -846,7 +846,7 @@ xfs_defer_add( dfp = xfs_defer_find_last(tp, ops); if (!dfp || !xfs_defer_can_append(dfp, ops)) - dfp = xfs_defer_alloc(tp, ops); + dfp = xfs_defer_alloc(&tp->t_dfops, ops); xfs_defer_add_item(dfp, li); trace_xfs_defer_add_item(tp->t_mountp, dfp, li); @@ -870,7 +870,7 @@ xfs_defer_add_barrier( if (dfp) return; - xfs_defer_alloc(tp, &xfs_barrier_defer_type); + xfs_defer_alloc(&tp->t_dfops, &xfs_barrier_defer_type); trace_xfs_defer_add_item(tp->t_mountp, dfp, NULL); } @@ -885,14 +885,9 @@ xfs_defer_start_recovery( struct list_head *r_dfops, const struct xfs_defer_op_type *ops) { - struct xfs_defer_pending *dfp; + struct xfs_defer_pending *dfp = xfs_defer_alloc(r_dfops, ops); - dfp = kmem_cache_zalloc(xfs_defer_pending_cache, - GFP_NOFS | __GFP_NOFAIL); - dfp->dfp_ops = ops; dfp->dfp_intent = lip; - INIT_LIST_HEAD(&dfp->dfp_work); - list_add_tail(&dfp->dfp_list, r_dfops); } /* @@ -979,7 +974,7 @@ xfs_defer_ops_capture( return ERR_PTR(error); /* Create an object to capture the defer ops. */ - dfc = kmem_zalloc(sizeof(*dfc), KM_NOFS); + dfc = kzalloc(sizeof(*dfc), GFP_KERNEL | __GFP_NOFAIL); INIT_LIST_HEAD(&dfc->dfc_list); INIT_LIST_HEAD(&dfc->dfc_dfops); @@ -1011,7 +1006,7 @@ xfs_defer_ops_capture( * transaction. */ for (i = 0; i < dfc->dfc_held.dr_inos; i++) { - ASSERT(xfs_isilocked(dfc->dfc_held.dr_ip[i], XFS_ILOCK_EXCL)); + xfs_assert_ilocked(dfc->dfc_held.dr_ip[i], XFS_ILOCK_EXCL); ihold(VFS_I(dfc->dfc_held.dr_ip[i])); } @@ -1038,7 +1033,7 @@ xfs_defer_ops_capture_abort( for (i = 0; i < dfc->dfc_held.dr_inos; i++) xfs_irele(dfc->dfc_held.dr_ip[i]); - kmem_free(dfc); + kfree(dfc); } /* @@ -1114,7 +1109,7 @@ xfs_defer_ops_continue( list_splice_init(&dfc->dfc_dfops, &tp->t_dfops); tp->t_flags |= dfc->dfc_tpflags; - kmem_free(dfc); + kfree(dfc); } /* Release the resources captured and continued during recovery. */ diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index a766732815..4821519efa 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -18,6 +18,7 @@ #include "xfs_errortag.h" #include "xfs_error.h" #include "xfs_trace.h" +#include "xfs_health.h" const struct xfs_name xfs_name_dotdot = { .name = (const unsigned char *)"..", @@ -25,6 +26,12 @@ const struct xfs_name xfs_name_dotdot = { .type = XFS_DIR3_FT_DIR, }; +const struct xfs_name xfs_name_dot = { + .name = (const unsigned char *)".", + .len = 1, + .type = XFS_DIR3_FT_DIR, +}; + /* * Convert inode mode to directory entry filetype */ @@ -104,13 +111,13 @@ xfs_da_mount( ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT); ASSERT(xfs_dir2_dirblock_bytes(&mp->m_sb) <= XFS_MAX_BLOCKSIZE); - mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), - KM_MAYFAIL); - mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), - KM_MAYFAIL); + mp->m_dir_geo = kzalloc(sizeof(struct xfs_da_geometry), + GFP_KERNEL | __GFP_RETRY_MAYFAIL); + mp->m_attr_geo = kzalloc(sizeof(struct xfs_da_geometry), + GFP_KERNEL | __GFP_RETRY_MAYFAIL); if (!mp->m_dir_geo || !mp->m_attr_geo) { - kmem_free(mp->m_dir_geo); - kmem_free(mp->m_attr_geo); + kfree(mp->m_dir_geo); + kfree(mp->m_attr_geo); return -ENOMEM; } @@ -178,8 +185,8 @@ void xfs_da_unmount( struct xfs_mount *mp) { - kmem_free(mp->m_dir_geo); - kmem_free(mp->m_attr_geo); + kfree(mp->m_dir_geo); + kfree(mp->m_attr_geo); } /* @@ -236,7 +243,7 @@ xfs_dir_init( if (error) return error; - args = kmem_zalloc(sizeof(*args), KM_NOFS); + args = kzalloc(sizeof(*args), GFP_KERNEL | __GFP_NOFAIL); if (!args) return -ENOMEM; @@ -244,7 +251,7 @@ xfs_dir_init( args->dp = dp; args->trans = tp; error = xfs_dir2_sf_create(args, pdp->i_ino); - kmem_free(args); + kfree(args); return error; } @@ -273,7 +280,7 @@ xfs_dir_createname( XFS_STATS_INC(dp->i_mount, xs_dir_create); } - args = kmem_zalloc(sizeof(*args), KM_NOFS); + args = kzalloc(sizeof(*args), GFP_KERNEL | __GFP_NOFAIL); if (!args) return -ENOMEM; @@ -313,7 +320,7 @@ xfs_dir_createname( rval = xfs_dir2_node_addname(args); out_free: - kmem_free(args); + kfree(args); return rval; } @@ -333,7 +340,8 @@ xfs_dir_cilookup_result( !(args->op_flags & XFS_DA_OP_CILOOKUP)) return -EEXIST; - args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL); + args->value = kmalloc(len, + GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_RETRY_MAYFAIL); if (!args->value) return -ENOMEM; @@ -364,15 +372,8 @@ xfs_dir_lookup( ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); XFS_STATS_INC(dp->i_mount, xs_dir_lookup); - /* - * We need to use KM_NOFS here so that lockdep will not throw false - * positive deadlock warnings on a non-transactional lookup path. It is - * safe to recurse into inode recalim in that case, but lockdep can't - * easily be taught about it. Hence KM_NOFS avoids having to add more - * lockdep Doing this avoids having to add a bunch of lockdep class - * annotations into the reclaim path for the ilock. - */ - args = kmem_zalloc(sizeof(*args), KM_NOFS); + args = kzalloc(sizeof(*args), + GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); args->geo = dp->i_mount->m_dir_geo; args->name = name->name; args->namelen = name->len; @@ -419,7 +420,7 @@ out_check_rval: } out_free: xfs_iunlock(dp, lock_mode); - kmem_free(args); + kfree(args); return rval; } @@ -441,7 +442,7 @@ xfs_dir_removename( ASSERT(S_ISDIR(VFS_I(dp)->i_mode)); XFS_STATS_INC(dp->i_mount, xs_dir_remove); - args = kmem_zalloc(sizeof(*args), KM_NOFS); + args = kzalloc(sizeof(*args), GFP_KERNEL | __GFP_NOFAIL); if (!args) return -ENOMEM; @@ -477,7 +478,7 @@ xfs_dir_removename( else rval = xfs_dir2_node_removename(args); out_free: - kmem_free(args); + kfree(args); return rval; } @@ -502,7 +503,7 @@ xfs_dir_replace( if (rval) return rval; - args = kmem_zalloc(sizeof(*args), KM_NOFS); + args = kzalloc(sizeof(*args), GFP_KERNEL | __GFP_NOFAIL); if (!args) return -ENOMEM; @@ -538,7 +539,7 @@ xfs_dir_replace( else rval = xfs_dir2_node_replace(args); out_free: - kmem_free(args); + kfree(args); return rval; } @@ -626,8 +627,10 @@ xfs_dir2_isblock( return 0; *isblock = true; - if (XFS_IS_CORRUPT(mp, args->dp->i_disk_size != args->geo->blksize)) + if (XFS_IS_CORRUPT(mp, args->dp->i_disk_size != args->geo->blksize)) { + xfs_da_mark_sick(args); return -EFSCORRUPTED; + } return 0; } diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h index 19af22a16c..8497d041f3 100644 --- a/fs/xfs/libxfs/xfs_dir2.h +++ b/fs/xfs/libxfs/xfs_dir2.h @@ -22,6 +22,19 @@ struct xfs_dir3_icfree_hdr; struct xfs_dir3_icleaf_hdr; extern const struct xfs_name xfs_name_dotdot; +extern const struct xfs_name xfs_name_dot; + +static inline bool +xfs_dir2_samename( + const struct xfs_name *n1, + const struct xfs_name *n2) +{ + if (n1 == n2) + return true; + if (n1->len != n2->len) + return false; + return !memcmp(n1->name, n2->name, n1->len); +} /* * Convert inode mode to directory entry filetype diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 3c256d4cc4..a2da007adb 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -20,6 +20,7 @@ #include "xfs_error.h" #include "xfs_trace.h" #include "xfs_log.h" +#include "xfs_health.h" /* * Local function prototypes. @@ -152,6 +153,7 @@ xfs_dir3_block_read( __xfs_buf_mark_corrupt(*bpp, fa); xfs_trans_brelse(tp, *bpp); *bpp = NULL; + xfs_dirattr_mark_sick(dp, XFS_DATA_FORK); return -EFSCORRUPTED; } @@ -1108,7 +1110,7 @@ xfs_dir2_sf_to_block( * Copy the directory into a temporary buffer. * Then pitch the incore inode data so we can make extents. */ - sfp = kmem_alloc(ifp->if_bytes, 0); + sfp = kmalloc(ifp->if_bytes, GFP_KERNEL | __GFP_NOFAIL); memcpy(sfp, oldsfp, ifp->if_bytes); xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK); @@ -1253,7 +1255,7 @@ xfs_dir2_sf_to_block( sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep); } /* Done with the temporary buffer */ - kmem_free(sfp); + kfree(sfp); /* * Sort the leaf entries by hash value. */ @@ -1268,6 +1270,6 @@ xfs_dir2_sf_to_block( xfs_dir3_data_check(dp, bp); return 0; out_free: - kmem_free(sfp); + kfree(sfp); return error; } diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c index dbcf58979a..7a6d965bea 100644 --- a/fs/xfs/libxfs/xfs_dir2_data.c +++ b/fs/xfs/libxfs/xfs_dir2_data.c @@ -18,6 +18,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_log.h" +#include "xfs_health.h" static xfs_failaddr_t xfs_dir2_data_freefind_verify( struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf, @@ -433,6 +434,7 @@ xfs_dir3_data_read( __xfs_buf_mark_corrupt(*bpp, fa); xfs_trans_brelse(tp, *bpp); *bpp = NULL; + xfs_dirattr_mark_sick(dp, XFS_DATA_FORK); return -EFSCORRUPTED; } @@ -1198,6 +1200,7 @@ xfs_dir2_data_use_free( corrupt: xfs_corruption_error(__func__, XFS_ERRLEVEL_LOW, args->dp->i_mount, hdr, sizeof(*hdr), __FILE__, __LINE__, fa); + xfs_da_mark_sick(args); return -EFSCORRUPTED; } diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c index cb9e950a91..08dda5ce9d 100644 --- a/fs/xfs/libxfs/xfs_dir2_leaf.c +++ b/fs/xfs/libxfs/xfs_dir2_leaf.c @@ -19,6 +19,7 @@ #include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_buf_item.h" +#include "xfs_health.h" /* * Local function declarations. @@ -1393,8 +1394,10 @@ xfs_dir2_leaf_removename( bestsp = xfs_dir2_leaf_bests_p(ltp); if (be16_to_cpu(bestsp[db]) != oldbest) { xfs_buf_mark_corrupt(lbp); + xfs_da_mark_sick(args); return -EFSCORRUPTED; } + /* * Mark the former data entry unused. */ diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c index 7a03aeb9f4..be0b883402 100644 --- a/fs/xfs/libxfs/xfs_dir2_node.c +++ b/fs/xfs/libxfs/xfs_dir2_node.c @@ -20,6 +20,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_log.h" +#include "xfs_health.h" /* * Function declarations. @@ -231,6 +232,7 @@ __xfs_dir3_free_read( __xfs_buf_mark_corrupt(*bpp, fa); xfs_trans_brelse(tp, *bpp); *bpp = NULL; + xfs_dirattr_mark_sick(dp, XFS_DATA_FORK); return -EFSCORRUPTED; } @@ -443,6 +445,7 @@ xfs_dir2_leaf_to_node( if (be32_to_cpu(ltp->bestcount) > (uint)dp->i_disk_size / args->geo->blksize) { xfs_buf_mark_corrupt(lbp); + xfs_da_mark_sick(args); return -EFSCORRUPTED; } @@ -517,6 +520,7 @@ xfs_dir2_leafn_add( */ if (index < 0) { xfs_buf_mark_corrupt(bp); + xfs_da_mark_sick(args); return -EFSCORRUPTED; } @@ -736,6 +740,7 @@ xfs_dir2_leafn_lookup_for_addname( cpu_to_be16(NULLDATAOFF))) { if (curfdb != newfdb) xfs_trans_brelse(tp, curbp); + xfs_da_mark_sick(args); return -EFSCORRUPTED; } curfdb = newfdb; @@ -804,6 +809,7 @@ xfs_dir2_leafn_lookup_for_entry( xfs_dir3_leaf_check(dp, bp); if (leafhdr.count <= 0) { xfs_buf_mark_corrupt(bp); + xfs_da_mark_sick(args); return -EFSCORRUPTED; } @@ -1739,6 +1745,7 @@ xfs_dir2_node_add_datablk( } else { xfs_alert(mp, " ... fblk is NULL"); } + xfs_da_mark_sick(args); return -EFSCORRUPTED; } diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index e1f83fc7b6..17a20384c8 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -276,7 +276,7 @@ xfs_dir2_block_to_sf( * format the data into. Once we have formatted the data, we can free * the block and copy the formatted data into the inode literal area. */ - sfp = kmem_alloc(mp->m_sb.sb_inodesize, 0); + sfp = kmalloc(mp->m_sb.sb_inodesize, GFP_KERNEL | __GFP_NOFAIL); memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); /* @@ -350,7 +350,7 @@ xfs_dir2_block_to_sf( xfs_dir2_sf_check(args); out: xfs_trans_log_inode(args->trans, dp, logflags); - kmem_free(sfp); + kfree(sfp); return error; } @@ -524,7 +524,7 @@ xfs_dir2_sf_addname_hard( * Copy the old directory to the stack buffer. */ old_isize = (int)dp->i_disk_size; - buf = kmem_alloc(old_isize, 0); + buf = kmalloc(old_isize, GFP_KERNEL | __GFP_NOFAIL); oldsfp = (xfs_dir2_sf_hdr_t *)buf; memcpy(oldsfp, dp->i_df.if_data, old_isize); /* @@ -576,7 +576,7 @@ xfs_dir2_sf_addname_hard( sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep); memcpy(sfep, oldsfep, old_isize - nbytes); } - kmem_free(buf); + kfree(buf); dp->i_disk_size = new_isize; xfs_dir2_sf_check(args); } @@ -1151,7 +1151,7 @@ xfs_dir2_sf_toino4( * Don't want xfs_idata_realloc copying the data here. */ oldsize = dp->i_df.if_bytes; - buf = kmem_alloc(oldsize, 0); + buf = kmalloc(oldsize, GFP_KERNEL | __GFP_NOFAIL); ASSERT(oldsfp->i8count == 1); memcpy(buf, oldsfp, oldsize); /* @@ -1190,7 +1190,7 @@ xfs_dir2_sf_toino4( /* * Clean up the inode. */ - kmem_free(buf); + kfree(buf); dp->i_disk_size = newsize; xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); } @@ -1223,7 +1223,7 @@ xfs_dir2_sf_toino8( * Don't want xfs_idata_realloc copying the data here. */ oldsize = dp->i_df.if_bytes; - buf = kmem_alloc(oldsize, 0); + buf = kmalloc(oldsize, GFP_KERNEL | __GFP_NOFAIL); ASSERT(oldsfp->i8count == 0); memcpy(buf, oldsfp, oldsize); /* @@ -1262,7 +1262,7 @@ xfs_dir2_sf_toino8( /* * Clean up the inode. */ - kmem_free(buf); + kfree(buf); dp->i_disk_size = newsize; xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); } diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 382ab1e71c..2b2f9050fb 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -477,15 +477,9 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) #define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION) /* - * Btree number 0 is bno, 1 is cnt, 2 is rmap. This value gives the size of the - * arrays below. - */ -#define XFS_BTNUM_AGF ((int)XFS_BTNUM_RMAPi + 1) - -/* - * The second word of agf_levels in the first a.g. overlaps the EFS - * superblock's magic number. Since the magic numbers valid for EFS - * are > 64k, our value cannot be confused for an EFS superblock's. + * agf_cnt_level in the first AGF overlaps the EFS superblock's magic number. + * Since the magic numbers valid for EFS are > 64k, our value cannot be confused + * for an EFS superblock. */ typedef struct xfs_agf { @@ -499,8 +493,13 @@ typedef struct xfs_agf { /* * Freespace and rmap information */ - __be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */ - __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */ + __be32 agf_bno_root; /* bnobt root block */ + __be32 agf_cnt_root; /* cntbt root block */ + __be32 agf_rmap_root; /* rmapbt root block */ + + __be32 agf_bno_level; /* bnobt btree levels */ + __be32 agf_cnt_level; /* cntbt btree levels */ + __be32 agf_rmap_level; /* rmapbt btree levels */ __be32 agf_flfirst; /* first freelist block's index */ __be32 agf_fllast; /* last freelist block's index */ diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 6360073865..ca1b17d014 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -195,6 +195,8 @@ struct xfs_fsop_geom { #define XFS_FSOP_GEOM_SICK_PQUOTA (1 << 3) /* project quota */ #define XFS_FSOP_GEOM_SICK_RT_BITMAP (1 << 4) /* realtime bitmap */ #define XFS_FSOP_GEOM_SICK_RT_SUMMARY (1 << 5) /* realtime summary */ +#define XFS_FSOP_GEOM_SICK_QUOTACHECK (1 << 6) /* quota counts */ +#define XFS_FSOP_GEOM_SICK_NLINKS (1 << 7) /* inode link counts */ /* Output for XFS_FS_COUNTS */ typedef struct xfs_fsop_counts { @@ -292,6 +294,7 @@ struct xfs_ag_geometry { #define XFS_AG_GEOM_SICK_FINOBT (1 << 7) /* free inode index */ #define XFS_AG_GEOM_SICK_RMAPBT (1 << 8) /* reverse mappings */ #define XFS_AG_GEOM_SICK_REFCNTBT (1 << 9) /* reference counts */ +#define XFS_AG_GEOM_SICK_INODES (1 << 10) /* bad inodes were seen */ /* * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT @@ -709,9 +712,12 @@ struct xfs_scrub_metadata { #define XFS_SCRUB_TYPE_GQUOTA 22 /* group quotas */ #define XFS_SCRUB_TYPE_PQUOTA 23 /* project quotas */ #define XFS_SCRUB_TYPE_FSCOUNTERS 24 /* fs summary counters */ +#define XFS_SCRUB_TYPE_QUOTACHECK 25 /* quota counters */ +#define XFS_SCRUB_TYPE_NLINKS 26 /* inode link counts */ +#define XFS_SCRUB_TYPE_HEALTHY 27 /* everything checked out ok */ /* Number of scrub subcommands. */ -#define XFS_SCRUB_TYPE_NR 25 +#define XFS_SCRUB_TYPE_NR 28 /* i: Repair this metadata. */ #define XFS_SCRUB_IFLAG_REPAIR (1u << 0) diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index 6296993ff8..3c64b5f9bd 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -26,21 +26,40 @@ * and the "sick" field tells us if that piece was found to need repairs. * Therefore we can conclude that for a given sick flag value: * - * - checked && sick => metadata needs repair - * - checked && !sick => metadata is ok - * - !checked => has not been examined since mount + * - checked && sick => metadata needs repair + * - checked && !sick => metadata is ok + * - !checked && sick => errors have been observed during normal operation, + * but the metadata has not been checked thoroughly + * - !checked && !sick => has not been examined since mount + * + * Evidence of health problems can be sorted into three basic categories: + * + * a) Primary evidence, which signals that something is defective within the + * general grouping of metadata. + * + * b) Secondary evidence, which are side effects of primary problem but are + * not themselves problems. These can be forgotten when the primary + * health problems are addressed. + * + * c) Indirect evidence, which points to something being wrong in another + * group, but we had to release resources and this is all that's left of + * that state. */ struct xfs_mount; struct xfs_perag; struct xfs_inode; struct xfs_fsop_geom; +struct xfs_btree_cur; +struct xfs_da_args; /* Observable health issues for metadata spanning the entire filesystem. */ #define XFS_SICK_FS_COUNTERS (1 << 0) /* summary counters */ #define XFS_SICK_FS_UQUOTA (1 << 1) /* user quota */ #define XFS_SICK_FS_GQUOTA (1 << 2) /* group quota */ #define XFS_SICK_FS_PQUOTA (1 << 3) /* project quota */ +#define XFS_SICK_FS_QUOTACHECK (1 << 4) /* quota counts */ +#define XFS_SICK_FS_NLINKS (1 << 5) /* inode link counts */ /* Observable health issues for realtime volume metadata. */ #define XFS_SICK_RT_BITMAP (1 << 0) /* realtime bitmap */ @@ -57,6 +76,7 @@ struct xfs_fsop_geom; #define XFS_SICK_AG_FINOBT (1 << 7) /* free inode index */ #define XFS_SICK_AG_RMAPBT (1 << 8) /* reverse mappings */ #define XFS_SICK_AG_REFCNTBT (1 << 9) /* reference counts */ +#define XFS_SICK_AG_INODES (1 << 10) /* inactivated bad inodes */ /* Observable health issues for inode metadata. */ #define XFS_SICK_INO_CORE (1 << 0) /* inode core */ @@ -73,11 +93,16 @@ struct xfs_fsop_geom; #define XFS_SICK_INO_DIR_ZAPPED (1 << 10) /* directory erased */ #define XFS_SICK_INO_SYMLINK_ZAPPED (1 << 11) /* symlink erased */ +/* Don't propagate sick status to ag health summary during inactivation */ +#define XFS_SICK_INO_FORGET (1 << 12) + /* Primary evidence of health problems in a given group. */ #define XFS_SICK_FS_PRIMARY (XFS_SICK_FS_COUNTERS | \ XFS_SICK_FS_UQUOTA | \ XFS_SICK_FS_GQUOTA | \ - XFS_SICK_FS_PQUOTA) + XFS_SICK_FS_PQUOTA | \ + XFS_SICK_FS_QUOTACHECK | \ + XFS_SICK_FS_NLINKS) #define XFS_SICK_RT_PRIMARY (XFS_SICK_RT_BITMAP | \ XFS_SICK_RT_SUMMARY) @@ -107,29 +132,86 @@ struct xfs_fsop_geom; XFS_SICK_INO_DIR_ZAPPED | \ XFS_SICK_INO_SYMLINK_ZAPPED) -/* These functions must be provided by the xfs implementation. */ +/* Secondary state related to (but not primary evidence of) health problems. */ +#define XFS_SICK_FS_SECONDARY (0) +#define XFS_SICK_RT_SECONDARY (0) +#define XFS_SICK_AG_SECONDARY (0) +#define XFS_SICK_INO_SECONDARY (XFS_SICK_INO_FORGET) + +/* Evidence of health problems elsewhere. */ +#define XFS_SICK_FS_INDIRECT (0) +#define XFS_SICK_RT_INDIRECT (0) +#define XFS_SICK_AG_INDIRECT (XFS_SICK_AG_INODES) +#define XFS_SICK_INO_INDIRECT (0) + +/* All health masks. */ +#define XFS_SICK_FS_ALL (XFS_SICK_FS_PRIMARY | \ + XFS_SICK_FS_SECONDARY | \ + XFS_SICK_FS_INDIRECT) + +#define XFS_SICK_RT_ALL (XFS_SICK_RT_PRIMARY | \ + XFS_SICK_RT_SECONDARY | \ + XFS_SICK_RT_INDIRECT) + +#define XFS_SICK_AG_ALL (XFS_SICK_AG_PRIMARY | \ + XFS_SICK_AG_SECONDARY | \ + XFS_SICK_AG_INDIRECT) + +#define XFS_SICK_INO_ALL (XFS_SICK_INO_PRIMARY | \ + XFS_SICK_INO_SECONDARY | \ + XFS_SICK_INO_INDIRECT | \ + XFS_SICK_INO_ZAPPED) + +/* + * These functions must be provided by the xfs implementation. Function + * behavior with respect to the first argument should be as follows: + * + * xfs_*_mark_sick: Set the sick flags and do not set checked flags. + * Runtime code should call this upon encountering + * a corruption. + * + * xfs_*_mark_corrupt: Set the sick and checked flags simultaneously. + * Fsck tools should call this when corruption is + * found. + * + * xfs_*_mark_healthy: Clear the sick flags and set the checked flags. + * Fsck tools should call this after correcting errors. + * + * xfs_*_measure_sickness: Return the sick and check status in the provided + * out parameters. + */ void xfs_fs_mark_sick(struct xfs_mount *mp, unsigned int mask); +void xfs_fs_mark_corrupt(struct xfs_mount *mp, unsigned int mask); void xfs_fs_mark_healthy(struct xfs_mount *mp, unsigned int mask); void xfs_fs_measure_sickness(struct xfs_mount *mp, unsigned int *sick, unsigned int *checked); void xfs_rt_mark_sick(struct xfs_mount *mp, unsigned int mask); +void xfs_rt_mark_corrupt(struct xfs_mount *mp, unsigned int mask); void xfs_rt_mark_healthy(struct xfs_mount *mp, unsigned int mask); void xfs_rt_measure_sickness(struct xfs_mount *mp, unsigned int *sick, unsigned int *checked); +void xfs_agno_mark_sick(struct xfs_mount *mp, xfs_agnumber_t agno, + unsigned int mask); void xfs_ag_mark_sick(struct xfs_perag *pag, unsigned int mask); +void xfs_ag_mark_corrupt(struct xfs_perag *pag, unsigned int mask); void xfs_ag_mark_healthy(struct xfs_perag *pag, unsigned int mask); void xfs_ag_measure_sickness(struct xfs_perag *pag, unsigned int *sick, unsigned int *checked); void xfs_inode_mark_sick(struct xfs_inode *ip, unsigned int mask); +void xfs_inode_mark_corrupt(struct xfs_inode *ip, unsigned int mask); void xfs_inode_mark_healthy(struct xfs_inode *ip, unsigned int mask); void xfs_inode_measure_sickness(struct xfs_inode *ip, unsigned int *sick, unsigned int *checked); void xfs_health_unmount(struct xfs_mount *mp); +void xfs_bmap_mark_sick(struct xfs_inode *ip, int whichfork); +void xfs_btree_mark_sick(struct xfs_btree_cur *cur); +void xfs_dirattr_mark_sick(struct xfs_inode *ip, int whichfork); +void xfs_da_mark_sick(struct xfs_da_args *args); /* Now some helpers. */ @@ -197,4 +279,7 @@ void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo); void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo); void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs); +#define xfs_metadata_is_sick(error) \ + (unlikely((error) == -EFSCORRUPTED || (error) == -EFSBADCRC)) + #endif /* __XFS_HEALTH_H__ */ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 2361a22035..e5ac3e5430 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -27,6 +27,7 @@ #include "xfs_log.h" #include "xfs_rmap.h" #include "xfs_ag.h" +#include "xfs_health.h" /* * Lookup a record by ino in the btree given by cur. @@ -140,13 +141,13 @@ xfs_inobt_complain_bad_rec( struct xfs_mount *mp = cur->bc_mp; xfs_warn(mp, - "%s Inode BTree record corruption in AG %d detected at %pS!", - cur->bc_btnum == XFS_BTNUM_INO ? "Used" : "Free", - cur->bc_ag.pag->pag_agno, fa); + "%sbt record corruption in AG %d detected at %pS!", + cur->bc_ops->name, cur->bc_ag.pag->pag_agno, fa); xfs_warn(mp, "start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x", irec->ir_startino, irec->ir_count, irec->ir_freecount, irec->ir_free, irec->ir_holemask); + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } @@ -205,14 +206,17 @@ xfs_inobt_insert( struct xfs_buf *agbp, xfs_agino_t newino, xfs_agino_t newlen, - xfs_btnum_t btnum) + bool is_finobt) { struct xfs_btree_cur *cur; xfs_agino_t thisino; int i; int error; - cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum); + if (is_finobt) + cur = xfs_finobt_init_cursor(pag, tp, agbp); + else + cur = xfs_inobt_init_cursor(pag, tp, agbp); for (thisino = newino; thisino < newino + newlen; @@ -528,16 +532,14 @@ __xfs_inobt_rec_merge( } /* - * Insert a new sparse inode chunk into the associated inode btree. The inode - * record for the sparse chunk is pre-aligned to a startino that should match - * any pre-existing sparse inode record in the tree. This allows sparse chunks - * to fill over time. + * Insert a new sparse inode chunk into the associated inode allocation btree. + * The inode record for the sparse chunk is pre-aligned to a startino that + * should match any pre-existing sparse inode record in the tree. This allows + * sparse chunks to fill over time. * - * This function supports two modes of handling preexisting records depending on - * the merge flag. If merge is true, the provided record is merged with the + * If no preexisting record exists, the provided record is inserted. + * If there is a preexisting record, the provided record is merged with the * existing record and updated in place. The merged record is returned in nrec. - * If merge is false, an existing record is replaced with the provided record. - * If no preexisting record exists, the provided record is always inserted. * * It is considered corruption if a merge is requested and not possible. Given * the sparse inode alignment constraints, this should never happen. @@ -547,9 +549,7 @@ xfs_inobt_insert_sprec( struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, - int btnum, - struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. */ - bool merge) /* merge or replace */ + struct xfs_inobt_rec_incore *nrec) /* in/out: new/merged rec. */ { struct xfs_mount *mp = pag->pag_mount; struct xfs_btree_cur *cur; @@ -557,7 +557,7 @@ xfs_inobt_insert_sprec( int i; struct xfs_inobt_rec_incore rec; - cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum); + cur = xfs_inobt_init_cursor(pag, tp, agbp); /* the new record is pre-aligned so we know where to look */ error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i); @@ -571,6 +571,7 @@ xfs_inobt_insert_sprec( if (error) goto error; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error; } @@ -579,45 +580,45 @@ xfs_inobt_insert_sprec( } /* - * A record exists at this startino. Merge or replace the record - * depending on what we've been asked to do. + * A record exists at this startino. Merge the records. */ - if (merge) { - error = xfs_inobt_get_rec(cur, &rec, &i); - if (error) - goto error; - if (XFS_IS_CORRUPT(mp, i != 1)) { - error = -EFSCORRUPTED; - goto error; - } - if (XFS_IS_CORRUPT(mp, rec.ir_startino != nrec->ir_startino)) { - error = -EFSCORRUPTED; - goto error; - } + error = xfs_inobt_get_rec(cur, &rec, &i); + if (error) + goto error; + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); + error = -EFSCORRUPTED; + goto error; + } + if (XFS_IS_CORRUPT(mp, rec.ir_startino != nrec->ir_startino)) { + xfs_btree_mark_sick(cur); + error = -EFSCORRUPTED; + goto error; + } - /* - * This should never fail. If we have coexisting records that - * cannot merge, something is seriously wrong. - */ - if (XFS_IS_CORRUPT(mp, !__xfs_inobt_can_merge(nrec, &rec))) { - error = -EFSCORRUPTED; - goto error; - } + /* + * This should never fail. If we have coexisting records that + * cannot merge, something is seriously wrong. + */ + if (XFS_IS_CORRUPT(mp, !__xfs_inobt_can_merge(nrec, &rec))) { + xfs_btree_mark_sick(cur); + error = -EFSCORRUPTED; + goto error; + } - trace_xfs_irec_merge_pre(mp, pag->pag_agno, rec.ir_startino, - rec.ir_holemask, nrec->ir_startino, - nrec->ir_holemask); + trace_xfs_irec_merge_pre(mp, pag->pag_agno, rec.ir_startino, + rec.ir_holemask, nrec->ir_startino, + nrec->ir_holemask); - /* merge to nrec to output the updated record */ - __xfs_inobt_rec_merge(nrec, &rec); + /* merge to nrec to output the updated record */ + __xfs_inobt_rec_merge(nrec, &rec); - trace_xfs_irec_merge_post(mp, pag->pag_agno, nrec->ir_startino, - nrec->ir_holemask); + trace_xfs_irec_merge_post(mp, pag->pag_agno, nrec->ir_startino, + nrec->ir_holemask); - error = xfs_inobt_rec_check_count(mp, nrec); - if (error) - goto error; - } + error = xfs_inobt_rec_check_count(mp, nrec); + if (error) + goto error; error = xfs_inobt_update(cur, nrec); if (error) @@ -632,6 +633,59 @@ error: } /* + * Insert a new sparse inode chunk into the free inode btree. The inode + * record for the sparse chunk is pre-aligned to a startino that should match + * any pre-existing sparse inode record in the tree. This allows sparse chunks + * to fill over time. + * + * The new record is always inserted, overwriting a pre-existing record if + * there is one. + */ +STATIC int +xfs_finobt_insert_sprec( + struct xfs_perag *pag, + struct xfs_trans *tp, + struct xfs_buf *agbp, + struct xfs_inobt_rec_incore *nrec) /* in/out: new rec. */ +{ + struct xfs_mount *mp = pag->pag_mount; + struct xfs_btree_cur *cur; + int error; + int i; + + cur = xfs_finobt_init_cursor(pag, tp, agbp); + + /* the new record is pre-aligned so we know where to look */ + error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i); + if (error) + goto error; + /* if nothing there, insert a new record and return */ + if (i == 0) { + error = xfs_inobt_insert_rec(cur, nrec->ir_holemask, + nrec->ir_count, nrec->ir_freecount, + nrec->ir_free, &i); + if (error) + goto error; + if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); + error = -EFSCORRUPTED; + goto error; + } + } else { + error = xfs_inobt_update(cur, nrec); + if (error) + goto error; + } + + xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + return 0; +error: + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return error; +} + + +/* * Allocate new inodes in the allocation group specified by agbp. Returns 0 if * inodes were allocated in this AG; -EAGAIN if there was no space in this AG so * the caller knows it can try another AG, a hard -ENOSPC when over the maximum @@ -857,8 +911,7 @@ sparse_alloc: * if necessary. If a merge does occur, rec is updated to the * merged record. */ - error = xfs_inobt_insert_sprec(pag, tp, agbp, - XFS_BTNUM_INO, &rec, true); + error = xfs_inobt_insert_sprec(pag, tp, agbp, &rec); if (error == -EFSCORRUPTED) { xfs_alert(args.mp, "invalid sparse inode record: ino 0x%llx holemask 0x%x count %u", @@ -882,21 +935,19 @@ sparse_alloc: * existing record with this one. */ if (xfs_has_finobt(args.mp)) { - error = xfs_inobt_insert_sprec(pag, tp, agbp, - XFS_BTNUM_FINO, &rec, false); + error = xfs_finobt_insert_sprec(pag, tp, agbp, &rec); if (error) return error; } } else { /* full chunk - insert new records to both btrees */ - error = xfs_inobt_insert(pag, tp, agbp, newino, newlen, - XFS_BTNUM_INO); + error = xfs_inobt_insert(pag, tp, agbp, newino, newlen, false); if (error) return error; if (xfs_has_finobt(args.mp)) { error = xfs_inobt_insert(pag, tp, agbp, newino, - newlen, XFS_BTNUM_FINO); + newlen, true); if (error) return error; } @@ -949,8 +1000,10 @@ xfs_ialloc_next_rec( error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } } return 0; @@ -974,8 +1027,10 @@ xfs_ialloc_get_rec( error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } } return 0; @@ -1030,7 +1085,7 @@ xfs_dialloc_ag_inobt( ASSERT(pag->pagi_freecount > 0); restart_pagno: - cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(pag, tp, agbp); /* * If pagino is 0 (this is the root inode allocation) use newino. * This must work because we've just allocated some. @@ -1053,6 +1108,7 @@ xfs_dialloc_ag_inobt( if (error) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1061,6 +1117,7 @@ xfs_dialloc_ag_inobt( if (error) goto error0; if (XFS_IS_CORRUPT(mp, j != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1219,6 +1276,7 @@ xfs_dialloc_ag_inobt( if (error) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1228,6 +1286,7 @@ xfs_dialloc_ag_inobt( if (error) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1237,6 +1296,7 @@ xfs_dialloc_ag_inobt( if (error) goto error0; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1297,8 +1357,10 @@ xfs_dialloc_ag_finobt_near( error = xfs_inobt_get_rec(lcur, rec, &i); if (error) return error; - if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1)) { + xfs_btree_mark_sick(lcur); return -EFSCORRUPTED; + } /* * See if we've landed in the parent inode record. The finobt @@ -1322,12 +1384,14 @@ xfs_dialloc_ag_finobt_near( if (error) goto error_rcur; if (XFS_IS_CORRUPT(lcur->bc_mp, j != 1)) { + xfs_btree_mark_sick(lcur); error = -EFSCORRUPTED; goto error_rcur; } } if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1 && j != 1)) { + xfs_btree_mark_sick(lcur); error = -EFSCORRUPTED; goto error_rcur; } @@ -1383,8 +1447,10 @@ xfs_dialloc_ag_finobt_newino( error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } return 0; } } @@ -1395,14 +1461,18 @@ xfs_dialloc_ag_finobt_newino( error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); if (error) return error; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } error = xfs_inobt_get_rec(cur, rec, &i); if (error) return error; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } return 0; } @@ -1424,14 +1494,18 @@ xfs_dialloc_ag_update_inobt( error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); if (error) return error; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } error = xfs_inobt_get_rec(cur, &rec, &i); if (error) return error; - if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) + if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % XFS_INODES_PER_CHUNK) == 0); @@ -1440,8 +1514,10 @@ xfs_dialloc_ag_update_inobt( if (XFS_IS_CORRUPT(cur->bc_mp, rec.ir_free != frec->ir_free || - rec.ir_freecount != frec->ir_freecount)) + rec.ir_freecount != frec->ir_freecount)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } return xfs_inobt_update(cur, &rec); } @@ -1483,7 +1559,7 @@ xfs_dialloc_ag( if (!pagino) pagino = be32_to_cpu(agi->agi_newino); - cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO); + cur = xfs_finobt_init_cursor(pag, tp, agbp); error = xfs_check_agi_freecount(cur); if (error) @@ -1526,7 +1602,7 @@ xfs_dialloc_ag( * the original freecount. If all is well, make the equivalent update to * the inobt using the finobt record and offset information. */ - icur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO); + icur = xfs_inobt_init_cursor(pag, tp, agbp); error = xfs_check_agi_freecount(icur); if (error) @@ -1943,7 +2019,7 @@ xfs_difree_inobt( /* * Initialize the cursor. */ - cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(pag, tp, agbp); error = xfs_check_agi_freecount(cur); if (error) @@ -1958,6 +2034,7 @@ xfs_difree_inobt( goto error0; } if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -1968,6 +2045,7 @@ xfs_difree_inobt( goto error0; } if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error0; } @@ -2068,7 +2146,7 @@ xfs_difree_finobt( int error; int i; - cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO); + cur = xfs_finobt_init_cursor(pag, tp, agbp); error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i); if (error) @@ -2080,6 +2158,7 @@ xfs_difree_finobt( * something is out of sync. */ if (XFS_IS_CORRUPT(mp, ibtrec->ir_freecount != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error; } @@ -2106,6 +2185,7 @@ xfs_difree_finobt( if (error) goto error; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error; } @@ -2116,6 +2196,7 @@ xfs_difree_finobt( if (XFS_IS_CORRUPT(mp, rec.ir_free != ibtrec->ir_free || rec.ir_freecount != ibtrec->ir_freecount)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto error; } @@ -2265,7 +2346,7 @@ xfs_imap_lookup( * we have a record, we need to ensure it contains the inode number * we are looking up. */ - cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(pag, tp, agbp); error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); if (!error) { if (i) @@ -2604,6 +2685,8 @@ xfs_read_agi( error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGI_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), 0, agibpp, &xfs_agi_buf_ops); + if (xfs_metadata_is_sick(error)) + xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI); if (error) return error; if (tp) @@ -2765,7 +2848,7 @@ xfs_ialloc_count_inodes( struct xfs_ialloc_count_inodes ci = {0}; int error; - ASSERT(cur->bc_btnum == XFS_BTNUM_INO); + ASSERT(xfs_btree_is_ino(cur->bc_ops)); error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci); if (error) return error; @@ -2982,7 +3065,7 @@ xfs_ialloc_check_shrink( if (!xfs_has_sparseinodes(pag->pag_mount)) return 0; - cur = xfs_inobt_init_cursor(pag, tp, agibp, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(pag, tp, agibp); /* Look up the inobt record that would correspond to the new EOFS. */ agino = XFS_AGB_TO_AGINO(pag->pag_mount, new_length); @@ -2995,6 +3078,7 @@ xfs_ialloc_check_shrink( goto out; if (!has) { + xfs_ag_mark_sick(pag, XFS_SICK_AG_INOBT); error = -EFSCORRUPTED; goto out; } diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 42a5e1f227..cc661fca6f 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -17,6 +17,7 @@ #include "xfs_ialloc_btree.h" #include "xfs_alloc.h" #include "xfs_error.h" +#include "xfs_health.h" #include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_rmap.h" @@ -37,7 +38,15 @@ xfs_inobt_dup_cursor( struct xfs_btree_cur *cur) { return xfs_inobt_init_cursor(cur->bc_ag.pag, cur->bc_tp, - cur->bc_ag.agbp, cur->bc_btnum); + cur->bc_ag.agbp); +} + +STATIC struct xfs_btree_cur * +xfs_finobt_dup_cursor( + struct xfs_btree_cur *cur) +{ + return xfs_finobt_init_cursor(cur->bc_ag.pag, cur->bc_tp, + cur->bc_ag.agbp); } STATIC void @@ -81,9 +90,9 @@ xfs_inobt_mod_blockcount( if (!xfs_has_inobtcounts(cur->bc_mp)) return; - if (cur->bc_btnum == XFS_BTNUM_FINO) + if (xfs_btree_is_fino(cur->bc_ops)) be32_add_cpu(&agi->agi_fblocks, howmuch); - else if (cur->bc_btnum == XFS_BTNUM_INO) + else be32_add_cpu(&agi->agi_iblocks, howmuch); xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_IBLOCKS); } @@ -300,7 +309,7 @@ xfs_inobt_verify( * xfs_perag_initialised_agi(pag)) if we ever do. */ if (xfs_has_crc(mp)) { - fa = xfs_btree_sblock_v5hdr_verify(bp); + fa = xfs_btree_agblock_v5hdr_verify(bp); if (fa) return fa; } @@ -310,7 +319,7 @@ xfs_inobt_verify( if (level >= M_IGEO(mp)->inobt_maxlevels) return __this_address; - return xfs_btree_sblock_verify(bp, + return xfs_btree_agblock_verify(bp, M_IGEO(mp)->inobt_mxr[level != 0]); } @@ -320,7 +329,7 @@ xfs_inobt_read_verify( { xfs_failaddr_t fa; - if (!xfs_btree_sblock_verify_crc(bp)) + if (!xfs_btree_agblock_verify_crc(bp)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { fa = xfs_inobt_verify(bp); @@ -344,7 +353,7 @@ xfs_inobt_write_verify( xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } - xfs_btree_sblock_calc_crc(bp); + xfs_btree_agblock_calc_crc(bp); } @@ -398,9 +407,17 @@ xfs_inobt_keys_contiguous( be32_to_cpu(key2->inobt.ir_startino)); } -static const struct xfs_btree_ops xfs_inobt_ops = { +const struct xfs_btree_ops xfs_inobt_ops = { + .name = "ino", + .type = XFS_BTREE_TYPE_AG, + .rec_len = sizeof(xfs_inobt_rec_t), .key_len = sizeof(xfs_inobt_key_t), + .ptr_len = XFS_BTREE_SHORT_PTR_LEN, + + .lru_refs = XFS_INO_BTREE_REF, + .statoff = XFS_STATS_CALC_INDEX(xs_ibt_2), + .sick_mask = XFS_SICK_AG_INOBT, .dup_cursor = xfs_inobt_dup_cursor, .set_root = xfs_inobt_set_root, @@ -420,11 +437,19 @@ static const struct xfs_btree_ops xfs_inobt_ops = { .keys_contiguous = xfs_inobt_keys_contiguous, }; -static const struct xfs_btree_ops xfs_finobt_ops = { +const struct xfs_btree_ops xfs_finobt_ops = { + .name = "fino", + .type = XFS_BTREE_TYPE_AG, + .rec_len = sizeof(xfs_inobt_rec_t), .key_len = sizeof(xfs_inobt_key_t), + .ptr_len = XFS_BTREE_SHORT_PTR_LEN, - .dup_cursor = xfs_inobt_dup_cursor, + .lru_refs = XFS_INO_BTREE_REF, + .statoff = XFS_STATS_CALC_INDEX(xs_fibt_2), + .sick_mask = XFS_SICK_AG_FINOBT, + + .dup_cursor = xfs_finobt_dup_cursor, .set_root = xfs_finobt_set_root, .alloc_block = xfs_finobt_alloc_block, .free_block = xfs_finobt_free_block, @@ -443,65 +468,54 @@ static const struct xfs_btree_ops xfs_finobt_ops = { }; /* - * Initialize a new inode btree cursor. + * Create an inode btree cursor. + * + * For staging cursors tp and agbp are NULL. */ -static struct xfs_btree_cur * -xfs_inobt_init_common( +struct xfs_btree_cur * +xfs_inobt_init_cursor( struct xfs_perag *pag, - struct xfs_trans *tp, /* transaction pointer */ - xfs_btnum_t btnum) /* ialloc or free ino btree */ + struct xfs_trans *tp, + struct xfs_buf *agbp) { struct xfs_mount *mp = pag->pag_mount; struct xfs_btree_cur *cur; - cur = xfs_btree_alloc_cursor(mp, tp, btnum, + cur = xfs_btree_alloc_cursor(mp, tp, &xfs_inobt_ops, M_IGEO(mp)->inobt_maxlevels, xfs_inobt_cur_cache); - if (btnum == XFS_BTNUM_INO) { - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_ibt_2); - cur->bc_ops = &xfs_inobt_ops; - } else { - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_fibt_2); - cur->bc_ops = &xfs_finobt_ops; - } - - if (xfs_has_crc(mp)) - cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - cur->bc_ag.pag = xfs_perag_hold(pag); + cur->bc_ag.agbp = agbp; + if (agbp) { + struct xfs_agi *agi = agbp->b_addr; + + cur->bc_nlevels = be32_to_cpu(agi->agi_level); + } return cur; } -/* Create an inode btree cursor. */ +/* + * Create a free inode btree cursor. + * + * For staging cursors tp and agbp are NULL. + */ struct xfs_btree_cur * -xfs_inobt_init_cursor( +xfs_finobt_init_cursor( struct xfs_perag *pag, struct xfs_trans *tp, - struct xfs_buf *agbp, - xfs_btnum_t btnum) + struct xfs_buf *agbp) { + struct xfs_mount *mp = pag->pag_mount; struct xfs_btree_cur *cur; - struct xfs_agi *agi = agbp->b_addr; - cur = xfs_inobt_init_common(pag, tp, btnum); - if (btnum == XFS_BTNUM_INO) - cur->bc_nlevels = be32_to_cpu(agi->agi_level); - else - cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); + cur = xfs_btree_alloc_cursor(mp, tp, &xfs_finobt_ops, + M_IGEO(mp)->inobt_maxlevels, xfs_inobt_cur_cache); + cur->bc_ag.pag = xfs_perag_hold(pag); cur->bc_ag.agbp = agbp; - return cur; -} + if (agbp) { + struct xfs_agi *agi = agbp->b_addr; -/* Create an inode btree cursor with a fake root for staging. */ -struct xfs_btree_cur * -xfs_inobt_stage_cursor( - struct xfs_perag *pag, - struct xbtree_afakeroot *afake, - xfs_btnum_t btnum) -{ - struct xfs_btree_cur *cur; - - cur = xfs_inobt_init_common(pag, NULL, btnum); - xfs_btree_stage_afakeroot(cur, afake); + cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); + } return cur; } @@ -521,7 +535,7 @@ xfs_inobt_commit_staged_btree( ASSERT(cur->bc_flags & XFS_BTREE_STAGING); - if (cur->bc_btnum == XFS_BTNUM_INO) { + if (xfs_btree_is_ino(cur->bc_ops)) { fields = XFS_AGI_ROOT | XFS_AGI_LEVEL; agi->agi_root = cpu_to_be32(afake->af_root); agi->agi_level = cpu_to_be32(afake->af_levels); @@ -530,7 +544,7 @@ xfs_inobt_commit_staged_btree( fields |= XFS_AGI_IBLOCKS; } xfs_ialloc_log_agi(tp, agbp, fields); - xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_inobt_ops); + xfs_btree_commit_afakeroot(cur, tp, agbp); } else { fields = XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL; agi->agi_free_root = cpu_to_be32(afake->af_root); @@ -540,7 +554,7 @@ xfs_inobt_commit_staged_btree( fields |= XFS_AGI_IBLOCKS; } xfs_ialloc_log_agi(tp, agbp, fields); - xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_finobt_ops); + xfs_btree_commit_afakeroot(cur, tp, agbp); } } @@ -721,45 +735,21 @@ xfs_inobt_max_size( XFS_INODES_PER_CHUNK); } -/* Read AGI and create inobt cursor. */ -int -xfs_inobt_cur( - struct xfs_perag *pag, - struct xfs_trans *tp, - xfs_btnum_t which, - struct xfs_btree_cur **curpp, - struct xfs_buf **agi_bpp) -{ - struct xfs_btree_cur *cur; - int error; - - ASSERT(*agi_bpp == NULL); - ASSERT(*curpp == NULL); - - error = xfs_ialloc_read_agi(pag, tp, agi_bpp); - if (error) - return error; - - cur = xfs_inobt_init_cursor(pag, tp, *agi_bpp, which); - *curpp = cur; - return 0; -} - static int -xfs_inobt_count_blocks( +xfs_finobt_count_blocks( struct xfs_perag *pag, struct xfs_trans *tp, - xfs_btnum_t btnum, xfs_extlen_t *tree_blocks) { struct xfs_buf *agbp = NULL; - struct xfs_btree_cur *cur = NULL; + struct xfs_btree_cur *cur; int error; - error = xfs_inobt_cur(pag, tp, btnum, &cur, &agbp); + error = xfs_ialloc_read_agi(pag, tp, &agbp); if (error) return error; + cur = xfs_inobt_init_cursor(pag, tp, agbp); error = xfs_btree_count_blocks(cur, tree_blocks); xfs_btree_del_cursor(cur, error); xfs_trans_brelse(tp, agbp); @@ -807,8 +797,7 @@ xfs_finobt_calc_reserves( if (xfs_has_inobtcounts(pag->pag_mount)) error = xfs_finobt_read_blocks(pag, tp, &tree_len); else - error = xfs_inobt_count_blocks(pag, tp, XFS_BTNUM_FINO, - &tree_len); + error = xfs_finobt_count_blocks(pag, tp, &tree_len); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index 3262c3fe5e..6472ec1ecb 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -46,10 +46,10 @@ struct xfs_perag; (maxrecs) * sizeof(xfs_inobt_key_t) + \ ((index) - 1) * sizeof(xfs_inobt_ptr_t))) -extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_perag *pag, - struct xfs_trans *tp, struct xfs_buf *agbp, xfs_btnum_t btnum); -struct xfs_btree_cur *xfs_inobt_stage_cursor(struct xfs_perag *pag, - struct xbtree_afakeroot *afake, xfs_btnum_t btnum); +struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_perag *pag, + struct xfs_trans *tp, struct xfs_buf *agbp); +struct xfs_btree_cur *xfs_finobt_init_cursor(struct xfs_perag *pag, + struct xfs_trans *tp, struct xfs_buf *agbp); extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); /* ir_holemask to inode allocation bitmap conversion */ @@ -66,9 +66,6 @@ int xfs_finobt_calc_reserves(struct xfs_perag *perag, struct xfs_trans *tp, xfs_extlen_t *ask, xfs_extlen_t *used); extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp, unsigned long long len); -int xfs_inobt_cur(struct xfs_perag *pag, struct xfs_trans *tp, - xfs_btnum_t btnum, struct xfs_btree_cur **curpp, - struct xfs_buf **agi_bpp); void xfs_inobt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index f4e6b200cd..8796f2b3e5 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -394,11 +394,18 @@ xfs_iext_leaf_key( return leaf->recs[n].lo & XFS_IEXT_STARTOFF_MASK; } +static inline void * +xfs_iext_alloc_node( + int size) +{ + return kzalloc(size, GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); +} + static void xfs_iext_grow( struct xfs_ifork *ifp) { - struct xfs_iext_node *node = kmem_zalloc(NODE_SIZE, KM_NOFS); + struct xfs_iext_node *node = xfs_iext_alloc_node(NODE_SIZE); int i; if (ifp->if_height == 1) { @@ -454,7 +461,7 @@ xfs_iext_split_node( int *nr_entries) { struct xfs_iext_node *node = *nodep; - struct xfs_iext_node *new = kmem_zalloc(NODE_SIZE, KM_NOFS); + struct xfs_iext_node *new = xfs_iext_alloc_node(NODE_SIZE); const int nr_move = KEYS_PER_NODE / 2; int nr_keep = nr_move + (KEYS_PER_NODE & 1); int i = 0; @@ -542,7 +549,7 @@ xfs_iext_split_leaf( int *nr_entries) { struct xfs_iext_leaf *leaf = cur->leaf; - struct xfs_iext_leaf *new = kmem_zalloc(NODE_SIZE, KM_NOFS); + struct xfs_iext_leaf *new = xfs_iext_alloc_node(NODE_SIZE); const int nr_move = RECS_PER_LEAF / 2; int nr_keep = nr_move + (RECS_PER_LEAF & 1); int i; @@ -583,7 +590,7 @@ xfs_iext_alloc_root( { ASSERT(ifp->if_bytes == 0); - ifp->if_data = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS); + ifp->if_data = xfs_iext_alloc_node(sizeof(struct xfs_iext_rec)); ifp->if_height = 1; /* now that we have a node step into it */ @@ -603,7 +610,8 @@ xfs_iext_realloc_root( if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF) new_size = NODE_SIZE; - new = krealloc(ifp->if_data, new_size, GFP_NOFS | __GFP_NOFAIL); + new = krealloc(ifp->if_data, new_size, + GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes); ifp->if_data = new; cur->leaf = new; @@ -743,7 +751,7 @@ xfs_iext_remove_node( again: ASSERT(node->ptrs[pos]); ASSERT(node->ptrs[pos] == victim); - kmem_free(victim); + kfree(victim); nr_entries = xfs_iext_node_nr_entries(node, pos) - 1; offset = node->keys[0]; @@ -789,7 +797,7 @@ again: ASSERT(node == ifp->if_data); ifp->if_data = node->ptrs[0]; ifp->if_height--; - kmem_free(node); + kfree(node); } } @@ -863,7 +871,7 @@ xfs_iext_free_last_leaf( struct xfs_ifork *ifp) { ifp->if_height--; - kmem_free(ifp->if_data); + kfree(ifp->if_data); ifp->if_data = NULL; } @@ -1044,7 +1052,7 @@ xfs_iext_destroy_node( } } - kmem_free(node); + kfree(node); } void diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 137a65bda9..d0dcce462b 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -18,6 +18,7 @@ #include "xfs_trans.h" #include "xfs_ialloc.h" #include "xfs_dir2.h" +#include "xfs_health.h" #include <linux/iversion.h> @@ -132,9 +133,14 @@ xfs_imap_to_bp( struct xfs_imap *imap, struct xfs_buf **bpp) { - return xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, - imap->im_len, XBF_UNMAPPED, bpp, - &xfs_inode_buf_ops); + int error; + + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, + imap->im_len, XBF_UNMAPPED, bpp, &xfs_inode_buf_ops); + if (xfs_metadata_is_sick(error)) + xfs_agno_mark_sick(mp, xfs_daddr_to_agno(mp, imap->im_blkno), + XFS_SICK_AG_INODES); + return error; } static inline struct timespec64 xfs_inode_decode_bigtime(uint64_t ts) diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index f4569e18a8..7d660a9739 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -25,6 +25,8 @@ #include "xfs_attr_leaf.h" #include "xfs_types.h" #include "xfs_errortag.h" +#include "xfs_health.h" +#include "xfs_symlink_remote.h" struct kmem_cache *xfs_ifork_cache; @@ -50,7 +52,8 @@ xfs_init_local_fork( mem_size++; if (size) { - char *new_data = kmem_alloc(mem_size, KM_NOFS); + char *new_data = kmalloc(mem_size, + GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); memcpy(new_data, data, size); if (zero_terminate) @@ -77,7 +80,7 @@ xfs_iformat_local( /* * If the size is unreasonable, then something * is wrong and we just bail out rather than crash in - * kmem_alloc() or memcpy() below. + * kmalloc() or memcpy() below. */ if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) { xfs_warn(ip->i_mount, @@ -87,6 +90,7 @@ xfs_iformat_local( xfs_inode_verifier_error(ip, -EFSCORRUPTED, "xfs_iformat_local", dip, sizeof(*dip), __this_address); + xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); return -EFSCORRUPTED; } @@ -116,7 +120,7 @@ xfs_iformat_extents( /* * If the number of extents is unreasonable, then something is wrong and - * we just bail out rather than crash in kmem_alloc() or memcpy() below. + * we just bail out rather than crash in kmalloc() or memcpy() below. */ if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) { xfs_warn(ip->i_mount, "corrupt inode %llu ((a)extents = %llu).", @@ -124,6 +128,7 @@ xfs_iformat_extents( xfs_inode_verifier_error(ip, -EFSCORRUPTED, "xfs_iformat_extents(1)", dip, sizeof(*dip), __this_address); + xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); return -EFSCORRUPTED; } @@ -143,6 +148,7 @@ xfs_iformat_extents( xfs_inode_verifier_error(ip, -EFSCORRUPTED, "xfs_iformat_extents(2)", dp, sizeof(*dp), fa); + xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); return xfs_bmap_complain_bad_rec(ip, whichfork, fa, &new); } @@ -201,11 +207,13 @@ xfs_iformat_btree( xfs_inode_verifier_error(ip, -EFSCORRUPTED, "xfs_iformat_btree", dfp, size, __this_address); + xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); return -EFSCORRUPTED; } ifp->if_broot_bytes = size; - ifp->if_broot = kmem_alloc(size, KM_NOFS); + ifp->if_broot = kmalloc(size, + GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); ASSERT(ifp->if_broot != NULL); /* * Copy and convert from the on-disk structure @@ -265,12 +273,14 @@ xfs_iformat_data_fork( default: xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, sizeof(*dip), __this_address); + xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); return -EFSCORRUPTED; } break; default: xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, sizeof(*dip), __this_address); + xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); return -EFSCORRUPTED; } } @@ -342,6 +352,7 @@ xfs_iformat_attr_fork( default: xfs_inode_verifier_error(ip, error, __func__, dip, sizeof(*dip), __this_address); + xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE); error = -EFSCORRUPTED; break; } @@ -399,7 +410,8 @@ xfs_iroot_realloc( */ if (ifp->if_broot_bytes == 0) { new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff); - ifp->if_broot = kmem_alloc(new_size, KM_NOFS); + ifp->if_broot = kmalloc(new_size, + GFP_KERNEL | __GFP_NOFAIL); ifp->if_broot_bytes = (int)new_size; return; } @@ -414,7 +426,7 @@ xfs_iroot_realloc( new_max = cur_max + rec_diff; new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max); ifp->if_broot = krealloc(ifp->if_broot, new_size, - GFP_NOFS | __GFP_NOFAIL); + GFP_KERNEL | __GFP_NOFAIL); op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, ifp->if_broot_bytes); np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1, @@ -440,7 +452,7 @@ xfs_iroot_realloc( else new_size = 0; if (new_size > 0) { - new_broot = kmem_alloc(new_size, KM_NOFS); + new_broot = kmalloc(new_size, GFP_KERNEL | __GFP_NOFAIL); /* * First copy over the btree block header. */ @@ -470,7 +482,7 @@ xfs_iroot_realloc( (int)new_size); memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t)); } - kmem_free(ifp->if_broot); + kfree(ifp->if_broot); ifp->if_broot = new_broot; ifp->if_broot_bytes = (int)new_size; if (ifp->if_broot) @@ -488,7 +500,7 @@ xfs_iroot_realloc( * * If the amount of space needed has decreased below the size of the * inline buffer, then switch to using the inline buffer. Otherwise, - * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer + * use krealloc() or kmalloc() to adjust the size of the buffer * to what is needed. * * ip -- the inode whose if_data area is changing @@ -509,7 +521,7 @@ xfs_idata_realloc( if (byte_diff) { ifp->if_data = krealloc(ifp->if_data, new_size, - GFP_NOFS | __GFP_NOFAIL); + GFP_KERNEL | __GFP_NOFAIL); if (new_size == 0) ifp->if_data = NULL; ifp->if_bytes = new_size; @@ -524,13 +536,13 @@ xfs_idestroy_fork( struct xfs_ifork *ifp) { if (ifp->if_broot != NULL) { - kmem_free(ifp->if_broot); + kfree(ifp->if_broot); ifp->if_broot = NULL; } switch (ifp->if_format) { case XFS_DINODE_FMT_LOCAL: - kmem_free(ifp->if_data); + kfree(ifp->if_data); ifp->if_data = NULL; break; case XFS_DINODE_FMT_EXTENTS: @@ -562,7 +574,7 @@ xfs_iextents_copy( struct xfs_bmbt_irec rec; int64_t copied = 0; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED); ASSERT(ifp->if_bytes > 0); for_each_xfs_iext(ifp, &icur, &rec) { @@ -689,7 +701,7 @@ xfs_ifork_init_cow( return; ip->i_cowfp = kmem_cache_zalloc(xfs_ifork_cache, - GFP_NOFS | __GFP_NOFAIL); + GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL); ip->i_cowfp->if_format = XFS_DINODE_FMT_EXTENTS; } @@ -802,3 +814,12 @@ xfs_iext_count_upgrade( return 0; } + +/* Decide if a file mapping is on the realtime device or not. */ +bool +xfs_ifork_is_realtime( + struct xfs_inode *ip, + int whichfork) +{ + return XFS_IS_REALTIME_INODE(ip) && whichfork != XFS_ATTR_FORK; +} diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 96303249d2..bd53eb951b 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -260,6 +260,7 @@ int xfs_iext_count_may_overflow(struct xfs_inode *ip, int whichfork, int nr_to_add); int xfs_iext_count_upgrade(struct xfs_trans *tp, struct xfs_inode *ip, uint nr_to_add); +bool xfs_ifork_is_realtime(struct xfs_inode *ip, int whichfork); /* returns true if the fork has extents but they are not read in yet. */ static inline bool xfs_need_iread_extents(const struct xfs_ifork *ifp) diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 269573c828..16872972e1 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -838,10 +838,12 @@ struct xfs_cud_log_format { #define XFS_BMAP_EXTENT_ATTR_FORK (1U << 31) #define XFS_BMAP_EXTENT_UNWRITTEN (1U << 30) +#define XFS_BMAP_EXTENT_REALTIME (1U << 29) #define XFS_BMAP_EXTENT_FLAGS (XFS_BMAP_EXTENT_TYPE_MASK | \ XFS_BMAP_EXTENT_ATTR_FORK | \ - XFS_BMAP_EXTENT_UNWRITTEN) + XFS_BMAP_EXTENT_UNWRITTEN | \ + XFS_BMAP_EXTENT_REALTIME) /* * This is the structure used to lay out an bui log item in the diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 6709a7f8ba..511c912d51 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -23,6 +23,7 @@ #include "xfs_refcount.h" #include "xfs_rmap.h" #include "xfs_ag.h" +#include "xfs_health.h" struct kmem_cache *xfs_refcount_intent_cache; @@ -156,6 +157,7 @@ xfs_refcount_complain_bad_rec( xfs_warn(mp, "Start block 0x%x, block count 0x%x, references 0x%x", irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount); + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } @@ -238,6 +240,7 @@ xfs_refcount_insert( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -268,12 +271,14 @@ xfs_refcount_delete( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec); error = xfs_btree_delete(cur, i); if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -398,6 +403,7 @@ xfs_refcount_split_extent( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -425,6 +431,7 @@ xfs_refcount_split_extent( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -470,6 +477,7 @@ xfs_refcount_merge_center_extents( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -478,6 +486,7 @@ xfs_refcount_merge_center_extents( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -487,6 +496,7 @@ xfs_refcount_merge_center_extents( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -498,6 +508,7 @@ xfs_refcount_merge_center_extents( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -542,6 +553,7 @@ xfs_refcount_merge_left_extent( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -550,6 +562,7 @@ xfs_refcount_merge_left_extent( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -561,6 +574,7 @@ xfs_refcount_merge_left_extent( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -608,6 +622,7 @@ xfs_refcount_merge_right_extent( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -616,6 +631,7 @@ xfs_refcount_merge_right_extent( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -627,6 +643,7 @@ xfs_refcount_merge_right_extent( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -674,6 +691,7 @@ xfs_refcount_find_left_extents( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -693,6 +711,7 @@ xfs_refcount_find_left_extents( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -767,6 +786,7 @@ xfs_refcount_find_right_extents( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -786,6 +806,7 @@ xfs_refcount_find_right_extents( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -1056,7 +1077,7 @@ xfs_refcount_still_have_space( * to handle each of the shape changes to the refcount btree. */ overhead = xfs_allocfree_block_count(cur->bc_mp, - cur->bc_ag.refc.shape_changes); + cur->bc_refc.shape_changes); overhead += cur->bc_mp->m_refc_maxlevels; overhead *= cur->bc_mp->m_sb.sb_blocksize; @@ -1064,17 +1085,17 @@ xfs_refcount_still_have_space( * Only allow 2 refcount extent updates per transaction if the * refcount continue update "error" has been injected. */ - if (cur->bc_ag.refc.nr_ops > 2 && + if (cur->bc_refc.nr_ops > 2 && XFS_TEST_ERROR(false, cur->bc_mp, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE)) return false; - if (cur->bc_ag.refc.nr_ops == 0) + if (cur->bc_refc.nr_ops == 0) return true; else if (overhead > cur->bc_tp->t_log_res) return false; - return cur->bc_tp->t_log_res - overhead > - cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; + return cur->bc_tp->t_log_res - overhead > + cur->bc_refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD; } /* @@ -1134,7 +1155,7 @@ xfs_refcount_adjust_extents( * Either cover the hole (increment) or * delete the range (decrement). */ - cur->bc_ag.refc.nr_ops++; + cur->bc_refc.nr_ops++; if (tmp.rc_refcount) { error = xfs_refcount_insert(cur, &tmp, &found_tmp); @@ -1142,6 +1163,7 @@ xfs_refcount_adjust_extents( goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -1180,6 +1202,7 @@ xfs_refcount_adjust_extents( */ if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) || XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -1193,7 +1216,7 @@ xfs_refcount_adjust_extents( ext.rc_refcount += adj; trace_xfs_refcount_modify_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno, &ext); - cur->bc_ag.refc.nr_ops++; + cur->bc_refc.nr_ops++; if (ext.rc_refcount > 1) { error = xfs_refcount_update(cur, &ext); if (error) @@ -1203,6 +1226,7 @@ xfs_refcount_adjust_extents( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -1281,7 +1305,7 @@ xfs_refcount_adjust( if (shape_changed) shape_changes++; if (shape_changes) - cur->bc_ag.refc.shape_changes++; + cur->bc_refc.shape_changes++; /* Now that we've taken care of the ends, adjust the middle extents */ error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj); @@ -1327,8 +1351,10 @@ xfs_refcount_continue_op( struct xfs_perag *pag = cur->bc_ag.pag; if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, - ri->ri_blockcount))) + ri->ri_blockcount))) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); @@ -1374,8 +1400,8 @@ xfs_refcount_finish_one( */ rcur = *pcur; if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) { - nr_ops = rcur->bc_ag.refc.nr_ops; - shape_changes = rcur->bc_ag.refc.shape_changes; + nr_ops = rcur->bc_refc.nr_ops; + shape_changes = rcur->bc_refc.shape_changes; xfs_refcount_finish_one_cleanup(tp, rcur, 0); rcur = NULL; *pcur = NULL; @@ -1387,8 +1413,8 @@ xfs_refcount_finish_one( return error; rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, ri->ri_pag); - rcur->bc_ag.refc.nr_ops = nr_ops; - rcur->bc_ag.refc.shape_changes = shape_changes; + rcur->bc_refc.nr_ops = nr_ops; + rcur->bc_refc.shape_changes = shape_changes; } *pcur = rcur; @@ -1449,7 +1475,7 @@ __xfs_refcount_add( blockcount); ri = kmem_cache_alloc(xfs_refcount_intent_cache, - GFP_NOFS | __GFP_NOFAIL); + GFP_KERNEL | __GFP_NOFAIL); INIT_LIST_HEAD(&ri->ri_list); ri->ri_type = type; ri->ri_startblock = startblock; @@ -1535,6 +1561,7 @@ xfs_refcount_find_shared( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -1552,6 +1579,7 @@ xfs_refcount_find_shared( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -1585,6 +1613,7 @@ xfs_refcount_find_shared( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -1682,6 +1711,7 @@ xfs_refcount_adjust_cow_extents( goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec && ext.rc_domain != XFS_REFC_DOMAIN_COW)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -1697,6 +1727,7 @@ xfs_refcount_adjust_cow_extents( /* Adding a CoW reservation, there should be nothing here. */ if (XFS_IS_CORRUPT(cur->bc_mp, agbno + aglen > ext.rc_startblock)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -1714,6 +1745,7 @@ xfs_refcount_adjust_cow_extents( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -1721,14 +1753,17 @@ xfs_refcount_adjust_cow_extents( case XFS_REFCOUNT_ADJUST_COW_FREE: /* Removing a CoW reservation, there should be one extent. */ if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -1740,6 +1775,7 @@ xfs_refcount_adjust_cow_extents( if (error) goto out_error; if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -1889,8 +1925,10 @@ xfs_refcount_recover_extent( struct xfs_refcount_recovery *rr; if (XFS_IS_CORRUPT(cur->bc_mp, - be32_to_cpu(rec->refc.rc_refcount) != 1)) + be32_to_cpu(rec->refc.rc_refcount) != 1)) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } rr = kmalloc(sizeof(struct xfs_refcount_recovery), GFP_KERNEL | __GFP_NOFAIL); @@ -1900,6 +1938,7 @@ xfs_refcount_recover_extent( if (xfs_refcount_check_irec(cur->bc_ag.pag, &rr->rr_rrec) != NULL || XFS_IS_CORRUPT(cur->bc_mp, rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) { + xfs_btree_mark_sick(cur); kfree(rr); return -EFSCORRUPTED; } diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 0d80bd9914..ca59f6c89f 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -16,6 +16,7 @@ #include "xfs_refcount.h" #include "xfs_alloc.h" #include "xfs_error.h" +#include "xfs_health.h" #include "xfs_trace.h" #include "xfs_trans.h" #include "xfs_bit.h" @@ -77,8 +78,6 @@ xfs_refcountbt_alloc_block( xfs_refc_block(args.mp))); if (error) goto out_error; - trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_ag.pag->pag_agno, - args.agbno, 1); if (args.fsbno == NULLFSBLOCK) { *stat = 0; return 0; @@ -107,8 +106,6 @@ xfs_refcountbt_free_block( struct xfs_agf *agf = agbp->b_addr; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); - trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.pag->pag_agno, - XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); be32_add_cpu(&agf->agf_refcount_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); return xfs_free_extent_later(cur->bc_tp, fsbno, 1, @@ -220,7 +217,7 @@ xfs_refcountbt_verify( if (!xfs_has_reflink(mp)) return __this_address; - fa = xfs_btree_sblock_v5hdr_verify(bp); + fa = xfs_btree_agblock_v5hdr_verify(bp); if (fa) return fa; @@ -242,7 +239,7 @@ xfs_refcountbt_verify( } else if (level >= mp->m_refc_maxlevels) return __this_address; - return xfs_btree_sblock_verify(bp, mp->m_refc_mxr[level != 0]); + return xfs_btree_agblock_verify(bp, mp->m_refc_mxr[level != 0]); } STATIC void @@ -251,7 +248,7 @@ xfs_refcountbt_read_verify( { xfs_failaddr_t fa; - if (!xfs_btree_sblock_verify_crc(bp)) + if (!xfs_btree_agblock_verify_crc(bp)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { fa = xfs_refcountbt_verify(bp); @@ -275,7 +272,7 @@ xfs_refcountbt_write_verify( xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } - xfs_btree_sblock_calc_crc(bp); + xfs_btree_agblock_calc_crc(bp); } @@ -321,9 +318,17 @@ xfs_refcountbt_keys_contiguous( be32_to_cpu(key2->refc.rc_startblock)); } -static const struct xfs_btree_ops xfs_refcountbt_ops = { +const struct xfs_btree_ops xfs_refcountbt_ops = { + .name = "refcount", + .type = XFS_BTREE_TYPE_AG, + .rec_len = sizeof(struct xfs_refcount_rec), .key_len = sizeof(struct xfs_refcount_key), + .ptr_len = XFS_BTREE_SHORT_PTR_LEN, + + .lru_refs = XFS_REFC_BTREE_REF, + .statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2), + .sick_mask = XFS_SICK_AG_REFCNTBT, .dup_cursor = xfs_refcountbt_dup_cursor, .set_root = xfs_refcountbt_set_root, @@ -344,59 +349,32 @@ static const struct xfs_btree_ops xfs_refcountbt_ops = { }; /* - * Initialize a new refcount btree cursor. + * Create a new refcount btree cursor. + * + * For staging cursors tp and agbp are NULL. */ -static struct xfs_btree_cur * -xfs_refcountbt_init_common( +struct xfs_btree_cur * +xfs_refcountbt_init_cursor( struct xfs_mount *mp, struct xfs_trans *tp, + struct xfs_buf *agbp, struct xfs_perag *pag) { struct xfs_btree_cur *cur; ASSERT(pag->pag_agno < mp->m_sb.sb_agcount); - cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_REFC, + cur = xfs_btree_alloc_cursor(mp, tp, &xfs_refcountbt_ops, mp->m_refc_maxlevels, xfs_refcountbt_cur_cache); - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_refcbt_2); - - cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - cur->bc_ag.pag = xfs_perag_hold(pag); - cur->bc_ag.refc.nr_ops = 0; - cur->bc_ag.refc.shape_changes = 0; - cur->bc_ops = &xfs_refcountbt_ops; - return cur; -} - -/* Create a btree cursor. */ -struct xfs_btree_cur * -xfs_refcountbt_init_cursor( - struct xfs_mount *mp, - struct xfs_trans *tp, - struct xfs_buf *agbp, - struct xfs_perag *pag) -{ - struct xfs_agf *agf = agbp->b_addr; - struct xfs_btree_cur *cur; - - cur = xfs_refcountbt_init_common(mp, tp, pag); - cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level); + cur->bc_refc.nr_ops = 0; + cur->bc_refc.shape_changes = 0; cur->bc_ag.agbp = agbp; - return cur; -} + if (agbp) { + struct xfs_agf *agf = agbp->b_addr; -/* Create a btree cursor with a fake root for staging. */ -struct xfs_btree_cur * -xfs_refcountbt_stage_cursor( - struct xfs_mount *mp, - struct xbtree_afakeroot *afake, - struct xfs_perag *pag) -{ - struct xfs_btree_cur *cur; - - cur = xfs_refcountbt_init_common(mp, NULL, pag); - xfs_btree_stage_afakeroot(cur, afake); + cur->bc_nlevels = be32_to_cpu(agf->agf_refcount_level); + } return cur; } @@ -421,7 +399,7 @@ xfs_refcountbt_commit_staged_btree( xfs_alloc_log_agf(tp, agbp, XFS_AGF_REFCOUNT_BLOCKS | XFS_AGF_REFCOUNT_ROOT | XFS_AGF_REFCOUNT_LEVEL); - xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_refcountbt_ops); + xfs_btree_commit_afakeroot(cur, tp, agbp); } /* Calculate number of records in a refcount btree block. */ diff --git a/fs/xfs/libxfs/xfs_refcount_btree.h b/fs/xfs/libxfs/xfs_refcount_btree.h index d66b37259b..1e0ab25f6c 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.h +++ b/fs/xfs/libxfs/xfs_refcount_btree.h @@ -48,8 +48,6 @@ struct xbtree_afakeroot; extern struct xfs_btree_cur *xfs_refcountbt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *agbp, struct xfs_perag *pag); -struct xfs_btree_cur *xfs_refcountbt_stage_cursor(struct xfs_mount *mp, - struct xbtree_afakeroot *afake, struct xfs_perag *pag); extern int xfs_refcountbt_maxrecs(int blocklen, bool leaf); extern void xfs_refcountbt_compute_maxlevels(struct xfs_mount *mp); diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 76bf7f48cb..ef16f6f9ce 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -23,6 +23,7 @@ #include "xfs_error.h" #include "xfs_inode.h" #include "xfs_ag.h" +#include "xfs_health.h" struct kmem_cache *xfs_rmap_intent_cache; @@ -56,8 +57,10 @@ xfs_rmap_lookup_le( error = xfs_rmap_get_rec(cur, irec, &get_stat); if (error) return error; - if (!get_stat) + if (!get_stat) { + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; + } return 0; } @@ -132,6 +135,7 @@ xfs_rmap_insert( if (error) goto done; if (XFS_IS_CORRUPT(rcur->bc_mp, i != 0)) { + xfs_btree_mark_sick(rcur); error = -EFSCORRUPTED; goto done; } @@ -145,6 +149,7 @@ xfs_rmap_insert( if (error) goto done; if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) { + xfs_btree_mark_sick(rcur); error = -EFSCORRUPTED; goto done; } @@ -174,6 +179,7 @@ xfs_rmap_delete( if (error) goto done; if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) { + xfs_btree_mark_sick(rcur); error = -EFSCORRUPTED; goto done; } @@ -182,6 +188,7 @@ xfs_rmap_delete( if (error) goto done; if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) { + xfs_btree_mark_sick(rcur); error = -EFSCORRUPTED; goto done; } @@ -208,10 +215,10 @@ xfs_rmap_btrec_to_irec( /* Simple checks for rmap records. */ xfs_failaddr_t xfs_rmap_check_irec( - struct xfs_btree_cur *cur, + struct xfs_perag *pag, const struct xfs_rmap_irec *irec) { - struct xfs_mount *mp = cur->bc_mp; + struct xfs_mount *mp = pag->pag_mount; bool is_inode; bool is_unwritten; bool is_bmbt; @@ -226,8 +233,8 @@ xfs_rmap_check_irec( return __this_address; } else { /* check for valid extent range, including overflow */ - if (!xfs_verify_agbext(cur->bc_ag.pag, irec->rm_startblock, - irec->rm_blockcount)) + if (!xfs_verify_agbext(pag, irec->rm_startblock, + irec->rm_blockcount)) return __this_address; } @@ -262,6 +269,16 @@ xfs_rmap_check_irec( return NULL; } +static inline xfs_failaddr_t +xfs_rmap_check_btrec( + struct xfs_btree_cur *cur, + const struct xfs_rmap_irec *irec) +{ + if (xfs_btree_is_mem_rmap(cur->bc_ops)) + return xfs_rmap_check_irec(cur->bc_mem.pag, irec); + return xfs_rmap_check_irec(cur->bc_ag.pag, irec); +} + static inline int xfs_rmap_complain_bad_rec( struct xfs_btree_cur *cur, @@ -270,13 +287,18 @@ xfs_rmap_complain_bad_rec( { struct xfs_mount *mp = cur->bc_mp; - xfs_warn(mp, - "Reverse Mapping BTree record corruption in AG %d detected at %pS!", - cur->bc_ag.pag->pag_agno, fa); + if (xfs_btree_is_mem_rmap(cur->bc_ops)) + xfs_warn(mp, + "In-Memory Reverse Mapping BTree record corruption detected at %pS!", fa); + else + xfs_warn(mp, + "Reverse Mapping BTree record corruption in AG %d detected at %pS!", + cur->bc_ag.pag->pag_agno, fa); xfs_warn(mp, "Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x", irec->rm_owner, irec->rm_flags, irec->rm_startblock, irec->rm_blockcount); + xfs_btree_mark_sick(cur); return -EFSCORRUPTED; } @@ -299,7 +321,7 @@ xfs_rmap_get_rec( fa = xfs_rmap_btrec_to_irec(rec, irec); if (!fa) - fa = xfs_rmap_check_irec(cur, irec); + fa = xfs_rmap_check_btrec(cur, irec); if (fa) return xfs_rmap_complain_bad_rec(cur, fa, irec); @@ -512,7 +534,7 @@ xfs_rmap_lookup_le_range( */ static int xfs_rmap_free_check_owner( - struct xfs_mount *mp, + struct xfs_btree_cur *cur, uint64_t ltoff, struct xfs_rmap_irec *rec, xfs_filblks_t len, @@ -520,6 +542,7 @@ xfs_rmap_free_check_owner( uint64_t offset, unsigned int flags) { + struct xfs_mount *mp = cur->bc_mp; int error = 0; if (owner == XFS_RMAP_OWN_UNKNOWN) @@ -529,12 +552,14 @@ xfs_rmap_free_check_owner( if (XFS_IS_CORRUPT(mp, (flags & XFS_RMAP_UNWRITTEN) != (rec->rm_flags & XFS_RMAP_UNWRITTEN))) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out; } /* Make sure the owner matches what we expect to find in the tree. */ if (XFS_IS_CORRUPT(mp, owner != rec->rm_owner)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out; } @@ -546,16 +571,19 @@ xfs_rmap_free_check_owner( if (flags & XFS_RMAP_BMBT_BLOCK) { if (XFS_IS_CORRUPT(mp, !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out; } } else { if (XFS_IS_CORRUPT(mp, rec->rm_offset > offset)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out; } if (XFS_IS_CORRUPT(mp, offset + len > ltoff + rec->rm_blockcount)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out; } @@ -618,6 +646,7 @@ xfs_rmap_unmap( if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -639,6 +668,7 @@ xfs_rmap_unmap( if (XFS_IS_CORRUPT(mp, bno < ltrec.rm_startblock + ltrec.rm_blockcount)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -665,6 +695,7 @@ xfs_rmap_unmap( if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -677,12 +708,13 @@ xfs_rmap_unmap( ltrec.rm_startblock > bno || ltrec.rm_startblock + ltrec.rm_blockcount < bno + len)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } /* Check owner information. */ - error = xfs_rmap_free_check_owner(mp, ltoff, <rec, len, owner, + error = xfs_rmap_free_check_owner(cur, ltoff, <rec, len, owner, offset, flags); if (error) goto out_error; @@ -697,6 +729,7 @@ xfs_rmap_unmap( if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -788,6 +821,86 @@ out_error: return error; } +#ifdef CONFIG_XFS_LIVE_HOOKS +/* + * Use a static key here to reduce the overhead of rmapbt live updates. If + * the compiler supports jump labels, the static branch will be replaced by a + * nop sled when there are no hook users. Online fsck is currently the only + * caller, so this is a reasonable tradeoff. + * + * Note: Patching the kernel code requires taking the cpu hotplug lock. Other + * parts of the kernel allocate memory with that lock held, which means that + * XFS callers cannot hold any locks that might be used by memory reclaim or + * writeback when calling the static_branch_{inc,dec} functions. + */ +DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_rmap_hooks_switch); + +void +xfs_rmap_hook_disable(void) +{ + xfs_hooks_switch_off(&xfs_rmap_hooks_switch); +} + +void +xfs_rmap_hook_enable(void) +{ + xfs_hooks_switch_on(&xfs_rmap_hooks_switch); +} + +/* Call downstream hooks for a reverse mapping update. */ +static inline void +xfs_rmap_update_hook( + struct xfs_trans *tp, + struct xfs_perag *pag, + enum xfs_rmap_intent_type op, + xfs_agblock_t startblock, + xfs_extlen_t blockcount, + bool unwritten, + const struct xfs_owner_info *oinfo) +{ + if (xfs_hooks_switched_on(&xfs_rmap_hooks_switch)) { + struct xfs_rmap_update_params p = { + .startblock = startblock, + .blockcount = blockcount, + .unwritten = unwritten, + .oinfo = *oinfo, /* struct copy */ + }; + + if (pag) + xfs_hooks_call(&pag->pag_rmap_update_hooks, op, &p); + } +} + +/* Call the specified function during a reverse mapping update. */ +int +xfs_rmap_hook_add( + struct xfs_perag *pag, + struct xfs_rmap_hook *hook) +{ + return xfs_hooks_add(&pag->pag_rmap_update_hooks, &hook->rmap_hook); +} + +/* Stop calling the specified function during a reverse mapping update. */ +void +xfs_rmap_hook_del( + struct xfs_perag *pag, + struct xfs_rmap_hook *hook) +{ + xfs_hooks_del(&pag->pag_rmap_update_hooks, &hook->rmap_hook); +} + +/* Configure rmap update hook functions. */ +void +xfs_rmap_hook_setup( + struct xfs_rmap_hook *hook, + notifier_fn_t mod_fn) +{ + xfs_hook_setup(&hook->rmap_hook, mod_fn); +} +#else +# define xfs_rmap_update_hook(t, p, o, s, b, u, oi) do { } while (0) +#endif /* CONFIG_XFS_LIVE_HOOKS */ + /* * Remove a reference to an extent in the rmap btree. */ @@ -808,7 +921,7 @@ xfs_rmap_free( return 0; cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); - + xfs_rmap_update_hook(tp, pag, XFS_RMAP_UNMAP, bno, len, false, oinfo); error = xfs_rmap_unmap(cur, bno, len, false, oinfo); xfs_btree_del_cursor(cur, error); @@ -900,6 +1013,7 @@ xfs_rmap_map( if (XFS_IS_CORRUPT(mp, have_lt != 0 && ltrec.rm_startblock + ltrec.rm_blockcount > bno)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -917,10 +1031,12 @@ xfs_rmap_map( if (error) goto out_error; if (XFS_IS_CORRUPT(mp, have_gt != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (XFS_IS_CORRUPT(mp, bno + len > gtrec.rm_startblock)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -974,6 +1090,7 @@ xfs_rmap_map( if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -1021,6 +1138,7 @@ xfs_rmap_map( if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -1055,6 +1173,7 @@ xfs_rmap_alloc( return 0; cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag); + xfs_rmap_update_hook(tp, pag, XFS_RMAP_MAP, bno, len, false, oinfo); error = xfs_rmap_map(cur, bno, len, false, oinfo); xfs_btree_del_cursor(cur, error); @@ -1116,6 +1235,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1153,12 +1273,14 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if (XFS_IS_CORRUPT(mp, LEFT.rm_startblock + LEFT.rm_blockcount > bno)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1181,6 +1303,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1193,10 +1316,12 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1227,6 +1352,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1246,6 +1372,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1257,6 +1384,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1264,6 +1392,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1275,6 +1404,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1282,6 +1412,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1305,6 +1436,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1312,6 +1444,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1331,6 +1464,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1342,6 +1476,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1349,6 +1484,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1419,6 +1555,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1461,6 +1598,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 0)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1476,6 +1614,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1509,6 +1648,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1522,6 +1662,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 0)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1534,6 +1675,7 @@ xfs_rmap_convert( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1606,6 +1748,7 @@ xfs_rmap_convert_shared( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1634,6 +1777,7 @@ xfs_rmap_convert_shared( if (XFS_IS_CORRUPT(mp, LEFT.rm_startblock + LEFT.rm_blockcount > bno)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1652,10 +1796,12 @@ xfs_rmap_convert_shared( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1706,6 +1852,7 @@ xfs_rmap_convert_shared( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1732,6 +1879,7 @@ xfs_rmap_convert_shared( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1758,6 +1906,7 @@ xfs_rmap_convert_shared( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1781,6 +1930,7 @@ xfs_rmap_convert_shared( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1816,6 +1966,7 @@ xfs_rmap_convert_shared( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1861,6 +2012,7 @@ xfs_rmap_convert_shared( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1896,6 +2048,7 @@ xfs_rmap_convert_shared( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -1934,6 +2087,7 @@ xfs_rmap_convert_shared( if (error) goto done; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto done; } @@ -2023,6 +2177,7 @@ xfs_rmap_unmap_shared( if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -2033,12 +2188,14 @@ xfs_rmap_unmap_shared( ltrec.rm_startblock > bno || ltrec.rm_startblock + ltrec.rm_blockcount < bno + len)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } /* Make sure the owner matches what we expect to find in the tree. */ if (XFS_IS_CORRUPT(mp, owner != ltrec.rm_owner)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -2047,16 +2204,19 @@ xfs_rmap_unmap_shared( if (XFS_IS_CORRUPT(mp, (flags & XFS_RMAP_UNWRITTEN) != (ltrec.rm_flags & XFS_RMAP_UNWRITTEN))) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } /* Check the offset. */ if (XFS_IS_CORRUPT(mp, ltrec.rm_offset > offset)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } if (XFS_IS_CORRUPT(mp, offset > ltoff + ltrec.rm_blockcount)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -2113,6 +2273,7 @@ xfs_rmap_unmap_shared( if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -2142,6 +2303,7 @@ xfs_rmap_unmap_shared( if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -2221,6 +2383,7 @@ xfs_rmap_map_shared( if (error) goto out_error; if (XFS_IS_CORRUPT(mp, have_gt != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -2273,6 +2436,7 @@ xfs_rmap_map_shared( if (error) goto out_error; if (XFS_IS_CORRUPT(mp, i != 1)) { + xfs_btree_mark_sick(cur); error = -EFSCORRUPTED; goto out_error; } @@ -2335,15 +2499,12 @@ xfs_rmap_map_raw( { struct xfs_owner_info oinfo; - oinfo.oi_owner = rmap->rm_owner; - oinfo.oi_offset = rmap->rm_offset; - oinfo.oi_flags = 0; - if (rmap->rm_flags & XFS_RMAP_ATTR_FORK) - oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK; - if (rmap->rm_flags & XFS_RMAP_BMBT_BLOCK) - oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; + xfs_owner_info_pack(&oinfo, rmap->rm_owner, rmap->rm_offset, + rmap->rm_flags); - if (rmap->rm_flags || XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner)) + if ((rmap->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK | + XFS_RMAP_UNWRITTEN)) || + XFS_RMAP_NON_INODE_OWNER(rmap->rm_owner)) return xfs_rmap_map(cur, rmap->rm_startblock, rmap->rm_blockcount, rmap->rm_flags & XFS_RMAP_UNWRITTEN, @@ -2373,7 +2534,7 @@ xfs_rmap_query_range_helper( fa = xfs_rmap_btrec_to_irec(rec, &irec); if (!fa) - fa = xfs_rmap_check_irec(cur, &irec); + fa = xfs_rmap_check_btrec(cur, &irec); if (fa) return xfs_rmap_complain_bad_rec(cur, fa, &irec); @@ -2428,6 +2589,38 @@ xfs_rmap_finish_one_cleanup( xfs_trans_brelse(tp, agbp); } +/* Commit an rmap operation into the ondisk tree. */ +int +__xfs_rmap_finish_intent( + struct xfs_btree_cur *rcur, + enum xfs_rmap_intent_type op, + xfs_agblock_t bno, + xfs_extlen_t len, + const struct xfs_owner_info *oinfo, + bool unwritten) +{ + switch (op) { + case XFS_RMAP_ALLOC: + case XFS_RMAP_MAP: + return xfs_rmap_map(rcur, bno, len, unwritten, oinfo); + case XFS_RMAP_MAP_SHARED: + return xfs_rmap_map_shared(rcur, bno, len, unwritten, oinfo); + case XFS_RMAP_FREE: + case XFS_RMAP_UNMAP: + return xfs_rmap_unmap(rcur, bno, len, unwritten, oinfo); + case XFS_RMAP_UNMAP_SHARED: + return xfs_rmap_unmap_shared(rcur, bno, len, unwritten, oinfo); + case XFS_RMAP_CONVERT: + return xfs_rmap_convert(rcur, bno, len, !unwritten, oinfo); + case XFS_RMAP_CONVERT_SHARED: + return xfs_rmap_convert_shared(rcur, bno, len, !unwritten, + oinfo); + default: + ASSERT(0); + return -EFSCORRUPTED; + } +} + /* * Process one of the deferred rmap operations. We pass back the * btree cursor to maintain our lock on the rmapbt between calls. @@ -2476,10 +2669,14 @@ xfs_rmap_finish_one( * allocate blocks. */ error = xfs_free_extent_fix_freelist(tp, ri->ri_pag, &agbp); - if (error) + if (error) { + xfs_ag_mark_sick(ri->ri_pag, XFS_SICK_AG_AGFL); return error; - if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) + } + if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) { + xfs_ag_mark_sick(ri->ri_pag, XFS_SICK_AG_AGFL); return -EFSCORRUPTED; + } rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, ri->ri_pag); } @@ -2490,39 +2687,14 @@ xfs_rmap_finish_one( unwritten = ri->ri_bmap.br_state == XFS_EXT_UNWRITTEN; bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, ri->ri_bmap.br_startblock); - switch (ri->ri_type) { - case XFS_RMAP_ALLOC: - case XFS_RMAP_MAP: - error = xfs_rmap_map(rcur, bno, ri->ri_bmap.br_blockcount, - unwritten, &oinfo); - break; - case XFS_RMAP_MAP_SHARED: - error = xfs_rmap_map_shared(rcur, bno, - ri->ri_bmap.br_blockcount, unwritten, &oinfo); - break; - case XFS_RMAP_FREE: - case XFS_RMAP_UNMAP: - error = xfs_rmap_unmap(rcur, bno, ri->ri_bmap.br_blockcount, - unwritten, &oinfo); - break; - case XFS_RMAP_UNMAP_SHARED: - error = xfs_rmap_unmap_shared(rcur, bno, - ri->ri_bmap.br_blockcount, unwritten, &oinfo); - break; - case XFS_RMAP_CONVERT: - error = xfs_rmap_convert(rcur, bno, ri->ri_bmap.br_blockcount, - !unwritten, &oinfo); - break; - case XFS_RMAP_CONVERT_SHARED: - error = xfs_rmap_convert_shared(rcur, bno, - ri->ri_bmap.br_blockcount, !unwritten, &oinfo); - break; - default: - ASSERT(0); - error = -EFSCORRUPTED; - } + error = __xfs_rmap_finish_intent(rcur, ri->ri_type, bno, + ri->ri_bmap.br_blockcount, &oinfo, unwritten); + if (error) + return error; - return error; + xfs_rmap_update_hook(tp, ri->ri_pag, ri->ri_type, bno, + ri->ri_bmap.br_blockcount, unwritten, &oinfo); + return 0; } /* @@ -2559,7 +2731,7 @@ __xfs_rmap_add( bmap->br_blockcount, bmap->br_state); - ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_NOFS | __GFP_NOFAIL); + ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL); INIT_LIST_HEAD(&ri->ri_list); ri->ri_type = type; ri->ri_owner = owner; diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 3c98d9d50a..9d01fe6894 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -186,6 +186,10 @@ void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp, struct xfs_btree_cur *rcur, int error); int xfs_rmap_finish_one(struct xfs_trans *tp, struct xfs_rmap_intent *ri, struct xfs_btree_cur **pcur); +int __xfs_rmap_finish_intent(struct xfs_btree_cur *rcur, + enum xfs_rmap_intent_type op, xfs_agblock_t bno, + xfs_extlen_t len, const struct xfs_owner_info *oinfo, + bool unwritten); int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno, uint64_t owner, uint64_t offset, unsigned int flags, @@ -195,7 +199,7 @@ int xfs_rmap_compare(const struct xfs_rmap_irec *a, union xfs_btree_rec; xfs_failaddr_t xfs_rmap_btrec_to_irec(const union xfs_btree_rec *rec, struct xfs_rmap_irec *irec); -xfs_failaddr_t xfs_rmap_check_irec(struct xfs_btree_cur *cur, +xfs_failaddr_t xfs_rmap_check_irec(struct xfs_perag *pag, const struct xfs_rmap_irec *irec); int xfs_rmap_has_records(struct xfs_btree_cur *cur, xfs_agblock_t bno, @@ -235,4 +239,29 @@ extern struct kmem_cache *xfs_rmap_intent_cache; int __init xfs_rmap_intent_init_cache(void); void xfs_rmap_intent_destroy_cache(void); +/* + * Parameters for tracking reverse mapping changes. The hook function arg + * parameter is enum xfs_rmap_intent_type, and the rest is below. + */ +struct xfs_rmap_update_params { + xfs_agblock_t startblock; + xfs_extlen_t blockcount; + struct xfs_owner_info oinfo; + bool unwritten; +}; + +#ifdef CONFIG_XFS_LIVE_HOOKS + +struct xfs_rmap_hook { + struct xfs_hook rmap_hook; +}; + +void xfs_rmap_hook_disable(void); +void xfs_rmap_hook_enable(void); + +int xfs_rmap_hook_add(struct xfs_perag *pag, struct xfs_rmap_hook *hook); +void xfs_rmap_hook_del(struct xfs_perag *pag, struct xfs_rmap_hook *hook); +void xfs_rmap_hook_setup(struct xfs_rmap_hook *hook, notifier_fn_t mod_fn); +#endif + #endif /* __XFS_RMAP_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 6c81b20e97..9e759efa81 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -16,11 +16,14 @@ #include "xfs_btree_staging.h" #include "xfs_rmap.h" #include "xfs_rmap_btree.h" +#include "xfs_health.h" #include "xfs_trace.h" #include "xfs_error.h" #include "xfs_extent_busy.h" #include "xfs_ag.h" #include "xfs_ag_resv.h" +#include "xfs_buf_mem.h" +#include "xfs_btree_mem.h" static struct kmem_cache *xfs_rmapbt_cur_cache; @@ -65,13 +68,12 @@ xfs_rmapbt_set_root( { struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; - int btnum = cur->bc_btnum; ASSERT(ptr->s != 0); - agf->agf_roots[btnum] = ptr->s; - be32_add_cpu(&agf->agf_levels[btnum], inc); - cur->bc_ag.pag->pagf_levels[btnum] += inc; + agf->agf_rmap_root = ptr->s; + be32_add_cpu(&agf->agf_rmap_level, inc); + cur->bc_ag.pag->pagf_rmap_level += inc; xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS); } @@ -94,8 +96,6 @@ xfs_rmapbt_alloc_block( &bno, 1); if (error) return error; - - trace_xfs_rmapbt_alloc_block(cur->bc_mp, pag->pag_agno, bno, 1); if (bno == NULLAGBLOCK) { *stat = 0; return 0; @@ -125,8 +125,6 @@ xfs_rmapbt_free_block( int error; bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp)); - trace_xfs_rmapbt_free_block(cur->bc_mp, pag->pag_agno, - bno, 1); be32_add_cpu(&agf->agf_rmap_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); error = xfs_alloc_put_freelist(pag, cur->bc_tp, agbp, NULL, bno, 1); @@ -226,7 +224,7 @@ xfs_rmapbt_init_ptr_from_cur( ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno)); - ptr->s = agf->agf_roots[cur->bc_btnum]; + ptr->s = agf->agf_rmap_root; } /* @@ -340,18 +338,29 @@ xfs_rmapbt_verify( if (!xfs_has_rmapbt(mp)) return __this_address; - fa = xfs_btree_sblock_v5hdr_verify(bp); + fa = xfs_btree_agblock_v5hdr_verify(bp); if (fa) return fa; level = be16_to_cpu(block->bb_level); if (pag && xfs_perag_initialised_agf(pag)) { - if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi]) + unsigned int maxlevel = pag->pagf_rmap_level; + +#ifdef CONFIG_XFS_ONLINE_REPAIR + /* + * Online repair could be rewriting the free space btrees, so + * we'll validate against the larger of either tree while this + * is going on. + */ + maxlevel = max_t(unsigned int, maxlevel, + pag->pagf_repair_rmap_level); +#endif + if (level >= maxlevel) return __this_address; } else if (level >= mp->m_rmap_maxlevels) return __this_address; - return xfs_btree_sblock_verify(bp, mp->m_rmap_mxr[level != 0]); + return xfs_btree_agblock_verify(bp, mp->m_rmap_mxr[level != 0]); } static void @@ -360,7 +369,7 @@ xfs_rmapbt_read_verify( { xfs_failaddr_t fa; - if (!xfs_btree_sblock_verify_crc(bp)) + if (!xfs_btree_agblock_verify_crc(bp)) xfs_verifier_error(bp, -EFSBADCRC, __this_address); else { fa = xfs_rmapbt_verify(bp); @@ -384,7 +393,7 @@ xfs_rmapbt_write_verify( xfs_verifier_error(bp, -EFSCORRUPTED, fa); return; } - xfs_btree_sblock_calc_crc(bp); + xfs_btree_agblock_calc_crc(bp); } @@ -476,9 +485,19 @@ xfs_rmapbt_keys_contiguous( be32_to_cpu(key2->rmap.rm_startblock)); } -static const struct xfs_btree_ops xfs_rmapbt_ops = { +const struct xfs_btree_ops xfs_rmapbt_ops = { + .name = "rmap", + .type = XFS_BTREE_TYPE_AG, + .geom_flags = XFS_BTGEO_OVERLAPPING, + .rec_len = sizeof(struct xfs_rmap_rec), + /* Overlapping btree; 2 keys per pointer. */ .key_len = 2 * sizeof(struct xfs_rmap_key), + .ptr_len = XFS_BTREE_SHORT_PTR_LEN, + + .lru_refs = XFS_RMAP_BTREE_REF, + .statoff = XFS_STATS_CALC_INDEX(xs_rmap_2), + .sick_mask = XFS_SICK_AG_RMAPBT, .dup_cursor = xfs_rmapbt_dup_cursor, .set_root = xfs_rmapbt_set_root, @@ -498,55 +517,176 @@ static const struct xfs_btree_ops xfs_rmapbt_ops = { .keys_contiguous = xfs_rmapbt_keys_contiguous, }; -static struct xfs_btree_cur * -xfs_rmapbt_init_common( +/* + * Create a new reverse mapping btree cursor. + * + * For staging cursors tp and agbp are NULL. + */ +struct xfs_btree_cur * +xfs_rmapbt_init_cursor( struct xfs_mount *mp, struct xfs_trans *tp, + struct xfs_buf *agbp, struct xfs_perag *pag) { struct xfs_btree_cur *cur; - /* Overlapping btree; 2 keys per pointer. */ - cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_RMAP, + cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rmapbt_ops, mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache); - cur->bc_flags = XFS_BTREE_CRC_BLOCKS | XFS_BTREE_OVERLAPPING; - cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_rmap_2); - cur->bc_ops = &xfs_rmapbt_ops; - cur->bc_ag.pag = xfs_perag_hold(pag); + cur->bc_ag.agbp = agbp; + if (agbp) { + struct xfs_agf *agf = agbp->b_addr; + + cur->bc_nlevels = be32_to_cpu(agf->agf_rmap_level); + } return cur; } -/* Create a new reverse mapping btree cursor. */ +#ifdef CONFIG_XFS_BTREE_IN_MEM +static inline unsigned int +xfs_rmapbt_mem_block_maxrecs( + unsigned int blocklen, + bool leaf) +{ + if (leaf) + return blocklen / sizeof(struct xfs_rmap_rec); + return blocklen / + (2 * sizeof(struct xfs_rmap_key) + sizeof(__be64)); +} + +/* + * Validate an in-memory rmap btree block. Callers are allowed to generate an + * in-memory btree even if the ondisk feature is not enabled. + */ +static xfs_failaddr_t +xfs_rmapbt_mem_verify( + struct xfs_buf *bp) +{ + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + xfs_failaddr_t fa; + unsigned int level; + unsigned int maxrecs; + + if (!xfs_verify_magic(bp, block->bb_magic)) + return __this_address; + + fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN); + if (fa) + return fa; + + level = be16_to_cpu(block->bb_level); + if (level >= xfs_rmapbt_maxlevels_ondisk()) + return __this_address; + + maxrecs = xfs_rmapbt_mem_block_maxrecs( + XFBNO_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN, level == 0); + return xfs_btree_memblock_verify(bp, maxrecs); +} + +static void +xfs_rmapbt_mem_rw_verify( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa = xfs_rmapbt_mem_verify(bp); + + if (fa) + xfs_verifier_error(bp, -EFSCORRUPTED, fa); +} + +/* skip crc checks on in-memory btrees to save time */ +static const struct xfs_buf_ops xfs_rmapbt_mem_buf_ops = { + .name = "xfs_rmapbt_mem", + .magic = { 0, cpu_to_be32(XFS_RMAP_CRC_MAGIC) }, + .verify_read = xfs_rmapbt_mem_rw_verify, + .verify_write = xfs_rmapbt_mem_rw_verify, + .verify_struct = xfs_rmapbt_mem_verify, +}; + +const struct xfs_btree_ops xfs_rmapbt_mem_ops = { + .name = "mem_rmap", + .type = XFS_BTREE_TYPE_MEM, + .geom_flags = XFS_BTGEO_OVERLAPPING, + + .rec_len = sizeof(struct xfs_rmap_rec), + /* Overlapping btree; 2 keys per pointer. */ + .key_len = 2 * sizeof(struct xfs_rmap_key), + .ptr_len = XFS_BTREE_LONG_PTR_LEN, + + .lru_refs = XFS_RMAP_BTREE_REF, + .statoff = XFS_STATS_CALC_INDEX(xs_rmap_mem_2), + + .dup_cursor = xfbtree_dup_cursor, + .set_root = xfbtree_set_root, + .alloc_block = xfbtree_alloc_block, + .free_block = xfbtree_free_block, + .get_minrecs = xfbtree_get_minrecs, + .get_maxrecs = xfbtree_get_maxrecs, + .init_key_from_rec = xfs_rmapbt_init_key_from_rec, + .init_high_key_from_rec = xfs_rmapbt_init_high_key_from_rec, + .init_rec_from_cur = xfs_rmapbt_init_rec_from_cur, + .init_ptr_from_cur = xfbtree_init_ptr_from_cur, + .key_diff = xfs_rmapbt_key_diff, + .buf_ops = &xfs_rmapbt_mem_buf_ops, + .diff_two_keys = xfs_rmapbt_diff_two_keys, + .keys_inorder = xfs_rmapbt_keys_inorder, + .recs_inorder = xfs_rmapbt_recs_inorder, + .keys_contiguous = xfs_rmapbt_keys_contiguous, +}; + +/* Create a cursor for an in-memory btree. */ struct xfs_btree_cur * -xfs_rmapbt_init_cursor( - struct xfs_mount *mp, +xfs_rmapbt_mem_cursor( + struct xfs_perag *pag, struct xfs_trans *tp, - struct xfs_buf *agbp, - struct xfs_perag *pag) + struct xfbtree *xfbt) { - struct xfs_agf *agf = agbp->b_addr; struct xfs_btree_cur *cur; + struct xfs_mount *mp = pag->pag_mount; - cur = xfs_rmapbt_init_common(mp, tp, pag); - cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); - cur->bc_ag.agbp = agbp; + cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rmapbt_mem_ops, + xfs_rmapbt_maxlevels_ondisk(), xfs_rmapbt_cur_cache); + cur->bc_mem.xfbtree = xfbt; + cur->bc_nlevels = xfbt->nlevels; + + cur->bc_mem.pag = xfs_perag_hold(pag); return cur; } -/* Create a new reverse mapping btree cursor with a fake root for staging. */ -struct xfs_btree_cur * -xfs_rmapbt_stage_cursor( +/* Create an in-memory rmap btree. */ +int +xfs_rmapbt_mem_init( struct xfs_mount *mp, - struct xbtree_afakeroot *afake, - struct xfs_perag *pag) + struct xfbtree *xfbt, + struct xfs_buftarg *btp, + xfs_agnumber_t agno) { - struct xfs_btree_cur *cur; + xfbt->owner = agno; + return xfbtree_init(mp, xfbt, btp, &xfs_rmapbt_mem_ops); +} - cur = xfs_rmapbt_init_common(mp, NULL, pag); - xfs_btree_stage_afakeroot(cur, afake); - return cur; +/* Compute the max possible height for reverse mapping btrees in memory. */ +static unsigned int +xfs_rmapbt_mem_maxlevels(void) +{ + unsigned int minrecs[2]; + unsigned int blocklen; + + blocklen = XFBNO_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN; + + minrecs[0] = xfs_rmapbt_mem_block_maxrecs(blocklen, true) / 2; + minrecs[1] = xfs_rmapbt_mem_block_maxrecs(blocklen, false) / 2; + + /* + * How tall can an in-memory rmap btree become if we filled the entire + * AG with rmap records? + */ + return xfs_btree_compute_maxlevels(minrecs, + XFS_MAX_AG_BYTES / sizeof(struct xfs_rmap_rec)); } +#else +# define xfs_rmapbt_mem_maxlevels() (0) +#endif /* CONFIG_XFS_BTREE_IN_MEM */ /* * Install a new reverse mapping btree root. Caller is responsible for @@ -563,12 +703,12 @@ xfs_rmapbt_commit_staged_btree( ASSERT(cur->bc_flags & XFS_BTREE_STAGING); - agf->agf_roots[cur->bc_btnum] = cpu_to_be32(afake->af_root); - agf->agf_levels[cur->bc_btnum] = cpu_to_be32(afake->af_levels); + agf->agf_rmap_root = cpu_to_be32(afake->af_root); + agf->agf_rmap_level = cpu_to_be32(afake->af_levels); agf->agf_rmap_blocks = cpu_to_be32(afake->af_blocks); xfs_alloc_log_agf(tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS | XFS_AGF_RMAP_BLOCKS); - xfs_btree_commit_afakeroot(cur, tp, agbp, &xfs_rmapbt_ops); + xfs_btree_commit_afakeroot(cur, tp, agbp); } /* Calculate number of records in a reverse mapping btree block. */ @@ -618,7 +758,8 @@ xfs_rmapbt_maxlevels_ondisk(void) * like if it consumes almost all the blocks in the AG due to maximal * sharing factor. */ - return xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS); + return max(xfs_btree_space_to_height(minrecs, XFS_MAX_CRC_AG_BLOCKS), + xfs_rmapbt_mem_maxlevels()); } /* Compute the maximum height of an rmap btree. */ diff --git a/fs/xfs/libxfs/xfs_rmap_btree.h b/fs/xfs/libxfs/xfs_rmap_btree.h index 3244715dd1..eb90d89e80 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.h +++ b/fs/xfs/libxfs/xfs_rmap_btree.h @@ -10,6 +10,7 @@ struct xfs_buf; struct xfs_btree_cur; struct xfs_mount; struct xbtree_afakeroot; +struct xfbtree; /* rmaps only exist on crc enabled filesystems */ #define XFS_RMAP_BLOCK_LEN XFS_BTREE_SBLOCK_CRC_LEN @@ -44,8 +45,6 @@ struct xbtree_afakeroot; struct xfs_btree_cur *xfs_rmapbt_init_cursor(struct xfs_mount *mp, struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_perag *pag); -struct xfs_btree_cur *xfs_rmapbt_stage_cursor(struct xfs_mount *mp, - struct xbtree_afakeroot *afake, struct xfs_perag *pag); void xfs_rmapbt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); int xfs_rmapbt_maxrecs(int blocklen, int leaf); @@ -64,4 +63,9 @@ unsigned int xfs_rmapbt_maxlevels_ondisk(void); int __init xfs_rmapbt_init_cur_cache(void); void xfs_rmapbt_destroy_cur_cache(void); +struct xfs_btree_cur *xfs_rmapbt_mem_cursor(struct xfs_perag *pag, + struct xfs_trans *tp, struct xfbtree *xfbtree); +int xfs_rmapbt_mem_init(struct xfs_mount *mp, struct xfbtree *xfbtree, + struct xfs_buftarg *btp, xfs_agnumber_t agno); + #endif /* __XFS_RMAP_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index e31663cb7b..f246d6dbf4 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -17,6 +17,7 @@ #include "xfs_rtalloc.h" #include "xfs_error.h" #include "xfs_rtbitmap.h" +#include "xfs_health.h" /* * Realtime allocator bitmap functions shared with userspace. @@ -115,13 +116,19 @@ xfs_rtbuf_get( if (error) return error; - if (XFS_IS_CORRUPT(mp, nmap == 0 || !xfs_bmap_is_written_extent(&map))) + if (XFS_IS_CORRUPT(mp, nmap == 0 || !xfs_bmap_is_written_extent(&map))) { + xfs_rt_mark_sick(mp, issum ? XFS_SICK_RT_SUMMARY : + XFS_SICK_RT_BITMAP); return -EFSCORRUPTED; + } ASSERT(map.br_startblock != NULLFSBLOCK); error = xfs_trans_read_buf(mp, args->tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, map.br_startblock), mp->m_bsize, 0, &bp, &xfs_rtbuf_ops); + if (xfs_metadata_is_sick(error)) + xfs_rt_mark_sick(mp, issum ? XFS_SICK_RT_SUMMARY : + XFS_SICK_RT_BITMAP); if (error) return error; @@ -934,7 +941,7 @@ xfs_rtfree_extent( struct timespec64 atime; ASSERT(mp->m_rbmip->i_itemp != NULL); - ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); + xfs_assert_ilocked(mp->m_rbmip, XFS_ILOCK_EXCL); error = xfs_rtcheck_alloc_range(&args, start, len); if (error) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 5bb6e2bd6d..73a4b895de 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -530,7 +530,8 @@ xfs_validate_sb_common( } if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit), - XFS_FSB_TO_B(mp, sbp->sb_width), 0, false)) + XFS_FSB_TO_B(mp, sbp->sb_width), 0, + xfs_buf_daddr(bp) == XFS_SB_DADDR, false)) return -EFSCORRUPTED; /* @@ -1290,6 +1291,8 @@ xfs_sb_read_secondary( error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)), XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_sb_buf_ops); + if (xfs_metadata_is_sick(error)) + xfs_agno_mark_sick(mp, agno, XFS_SICK_AG_SB); if (error) return error; xfs_buf_set_ref(bp, XFS_SSB_REF); @@ -1321,8 +1324,10 @@ xfs_sb_get_secondary( } /* - * sunit, swidth, sectorsize(optional with 0) should be all in bytes, - * so users won't be confused by values in error messages. + * sunit, swidth, sectorsize(optional with 0) should be all in bytes, so users + * won't be confused by values in error messages. This function returns false + * if the stripe geometry is invalid and the caller is unable to repair the + * stripe configuration later in the mount process. */ bool xfs_validate_stripe_geometry( @@ -1330,20 +1335,21 @@ xfs_validate_stripe_geometry( __s64 sunit, __s64 swidth, int sectorsize, + bool may_repair, bool silent) { if (swidth > INT_MAX) { if (!silent) xfs_notice(mp, "stripe width (%lld) is too large", swidth); - return false; + goto check_override; } if (sunit > swidth) { if (!silent) xfs_notice(mp, "stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth); - return false; + goto check_override; } if (sectorsize && (int)sunit % sectorsize) { @@ -1351,21 +1357,21 @@ xfs_validate_stripe_geometry( xfs_notice(mp, "stripe unit (%lld) must be a multiple of the sector size (%d)", sunit, sectorsize); - return false; + goto check_override; } if (sunit && !swidth) { if (!silent) xfs_notice(mp, "invalid stripe unit (%lld) and stripe width of 0", sunit); - return false; + goto check_override; } if (!sunit && swidth) { if (!silent) xfs_notice(mp, "invalid stripe width (%lld) and stripe unit of 0", swidth); - return false; + goto check_override; } if (sunit && (int)swidth % (int)sunit) { @@ -1373,9 +1379,27 @@ xfs_validate_stripe_geometry( xfs_notice(mp, "stripe width (%lld) must be a multiple of the stripe unit (%lld)", swidth, sunit); - return false; + goto check_override; } return true; + +check_override: + if (!may_repair) + return false; + /* + * During mount, mp->m_dalign will not be set unless the sunit mount + * option was set. If it was set, ignore the bad stripe alignment values + * and allow the validation and overwrite later in the mount process to + * attempt to overwrite the bad stripe alignment values with the values + * supplied by mount options. + */ + if (!mp->m_dalign) + return false; + if (!silent) + xfs_notice(mp, +"Will try to correct with specified mount options sunit (%d) and swidth (%d)", + BBTOB(mp->m_dalign), BBTOB(mp->m_swidth)); + return true; } /* diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 2e8e8d63d4..37b1ed1bc2 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -35,8 +35,9 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); -extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp, - __s64 sunit, __s64 swidth, int sectorsize, bool silent); +bool xfs_validate_stripe_geometry(struct xfs_mount *mp, + __s64 sunit, __s64 swidth, int sectorsize, bool may_repair, + bool silent); uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 4220d3584c..dfd61fa833 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -43,6 +43,60 @@ extern const struct xfs_buf_ops xfs_sb_buf_ops; extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; extern const struct xfs_buf_ops xfs_symlink_buf_ops; +/* btree ops */ +extern const struct xfs_btree_ops xfs_bnobt_ops; +extern const struct xfs_btree_ops xfs_cntbt_ops; +extern const struct xfs_btree_ops xfs_inobt_ops; +extern const struct xfs_btree_ops xfs_finobt_ops; +extern const struct xfs_btree_ops xfs_bmbt_ops; +extern const struct xfs_btree_ops xfs_refcountbt_ops; +extern const struct xfs_btree_ops xfs_rmapbt_ops; +extern const struct xfs_btree_ops xfs_rmapbt_mem_ops; + +static inline bool xfs_btree_is_bno(const struct xfs_btree_ops *ops) +{ + return ops == &xfs_bnobt_ops; +} + +static inline bool xfs_btree_is_cnt(const struct xfs_btree_ops *ops) +{ + return ops == &xfs_cntbt_ops; +} + +static inline bool xfs_btree_is_bmap(const struct xfs_btree_ops *ops) +{ + return ops == &xfs_bmbt_ops; +} + +static inline bool xfs_btree_is_ino(const struct xfs_btree_ops *ops) +{ + return ops == &xfs_inobt_ops; +} + +static inline bool xfs_btree_is_fino(const struct xfs_btree_ops *ops) +{ + return ops == &xfs_finobt_ops; +} + +static inline bool xfs_btree_is_refcount(const struct xfs_btree_ops *ops) +{ + return ops == &xfs_refcountbt_ops; +} + +static inline bool xfs_btree_is_rmap(const struct xfs_btree_ops *ops) +{ + return ops == &xfs_rmapbt_ops; +} + +#ifdef CONFIG_XFS_BTREE_IN_MEM +static inline bool xfs_btree_is_mem_rmap(const struct xfs_btree_ops *ops) +{ + return ops == &xfs_rmapbt_mem_ops; +} +#else +# define xfs_btree_is_mem_rmap(...) (false) +#endif + /* log size calculation functions */ int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); int xfs_log_calc_minimum_size(struct xfs_mount *); @@ -128,19 +182,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp, #define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ #define XFS_ICHGTIME_CREATE 0x4 /* inode create timestamp */ - -/* - * Symlink decoding/encoding functions - */ -int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen); -int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, - uint32_t size, struct xfs_buf *bp); -bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset, - uint32_t size, struct xfs_buf *bp); -void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, - struct xfs_inode *ip, struct xfs_ifork *ifp); -xfs_failaddr_t xfs_symlink_shortform_verify(void *sfp, int64_t size); - /* Computed inode geometry for the filesystem. */ struct xfs_ino_geometry { /* Maximum inode count in this filesystem. */ diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index 160aa20aa4..ffb1317a92 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -16,7 +16,10 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_log.h" - +#include "xfs_symlink_remote.h" +#include "xfs_bit.h" +#include "xfs_bmap.h" +#include "xfs_health.h" /* * Each contiguous block has a header, so it is not just a simple pathlen @@ -227,3 +230,153 @@ xfs_symlink_shortform_verify( return __this_address; return NULL; } + +/* Read a remote symlink target into the buffer. */ +int +xfs_symlink_remote_read( + struct xfs_inode *ip, + char *link) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_bmbt_irec mval[XFS_SYMLINK_MAPS]; + struct xfs_buf *bp; + xfs_daddr_t d; + char *cur_chunk; + int pathlen = ip->i_disk_size; + int nmaps = XFS_SYMLINK_MAPS; + int byte_cnt; + int n; + int error = 0; + int fsblocks = 0; + int offset; + + xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL); + + fsblocks = xfs_symlink_blocks(mp, pathlen); + error = xfs_bmapi_read(ip, 0, fsblocks, mval, &nmaps, 0); + if (error) + goto out; + + offset = 0; + for (n = 0; n < nmaps; n++) { + d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); + byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); + + error = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0, + &bp, &xfs_symlink_buf_ops); + if (xfs_metadata_is_sick(error)) + xfs_inode_mark_sick(ip, XFS_SICK_INO_SYMLINK); + if (error) + return error; + byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); + if (pathlen < byte_cnt) + byte_cnt = pathlen; + + cur_chunk = bp->b_addr; + if (xfs_has_crc(mp)) { + if (!xfs_symlink_hdr_ok(ip->i_ino, offset, + byte_cnt, bp)) { + xfs_inode_mark_sick(ip, XFS_SICK_INO_SYMLINK); + error = -EFSCORRUPTED; + xfs_alert(mp, +"symlink header does not match required off/len/owner (0x%x/0x%x,0x%llx)", + offset, byte_cnt, ip->i_ino); + xfs_buf_relse(bp); + goto out; + + } + + cur_chunk += sizeof(struct xfs_dsymlink_hdr); + } + + memcpy(link + offset, cur_chunk, byte_cnt); + + pathlen -= byte_cnt; + offset += byte_cnt; + + xfs_buf_relse(bp); + } + ASSERT(pathlen == 0); + + link[ip->i_disk_size] = '\0'; + error = 0; + + out: + return error; +} + +/* Write the symlink target into the inode. */ +int +xfs_symlink_write_target( + struct xfs_trans *tp, + struct xfs_inode *ip, + const char *target_path, + int pathlen, + xfs_fsblock_t fs_blocks, + uint resblks) +{ + struct xfs_bmbt_irec mval[XFS_SYMLINK_MAPS]; + struct xfs_mount *mp = tp->t_mountp; + const char *cur_chunk; + struct xfs_buf *bp; + xfs_daddr_t d; + int byte_cnt; + int nmaps; + int offset = 0; + int n; + int error; + + /* + * If the symlink will fit into the inode, write it inline. + */ + if (pathlen <= xfs_inode_data_fork_size(ip)) { + xfs_init_local_fork(ip, XFS_DATA_FORK, target_path, pathlen); + + ip->i_disk_size = pathlen; + ip->i_df.if_format = XFS_DINODE_FMT_LOCAL; + xfs_trans_log_inode(tp, ip, XFS_ILOG_DDATA | XFS_ILOG_CORE); + return 0; + } + + nmaps = XFS_SYMLINK_MAPS; + error = xfs_bmapi_write(tp, ip, 0, fs_blocks, XFS_BMAPI_METADATA, + resblks, mval, &nmaps); + if (error) + return error; + + ip->i_disk_size = pathlen; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + + cur_chunk = target_path; + offset = 0; + for (n = 0; n < nmaps; n++) { + char *buf; + + d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); + byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); + error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, + BTOBB(byte_cnt), 0, &bp); + if (error) + return error; + bp->b_ops = &xfs_symlink_buf_ops; + + byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); + byte_cnt = min(byte_cnt, pathlen); + + buf = bp->b_addr; + buf += xfs_symlink_hdr_set(mp, ip->i_ino, offset, byte_cnt, + bp); + + memcpy(buf, cur_chunk, byte_cnt); + + cur_chunk += byte_cnt; + pathlen -= byte_cnt; + offset += byte_cnt; + + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SYMLINK_BUF); + xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) - + (char *)bp->b_addr); + } + ASSERT(pathlen == 0); + return 0; +} diff --git a/fs/xfs/libxfs/xfs_symlink_remote.h b/fs/xfs/libxfs/xfs_symlink_remote.h new file mode 100644 index 0000000000..a63bd38ae4 --- /dev/null +++ b/fs/xfs/libxfs/xfs_symlink_remote.h @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + */ +#ifndef __XFS_SYMLINK_REMOTE_H +#define __XFS_SYMLINK_REMOTE_H + +/* + * Symlink decoding/encoding functions + */ +int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen); +int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, + uint32_t size, struct xfs_buf *bp); +bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset, + uint32_t size, struct xfs_buf *bp); +void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, + struct xfs_inode *ip, struct xfs_ifork *ifp); +xfs_failaddr_t xfs_symlink_shortform_verify(void *sfp, int64_t size); +int xfs_symlink_remote_read(struct xfs_inode *ip, char *link); +int xfs_symlink_write_target(struct xfs_trans *tp, struct xfs_inode *ip, + const char *target_path, int pathlen, xfs_fsblock_t fs_blocks, + uint resblks); + +#endif /* __XFS_SYMLINK_REMOTE_H */ diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c index 70e97ea6ee..69fc5b9813 100644 --- a/fs/xfs/libxfs/xfs_trans_inode.c +++ b/fs/xfs/libxfs/xfs_trans_inode.c @@ -31,7 +31,7 @@ xfs_trans_ijoin( { struct xfs_inode_log_item *iip; - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); if (ip->i_itemp == NULL) xfs_inode_item_init(ip, ip->i_mount); iip = ip->i_itemp; @@ -60,7 +60,7 @@ xfs_trans_ichgtime( struct timespec64 tv; ASSERT(tp); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); tv = current_time(inode); @@ -90,7 +90,7 @@ xfs_trans_log_inode( struct inode *inode = VFS_I(ip); ASSERT(iip); - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); ASSERT(!xfs_iflags_test(ip, XFS_ISTALE)); tp->t_flags |= XFS_TRANS_DIRTY; diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 62e02d5380..76eb9e3288 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -80,11 +80,13 @@ typedef void * xfs_failaddr_t; /* * Inode fork identifiers. */ -#define XFS_DATA_FORK 0 -#define XFS_ATTR_FORK 1 -#define XFS_COW_FORK 2 +#define XFS_STAGING_FORK (-1) /* fake fork for staging a btree */ +#define XFS_DATA_FORK (0) +#define XFS_ATTR_FORK (1) +#define XFS_COW_FORK (2) #define XFS_WHICHFORK_STRINGS \ + { XFS_STAGING_FORK, "staging" }, \ { XFS_DATA_FORK, "data" }, \ { XFS_ATTR_FORK, "attr" }, \ { XFS_COW_FORK, "cow" } @@ -114,24 +116,6 @@ typedef enum { { XFS_LOOKUP_LEi, "le" }, \ { XFS_LOOKUP_GEi, "ge" } -/* - * This enum is used in string mapping in xfs_trace.h and scrub/trace.h; - * please keep the TRACE_DEFINE_ENUMs for it up to date. - */ -typedef enum { - XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi, - XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_REFCi, XFS_BTNUM_MAX -} xfs_btnum_t; - -#define XFS_BTNUM_STRINGS \ - { XFS_BTNUM_BNOi, "bnobt" }, \ - { XFS_BTNUM_CNTi, "cntbt" }, \ - { XFS_BTNUM_RMAPi, "rmapbt" }, \ - { XFS_BTNUM_BMAPi, "bmbt" }, \ - { XFS_BTNUM_INOi, "inobt" }, \ - { XFS_BTNUM_FINOi, "finobt" }, \ - { XFS_BTNUM_REFCi, "refcbt" } - struct xfs_name { const unsigned char *name; int len; |