diff options
Diffstat (limited to 'fs/xfs/libxfs')
48 files changed, 1665 insertions, 831 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index f9f4d69464..39d9525270 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -332,6 +332,31 @@ xfs_agino_range( return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last); } +/* + * Free perag within the specified AG range, it is only used to free unused + * perags under the error handling path. + */ +void +xfs_free_unused_perag_range( + struct xfs_mount *mp, + xfs_agnumber_t agstart, + xfs_agnumber_t agend) +{ + struct xfs_perag *pag; + xfs_agnumber_t index; + + for (index = agstart; index < agend; index++) { + spin_lock(&mp->m_perag_lock); + pag = radix_tree_delete(&mp->m_perag_tree, index); + spin_unlock(&mp->m_perag_lock); + if (!pag) + break; + xfs_buf_hash_destroy(pag); + xfs_defer_drain_free(&pag->pag_intents_drain); + kmem_free(pag); + } +} + int xfs_initialize_perag( struct xfs_mount *mp, @@ -424,19 +449,14 @@ xfs_initialize_perag( out_remove_pag: xfs_defer_drain_free(&pag->pag_intents_drain); + spin_lock(&mp->m_perag_lock); radix_tree_delete(&mp->m_perag_tree, index); + spin_unlock(&mp->m_perag_lock); out_free_pag: kmem_free(pag); out_unwind_new_pags: /* unwind any prior newly initialized pags */ - for (index = first_initialised; index < agcount; index++) { - pag = radix_tree_delete(&mp->m_perag_tree, index); - if (!pag) - break; - xfs_buf_hash_destroy(pag); - xfs_defer_drain_free(&pag->pag_intents_drain); - kmem_free(pag); - } + xfs_free_unused_perag_range(mp, first_initialised, agcount); return error; } @@ -984,7 +1004,7 @@ xfs_ag_shrink_space( if (err2 != -ENOSPC) goto resv_err; - err2 = __xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, + err2 = xfs_free_extent_later(*tpp, args.fsbno, delta, NULL, XFS_AG_RESV_NONE, true); if (err2) goto resv_err; diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 2e0aef87d6..4b343c4fac 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -80,6 +80,16 @@ struct xfs_perag { */ uint16_t pag_checked; uint16_t pag_sick; + +#ifdef CONFIG_XFS_ONLINE_REPAIR + /* + * Alternate btree heights so that online repair won't trip the write + * verifiers while rebuilding the AG btrees. + */ + uint8_t pagf_repair_levels[XFS_BTNUM_AGF]; + uint8_t pagf_repair_refcount_level; +#endif + spinlock_t pag_state_lock; spinlock_t pagb_lock; /* lock for pagb_tree */ @@ -133,6 +143,8 @@ __XFS_AG_OPSTATE(prefers_metadata, PREFERS_METADATA) __XFS_AG_OPSTATE(allows_inodes, ALLOWS_INODES) __XFS_AG_OPSTATE(agfl_needs_reset, AGFL_NEEDS_RESET) +void xfs_free_unused_perag_range(struct xfs_mount *mp, xfs_agnumber_t agstart, + xfs_agnumber_t agend); int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount, xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi); int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno); diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 7fd1fea955..da1057bd0e 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -411,6 +411,8 @@ xfs_ag_resv_free_extent( fallthrough; case XFS_AG_RESV_NONE: xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (int64_t)len); + fallthrough; + case XFS_AG_RESV_IGNORE: return; } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 100ab5931b..3bd0a33fee 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -246,11 +246,9 @@ xfs_alloc_btrec_to_irec( /* Simple checks for free space records. */ xfs_failaddr_t xfs_alloc_check_irec( - struct xfs_btree_cur *cur, - const struct xfs_alloc_rec_incore *irec) + struct xfs_perag *pag, + const struct xfs_alloc_rec_incore *irec) { - struct xfs_perag *pag = cur->bc_ag.pag; - if (irec->ar_blockcount == 0) return __this_address; @@ -299,7 +297,7 @@ xfs_alloc_get_rec( return error; xfs_alloc_btrec_to_irec(rec, &irec); - fa = xfs_alloc_check_irec(cur, &irec); + fa = xfs_alloc_check_irec(cur->bc_ag.pag, &irec); if (fa) return xfs_alloc_complain_bad_rec(cur, fa, &irec); @@ -2514,7 +2512,7 @@ xfs_defer_agfl_block( trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); xfs_extent_free_get_group(mp, xefi); - xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list); + xfs_defer_add(tp, &xefi->xefi_list, &xfs_agfl_free_defer_type); return 0; } @@ -2522,14 +2520,15 @@ xfs_defer_agfl_block( * Add the extent to the list of extents to be free at transaction end. * The list is maintained sorted (by block number). */ -int -__xfs_free_extent_later( +static int +xfs_defer_extent_free( struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type, - bool skip_discard) + bool skip_discard, + struct xfs_defer_pending **dfpp) { struct xfs_extent_free_item *xefi; struct xfs_mount *mp = tp->t_mountp; @@ -2577,10 +2576,105 @@ __xfs_free_extent_later( XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); xfs_extent_free_get_group(mp, xefi); - xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list); + *dfpp = xfs_defer_add(tp, &xefi->xefi_list, &xfs_extent_free_defer_type); return 0; } +int +xfs_free_extent_later( + struct xfs_trans *tp, + xfs_fsblock_t bno, + xfs_filblks_t len, + const struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type type, + bool skip_discard) +{ + struct xfs_defer_pending *dontcare = NULL; + + return xfs_defer_extent_free(tp, bno, len, oinfo, type, skip_discard, + &dontcare); +} + +/* + * Set up automatic freeing of unwritten space in the filesystem. + * + * This function attached a paused deferred extent free item to the + * transaction. Pausing means that the EFI will be logged in the next + * transaction commit, but the pending EFI will not be finished until the + * pending item is unpaused. + * + * If the system goes down after the EFI has been persisted to the log but + * before the pending item is unpaused, log recovery will find the EFI, fail to + * find the EFD, and free the space. + * + * If the pending item is unpaused, the next transaction commit will log an EFD + * without freeing the space. + * + * Caller must ensure that the tp, fsbno, len, oinfo, and resv flags of the + * @args structure are set to the relevant values. + */ +int +xfs_alloc_schedule_autoreap( + const struct xfs_alloc_arg *args, + bool skip_discard, + struct xfs_alloc_autoreap *aarp) +{ + int error; + + error = xfs_defer_extent_free(args->tp, args->fsbno, args->len, + &args->oinfo, args->resv, skip_discard, &aarp->dfp); + if (error) + return error; + + xfs_defer_item_pause(args->tp, aarp->dfp); + return 0; +} + +/* + * Cancel automatic freeing of unwritten space in the filesystem. + * + * Earlier, we created a paused deferred extent free item and attached it to + * this transaction so that we could automatically roll back a new space + * allocation if the system went down. Now we want to cancel the paused work + * item by marking the EFI stale so we don't actually free the space, unpausing + * the pending item and logging an EFD. + * + * The caller generally should have already mapped the space into the ondisk + * filesystem. If the reserved space was partially used, the caller must call + * xfs_free_extent_later to create a new EFI to free the unused space. + */ +void +xfs_alloc_cancel_autoreap( + struct xfs_trans *tp, + struct xfs_alloc_autoreap *aarp) +{ + struct xfs_defer_pending *dfp = aarp->dfp; + struct xfs_extent_free_item *xefi; + + if (!dfp) + return; + + list_for_each_entry(xefi, &dfp->dfp_work, xefi_list) + xefi->xefi_flags |= XFS_EFI_CANCELLED; + + xfs_defer_item_unpause(tp, dfp); +} + +/* + * Commit automatic freeing of unwritten space in the filesystem. + * + * This unpauses an earlier _schedule_autoreap and commits to freeing the + * allocated space. Call this if none of the reserved space was used. + */ +void +xfs_alloc_commit_autoreap( + struct xfs_trans *tp, + struct xfs_alloc_autoreap *aarp) +{ + if (aarp->dfp) + xfs_defer_item_unpause(tp, aarp->dfp); +} + #ifdef DEBUG /* * Check if an AGF has a free extent record whose length is equal to @@ -3848,7 +3942,7 @@ xfs_alloc_query_range_helper( xfs_failaddr_t fa; xfs_alloc_btrec_to_irec(rec, &irec); - fa = xfs_alloc_check_irec(cur, &irec); + fa = xfs_alloc_check_irec(cur->bc_ag.pag, &irec); if (fa) return xfs_alloc_complain_bad_rec(cur, fa, &irec); diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 6bb8d295c3..0b956f8b9d 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -185,7 +185,7 @@ xfs_alloc_get_rec( union xfs_btree_rec; void xfs_alloc_btrec_to_irec(const union xfs_btree_rec *rec, struct xfs_alloc_rec_incore *irec); -xfs_failaddr_t xfs_alloc_check_irec(struct xfs_btree_cur *cur, +xfs_failaddr_t xfs_alloc_check_irec(struct xfs_perag *pag, const struct xfs_alloc_rec_incore *irec); int xfs_read_agf(struct xfs_perag *pag, struct xfs_trans *tp, int flags, @@ -231,7 +231,7 @@ xfs_buf_to_agfl_bno( return bp->b_addr; } -int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, +int xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type, bool skip_discard); @@ -255,18 +255,18 @@ void xfs_extent_free_get_group(struct xfs_mount *mp, #define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */ #define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */ #define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */ +#define XFS_EFI_CANCELLED (1U << 3) /* dont actually free the space */ -static inline int -xfs_free_extent_later( - struct xfs_trans *tp, - xfs_fsblock_t bno, - xfs_filblks_t len, - const struct xfs_owner_info *oinfo, - enum xfs_ag_resv_type type) -{ - return __xfs_free_extent_later(tp, bno, len, oinfo, type, false); -} +struct xfs_alloc_autoreap { + struct xfs_defer_pending *dfp; +}; +int xfs_alloc_schedule_autoreap(const struct xfs_alloc_arg *args, + bool skip_discard, struct xfs_alloc_autoreap *aarp); +void xfs_alloc_cancel_autoreap(struct xfs_trans *tp, + struct xfs_alloc_autoreap *aarp); +void xfs_alloc_commit_autoreap(struct xfs_trans *tp, + struct xfs_alloc_autoreap *aarp); extern struct kmem_cache *xfs_extfree_item_cache; diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index c65228efed..a7032bf0cd 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -323,7 +323,18 @@ xfs_allocbt_verify( if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC)) btnum = XFS_BTNUM_CNTi; if (pag && xfs_perag_initialised_agf(pag)) { - if (level >= pag->pagf_levels[btnum]) + unsigned int maxlevel = pag->pagf_levels[btnum]; + +#ifdef CONFIG_XFS_ONLINE_REPAIR + /* + * Online repair could be rewriting the free space btrees, so + * we'll validate against the larger of either tree while this + * is going on. + */ + maxlevel = max_t(unsigned int, maxlevel, + pag->pagf_repair_levels[btnum]); +#endif + if (level >= maxlevel) return __this_address; } else if (level >= mp->m_alloc_maxlevels) return __this_address; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index e28d93d232..e965a48e7d 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -421,10 +421,10 @@ xfs_attr_complete_op( bool do_replace = args->op_flags & XFS_DA_OP_REPLACE; args->op_flags &= ~XFS_DA_OP_REPLACE; - if (do_replace) { - args->attr_filter &= ~XFS_ATTR_INCOMPLETE; + args->attr_filter &= ~XFS_ATTR_INCOMPLETE; + if (do_replace) return replace_state; - } + return XFS_DAS_DONE; } @@ -862,8 +862,11 @@ xfs_attr_lookup( if (!xfs_inode_hasattr(dp)) return -ENOATTR; - if (dp->i_af.if_format == XFS_DINODE_FMT_LOCAL) - return xfs_attr_sf_findname(args, NULL, NULL); + if (dp->i_af.if_format == XFS_DINODE_FMT_LOCAL) { + if (xfs_attr_sf_findname(args)) + return -EEXIST; + return -ENOATTR; + } if (xfs_attr_is_leaf(dp)) { error = xfs_attr_leaf_hasname(args, &bp); @@ -880,11 +883,10 @@ xfs_attr_lookup( return error; } -static int -xfs_attr_intent_init( +static void +xfs_attr_defer_add( struct xfs_da_args *args, - unsigned int op_flags, /* op flag (set or remove) */ - struct xfs_attr_intent **attr) /* new xfs_attr_intent */ + unsigned int op_flags) { struct xfs_attr_intent *new; @@ -893,66 +895,22 @@ xfs_attr_intent_init( new->xattri_op_flags = op_flags; new->xattri_da_args = args; - *attr = new; - return 0; -} - -/* Sets an attribute for an inode as a deferred operation */ -static int -xfs_attr_defer_add( - struct xfs_da_args *args) -{ - struct xfs_attr_intent *new; - int error = 0; - - error = xfs_attr_intent_init(args, XFS_ATTRI_OP_FLAGS_SET, &new); - if (error) - return error; + switch (op_flags) { + case XFS_ATTRI_OP_FLAGS_SET: + new->xattri_dela_state = xfs_attr_init_add_state(args); + break; + case XFS_ATTRI_OP_FLAGS_REPLACE: + new->xattri_dela_state = xfs_attr_init_replace_state(args); + break; + case XFS_ATTRI_OP_FLAGS_REMOVE: + new->xattri_dela_state = xfs_attr_init_remove_state(args); + break; + default: + ASSERT(0); + } - new->xattri_dela_state = xfs_attr_init_add_state(args); - xfs_defer_add(args->trans, XFS_DEFER_OPS_TYPE_ATTR, &new->xattri_list); + xfs_defer_add(args->trans, &new->xattri_list, &xfs_attr_defer_type); trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp); - - return 0; -} - -/* Sets an attribute for an inode as a deferred operation */ -static int -xfs_attr_defer_replace( - struct xfs_da_args *args) -{ - struct xfs_attr_intent *new; - int error = 0; - - error = xfs_attr_intent_init(args, XFS_ATTRI_OP_FLAGS_REPLACE, &new); - if (error) - return error; - - new->xattri_dela_state = xfs_attr_init_replace_state(args); - xfs_defer_add(args->trans, XFS_DEFER_OPS_TYPE_ATTR, &new->xattri_list); - trace_xfs_attr_defer_replace(new->xattri_dela_state, args->dp); - - return 0; -} - -/* Removes an attribute for an inode as a deferred operation */ -static int -xfs_attr_defer_remove( - struct xfs_da_args *args) -{ - - struct xfs_attr_intent *new; - int error; - - error = xfs_attr_intent_init(args, XFS_ATTRI_OP_FLAGS_REMOVE, &new); - if (error) - return error; - - new->xattri_dela_state = xfs_attr_init_remove_state(args); - xfs_defer_add(args->trans, XFS_DEFER_OPS_TYPE_ATTR, &new->xattri_list); - trace_xfs_attr_defer_remove(new->xattri_dela_state, args->dp); - - return 0; } /* @@ -1038,16 +996,16 @@ xfs_attr_set( error = xfs_attr_lookup(args); switch (error) { case -EEXIST: - /* if no value, we are performing a remove operation */ if (!args->value) { - error = xfs_attr_defer_remove(args); + /* if no value, we are performing a remove operation */ + xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REMOVE); break; } + /* Pure create fails if the attr already exists */ if (args->attr_flags & XATTR_CREATE) goto out_trans_cancel; - - error = xfs_attr_defer_replace(args); + xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REPLACE); break; case -ENOATTR: /* Can't remove what isn't there. */ @@ -1057,14 +1015,11 @@ xfs_attr_set( /* Pure replace fails if no existing attr to replace. */ if (args->attr_flags & XATTR_REPLACE) goto out_trans_cancel; - - error = xfs_attr_defer_add(args); + xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_SET); break; default: goto out_trans_cancel; } - if (error) - goto out_trans_cancel; /* * If this is a synchronous mount, make sure that the @@ -1097,10 +1052,9 @@ out_trans_cancel: static inline int xfs_attr_sf_totsize(struct xfs_inode *dp) { - struct xfs_attr_shortform *sf; + struct xfs_attr_sf_hdr *sf = dp->i_af.if_data; - sf = (struct xfs_attr_shortform *)dp->i_af.if_u1.if_data; - return be16_to_cpu(sf->hdr.totsize); + return be16_to_cpu(sf->totsize); } /* @@ -1112,19 +1066,13 @@ xfs_attr_shortform_addname( struct xfs_da_args *args) { int newsize, forkoff; - int error; trace_xfs_attr_sf_addname(args); - error = xfs_attr_shortform_lookup(args); - switch (error) { - case -ENOATTR: - if (args->op_flags & XFS_DA_OP_REPLACE) - return error; - break; - case -EEXIST: - if (!(args->op_flags & XFS_DA_OP_REPLACE)) - return error; + if (xfs_attr_sf_findname(args)) { + int error; + + ASSERT(args->op_flags & XFS_DA_OP_REPLACE); error = xfs_attr_sf_removename(args); if (error) @@ -1137,11 +1085,8 @@ xfs_attr_shortform_addname( * around. */ args->op_flags &= ~XFS_DA_OP_REPLACE; - break; - case 0: - break; - default: - return error; + } else { + ASSERT(!(args->op_flags & XFS_DA_OP_REPLACE)); } if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX || diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 2580ae4720..6374bf1072 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -690,56 +690,32 @@ xfs_attr_shortform_create( ASSERT(ifp->if_bytes == 0); if (ifp->if_format == XFS_DINODE_FMT_EXTENTS) ifp->if_format = XFS_DINODE_FMT_LOCAL; - xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK); - hdr = (struct xfs_attr_sf_hdr *)ifp->if_u1.if_data; + + hdr = xfs_idata_realloc(dp, sizeof(*hdr), XFS_ATTR_FORK); memset(hdr, 0, sizeof(*hdr)); hdr->totsize = cpu_to_be16(sizeof(*hdr)); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); } /* - * Return -EEXIST if attr is found, or -ENOATTR if not - * args: args containing attribute name and namelen - * sfep: If not null, pointer will be set to the last attr entry found on - -EEXIST. On -ENOATTR pointer is left at the last entry in the list - * basep: If not null, pointer is set to the byte offset of the entry in the - * list on -EEXIST. On -ENOATTR, pointer is left at the byte offset of - * the last entry in the list + * Return the entry if the attr in args is found, or NULL if not. */ -int +struct xfs_attr_sf_entry * xfs_attr_sf_findname( - struct xfs_da_args *args, - struct xfs_attr_sf_entry **sfep, - unsigned int *basep) + struct xfs_da_args *args) { - struct xfs_attr_shortform *sf; - struct xfs_attr_sf_entry *sfe; - unsigned int base = sizeof(struct xfs_attr_sf_hdr); - int size = 0; - int end; - int i; + struct xfs_attr_sf_hdr *sf = args->dp->i_af.if_data; + struct xfs_attr_sf_entry *sfe; - sf = (struct xfs_attr_shortform *)args->dp->i_af.if_u1.if_data; - sfe = &sf->list[0]; - end = sf->hdr.count; - for (i = 0; i < end; sfe = xfs_attr_sf_nextentry(sfe), - base += size, i++) { - size = xfs_attr_sf_entsize(sfe); - if (!xfs_attr_match(args, sfe->namelen, sfe->nameval, - sfe->flags)) - continue; - break; + for (sfe = xfs_attr_sf_firstentry(sf); + sfe < xfs_attr_sf_endptr(sf); + sfe = xfs_attr_sf_nextentry(sfe)) { + if (xfs_attr_match(args, sfe->namelen, sfe->nameval, + sfe->flags)) + return sfe; } - if (sfep != NULL) - *sfep = sfe; - - if (basep != NULL) - *basep = base; - - if (i == end) - return -ENOATTR; - return -EEXIST; + return NULL; } /* @@ -751,38 +727,31 @@ xfs_attr_shortform_add( struct xfs_da_args *args, int forkoff) { - struct xfs_attr_shortform *sf; + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; + struct xfs_ifork *ifp = &dp->i_af; + struct xfs_attr_sf_hdr *sf = ifp->if_data; struct xfs_attr_sf_entry *sfe; - int offset, size; - struct xfs_mount *mp; - struct xfs_inode *dp; - struct xfs_ifork *ifp; + int size; trace_xfs_attr_sf_add(args); - dp = args->dp; - mp = dp->i_mount; dp->i_forkoff = forkoff; - ifp = &dp->i_af; ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); - sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data; - if (xfs_attr_sf_findname(args, &sfe, NULL) == -EEXIST) - ASSERT(0); + ASSERT(!xfs_attr_sf_findname(args)); - offset = (char *)sfe - (char *)sf; size = xfs_attr_sf_entsize_byname(args->namelen, args->valuelen); - xfs_idata_realloc(dp, size, XFS_ATTR_FORK); - sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data; - sfe = (struct xfs_attr_sf_entry *)((char *)sf + offset); + sf = xfs_idata_realloc(dp, size, XFS_ATTR_FORK); + sfe = xfs_attr_sf_endptr(sf); sfe->namelen = args->namelen; sfe->valuelen = args->valuelen; sfe->flags = args->attr_filter; memcpy(sfe->nameval, args->name, args->namelen); memcpy(&sfe->nameval[args->namelen], args->value, args->valuelen); - sf->hdr.count++; - be16_add_cpu(&sf->hdr.totsize, size); + sf->count++; + be16_add_cpu(&sf->totsize, size); xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_ADATA); xfs_sbversion_add_attr2(mp, args->trans); @@ -811,48 +780,43 @@ int xfs_attr_sf_removename( struct xfs_da_args *args) { - struct xfs_attr_shortform *sf; + struct xfs_inode *dp = args->dp; + struct xfs_mount *mp = dp->i_mount; + struct xfs_attr_sf_hdr *sf = dp->i_af.if_data; struct xfs_attr_sf_entry *sfe; - int size = 0, end, totsize; - unsigned int base; - struct xfs_mount *mp; - struct xfs_inode *dp; - int error; + uint16_t totsize = be16_to_cpu(sf->totsize); + void *next, *end; + int size = 0; trace_xfs_attr_sf_remove(args); - dp = args->dp; - mp = dp->i_mount; - sf = (struct xfs_attr_shortform *)dp->i_af.if_u1.if_data; - - error = xfs_attr_sf_findname(args, &sfe, &base); - - /* - * If we are recovering an operation, finding nothing to - * remove is not an error - it just means there was nothing - * to clean up. - */ - if (error == -ENOATTR && (args->op_flags & XFS_DA_OP_RECOVERY)) - return 0; - if (error != -EEXIST) - return error; - size = xfs_attr_sf_entsize(sfe); + sfe = xfs_attr_sf_findname(args); + if (!sfe) { + /* + * If we are recovering an operation, finding nothing to remove + * is not an error, it just means there was nothing to clean up. + */ + if (args->op_flags & XFS_DA_OP_RECOVERY) + return 0; + return -ENOATTR; + } /* * Fix up the attribute fork data, covering the hole */ - end = base + size; - totsize = be16_to_cpu(sf->hdr.totsize); - if (end != totsize) - memmove(&((char *)sf)[base], &((char *)sf)[end], totsize - end); - sf->hdr.count--; - be16_add_cpu(&sf->hdr.totsize, -size); + size = xfs_attr_sf_entsize(sfe); + next = xfs_attr_sf_nextentry(sfe); + end = xfs_attr_sf_endptr(sf); + if (next < end) + memmove(sfe, next, end - next); + sf->count--; + totsize -= size; + sf->totsize = cpu_to_be16(totsize); /* * Fix up the start offset of the attribute fork */ - totsize -= size; - if (totsize == sizeof(xfs_attr_sf_hdr_t) && xfs_has_attr2(mp) && + if (totsize == sizeof(struct xfs_attr_sf_hdr) && xfs_has_attr2(mp) && (dp->i_df.if_format != XFS_DINODE_FMT_BTREE) && !(args->op_flags & (XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE))) { xfs_attr_fork_remove(dp, args->trans); @@ -860,7 +824,7 @@ xfs_attr_sf_removename( xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); dp->i_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); ASSERT(dp->i_forkoff); - ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || + ASSERT(totsize > sizeof(struct xfs_attr_sf_hdr) || (args->op_flags & XFS_DA_OP_ADDNAME) || !xfs_has_attr2(mp) || dp->i_df.if_format == XFS_DINODE_FMT_BTREE); @@ -874,33 +838,6 @@ xfs_attr_sf_removename( } /* - * Look up a name in a shortform attribute list structure. - */ -/*ARGSUSED*/ -int -xfs_attr_shortform_lookup(xfs_da_args_t *args) -{ - struct xfs_attr_shortform *sf; - struct xfs_attr_sf_entry *sfe; - int i; - struct xfs_ifork *ifp; - - trace_xfs_attr_sf_lookup(args); - - ifp = &args->dp->i_af; - ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); - sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data; - sfe = &sf->list[0]; - for (i = 0; i < sf->hdr.count; - sfe = xfs_attr_sf_nextentry(sfe), i++) { - if (xfs_attr_match(args, sfe->namelen, sfe->nameval, - sfe->flags)) - return -EEXIST; - } - return -ENOATTR; -} - -/* * Retrieve the attribute value and length. * * If args->valuelen is zero, only the length needs to be returned. Unlike a @@ -909,23 +846,19 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args) */ int xfs_attr_shortform_getvalue( - struct xfs_da_args *args) + struct xfs_da_args *args) { - struct xfs_attr_shortform *sf; - struct xfs_attr_sf_entry *sfe; - int i; + struct xfs_attr_sf_entry *sfe; ASSERT(args->dp->i_af.if_format == XFS_DINODE_FMT_LOCAL); - sf = (struct xfs_attr_shortform *)args->dp->i_af.if_u1.if_data; - sfe = &sf->list[0]; - for (i = 0; i < sf->hdr.count; - sfe = xfs_attr_sf_nextentry(sfe), i++) { - if (xfs_attr_match(args, sfe->namelen, sfe->nameval, - sfe->flags)) - return xfs_attr_copy_value(args, - &sfe->nameval[args->namelen], sfe->valuelen); - } - return -ENOATTR; + + trace_xfs_attr_sf_lookup(args); + + sfe = xfs_attr_sf_findname(args); + if (!sfe) + return -ENOATTR; + return xfs_attr_copy_value(args, &sfe->nameval[args->namelen], + sfe->valuelen); } /* Convert from using the shortform to the leaf format. */ @@ -933,26 +866,23 @@ int xfs_attr_shortform_to_leaf( struct xfs_da_args *args) { - struct xfs_inode *dp; - struct xfs_attr_shortform *sf; + struct xfs_inode *dp = args->dp; + struct xfs_ifork *ifp = &dp->i_af; + struct xfs_attr_sf_hdr *sf = ifp->if_data; struct xfs_attr_sf_entry *sfe; + int size = be16_to_cpu(sf->totsize); struct xfs_da_args nargs; char *tmpbuffer; - int error, i, size; + int error, i; xfs_dablk_t blkno; struct xfs_buf *bp; - struct xfs_ifork *ifp; trace_xfs_attr_sf_to_leaf(args); - dp = args->dp; - ifp = &dp->i_af; - sf = (struct xfs_attr_shortform *)ifp->if_u1.if_data; - size = be16_to_cpu(sf->hdr.totsize); tmpbuffer = kmem_alloc(size, 0); ASSERT(tmpbuffer != NULL); - memcpy(tmpbuffer, ifp->if_u1.if_data, size); - sf = (struct xfs_attr_shortform *)tmpbuffer; + memcpy(tmpbuffer, ifp->if_data, size); + sf = (struct xfs_attr_sf_hdr *)tmpbuffer; xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); xfs_bmap_local_to_extents_empty(args->trans, dp, XFS_ATTR_FORK); @@ -975,8 +905,8 @@ xfs_attr_shortform_to_leaf( nargs.trans = args->trans; nargs.op_flags = XFS_DA_OP_OKNOENT; - sfe = &sf->list[0]; - for (i = 0; i < sf->hdr.count; i++) { + sfe = xfs_attr_sf_firstentry(sf); + for (i = 0; i < sf->count; i++) { nargs.name = sfe->nameval; nargs.namelen = sfe->namelen; nargs.value = &sfe->nameval[nargs.namelen]; @@ -1040,23 +970,16 @@ xfs_attr_shortform_allfit( return xfs_attr_shortform_bytesfit(dp, bytes); } -/* Verify the consistency of an inline attribute fork. */ +/* Verify the consistency of a raw inline attribute fork. */ xfs_failaddr_t xfs_attr_shortform_verify( - struct xfs_inode *ip) + struct xfs_attr_sf_hdr *sfp, + size_t size) { - struct xfs_attr_shortform *sfp; - struct xfs_attr_sf_entry *sfep; + struct xfs_attr_sf_entry *sfep = xfs_attr_sf_firstentry(sfp); struct xfs_attr_sf_entry *next_sfep; char *endp; - struct xfs_ifork *ifp; int i; - int64_t size; - - ASSERT(ip->i_af.if_format == XFS_DINODE_FMT_LOCAL); - ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK); - sfp = (struct xfs_attr_shortform *)ifp->if_u1.if_data; - size = ifp->if_bytes; /* * Give up if the attribute is way too short. @@ -1067,8 +990,7 @@ xfs_attr_shortform_verify( endp = (char *)sfp + size; /* Check all reported entries */ - sfep = &sfp->list[0]; - for (i = 0; i < sfp->hdr.count; i++) { + for (i = 0; i < sfp->count; i++) { /* * struct xfs_attr_sf_entry has a variable length. * Check the fixed-offset parts of the structure are @@ -1244,14 +1166,10 @@ xfs_attr3_leaf_to_node( if (error) goto out; - /* copy leaf to new buffer, update identifiers */ - xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF); - bp2->b_ops = bp1->b_ops; - memcpy(bp2->b_addr, bp1->b_addr, args->geo->blksize); - if (xfs_has_crc(mp)) { - struct xfs_da3_blkinfo *hdr3 = bp2->b_addr; - hdr3->blkno = cpu_to_be64(xfs_buf_daddr(bp2)); - } + /* + * Copy leaf to new buffer and log it. + */ + xfs_da_buf_copy(bp2, bp1, args->geo->blksize); xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1); /* diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 368f4d9fa1..9b9948639c 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -47,16 +47,14 @@ struct xfs_attr3_icleaf_hdr { */ void xfs_attr_shortform_create(struct xfs_da_args *args); void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); -int xfs_attr_shortform_lookup(struct xfs_da_args *args); int xfs_attr_shortform_getvalue(struct xfs_da_args *args); int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); int xfs_attr_sf_removename(struct xfs_da_args *args); -int xfs_attr_sf_findname(struct xfs_da_args *args, - struct xfs_attr_sf_entry **sfep, - unsigned int *basep); +struct xfs_attr_sf_entry *xfs_attr_sf_findname(struct xfs_da_args *args); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); -xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_inode *ip); +xfs_failaddr_t xfs_attr_shortform_verify(struct xfs_attr_sf_hdr *sfp, + size_t size); void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); /* diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h index 37578b369d..bc44222230 100644 --- a/fs/xfs/libxfs/xfs_attr_sf.h +++ b/fs/xfs/libxfs/xfs_attr_sf.h @@ -7,14 +7,6 @@ #define __XFS_ATTR_SF_H__ /* - * Attribute storage when stored inside the inode. - * - * Small attribute lists are packed as tightly as possible so as - * to fit into the literal area of the inode. - */ -typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t; - -/* * We generate this then sort it, attr_list() must return things in hash-order. */ typedef struct xfs_attr_sf_sort { @@ -41,11 +33,25 @@ static inline int xfs_attr_sf_entsize(struct xfs_attr_sf_entry *sfep) return struct_size(sfep, nameval, sfep->namelen + sfep->valuelen); } -/* next entry in struct */ +/* first entry in the SF attr fork */ +static inline struct xfs_attr_sf_entry * +xfs_attr_sf_firstentry(struct xfs_attr_sf_hdr *hdr) +{ + return (struct xfs_attr_sf_entry *)(hdr + 1); +} + +/* next entry after sfep */ static inline struct xfs_attr_sf_entry * xfs_attr_sf_nextentry(struct xfs_attr_sf_entry *sfep) { return (void *)sfep + xfs_attr_sf_entsize(sfep); } +/* pointer to the space after the last entry, e.g. for adding a new one */ +static inline struct xfs_attr_sf_entry * +xfs_attr_sf_endptr(struct xfs_attr_sf_hdr *sf) +{ + return (void *)sf + be16_to_cpu(sf->totsize); +} + #endif /* __XFS_ATTR_SF_H__ */ diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index be62acffad..f362345467 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -575,7 +575,7 @@ xfs_bmap_btree_to_extents( xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo, - XFS_AG_RESV_NONE); + XFS_AG_RESV_NONE, false); if (error) return error; @@ -747,7 +747,7 @@ xfs_bmap_local_to_extents_empty( ASSERT(ifp->if_nextents == 0); xfs_bmap_forkoff_reset(ip, whichfork); - ifp->if_u1.if_root = NULL; + ifp->if_data = NULL; ifp->if_height = 0; ifp->if_format = XFS_DINODE_FMT_EXTENTS; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -832,7 +832,7 @@ xfs_bmap_local_to_extents( xfs_bmap_local_to_extents_empty(tp, ip, whichfork); flags |= XFS_ILOG_CORE; - ifp->if_u1.if_root = NULL; + ifp->if_data = NULL; ifp->if_height = 0; rec.br_startoff = 0; @@ -3044,7 +3044,8 @@ xfs_bmap_extsize_align( #define XFS_ALLOC_GAP_UNITS 4 -void +/* returns true if ap->blkno was modified */ +bool xfs_bmap_adjacent( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ { @@ -3079,13 +3080,14 @@ xfs_bmap_adjacent( if (adjust && ISVALID(ap->blkno + adjust, ap->prev.br_startblock)) ap->blkno += adjust; + return true; } /* * If not at eof, then compare the two neighbor blocks. * Figure out whether either one gives us a good starting point, * and pick the better one. */ - else if (!ap->eof) { + if (!ap->eof) { xfs_fsblock_t gotbno; /* right side block number */ xfs_fsblock_t gotdiff=0; /* right side difference */ xfs_fsblock_t prevbno; /* left side block number */ @@ -3165,14 +3167,21 @@ xfs_bmap_adjacent( * If both valid, pick the better one, else the only good * one, else ap->blkno is already set (to 0 or the inode block). */ - if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) + if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK) { ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno; - else if (prevbno != NULLFSBLOCK) + return true; + } + if (prevbno != NULLFSBLOCK) { ap->blkno = prevbno; - else if (gotbno != NULLFSBLOCK) + return true; + } + if (gotbno != NULLFSBLOCK) { ap->blkno = gotbno; + return true; + } } #undef ISVALID + return false; } int @@ -3263,11 +3272,14 @@ xfs_bmap_btalloc_select_lengths( } /* Update all inode and quota accounting for the allocation we just did. */ -static void -xfs_bmap_btalloc_accounting( - struct xfs_bmalloca *ap, - struct xfs_alloc_arg *args) +void +xfs_bmap_alloc_account( + struct xfs_bmalloca *ap) { + bool isrt = XFS_IS_REALTIME_INODE(ap->ip) && + !(ap->flags & XFS_BMAPI_ATTRFORK); + uint fld; + if (ap->flags & XFS_BMAPI_COWFORK) { /* * COW fork blocks are in-core only and thus are treated as @@ -3279,7 +3291,7 @@ xfs_bmap_btalloc_accounting( * yet. */ if (ap->wasdel) { - xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len); + xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length); return; } @@ -3291,22 +3303,25 @@ xfs_bmap_btalloc_accounting( * This essentially transfers the transaction quota reservation * to that of a delalloc extent. */ - ap->ip->i_delayed_blks += args->len; - xfs_trans_mod_dquot_byino(ap->tp, ap->ip, XFS_TRANS_DQ_RES_BLKS, - -(long)args->len); + ap->ip->i_delayed_blks += ap->length; + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, isrt ? + XFS_TRANS_DQ_RES_RTBLKS : XFS_TRANS_DQ_RES_BLKS, + -(long)ap->length); return; } /* data/attr fork only */ - ap->ip->i_nblocks += args->len; + ap->ip->i_nblocks += ap->length; xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE); if (ap->wasdel) { - ap->ip->i_delayed_blks -= args->len; - xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)args->len); + ap->ip->i_delayed_blks -= ap->length; + xfs_mod_delalloc(ap->ip->i_mount, -(int64_t)ap->length); + fld = isrt ? XFS_TRANS_DQ_DELRTBCOUNT : XFS_TRANS_DQ_DELBCOUNT; + } else { + fld = isrt ? XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT; } - xfs_trans_mod_dquot_byino(ap->tp, ap->ip, - ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT : XFS_TRANS_DQ_BCOUNT, - args->len); + + xfs_trans_mod_dquot_byino(ap->tp, ap->ip, fld, ap->length); } static int @@ -3380,7 +3395,7 @@ xfs_bmap_process_allocated_extent( ap->offset = orig_offset; else if (ap->offset + ap->length < orig_offset + orig_length) ap->offset = orig_offset + orig_length - ap->length; - xfs_bmap_btalloc_accounting(ap, args); + xfs_bmap_alloc_account(ap); } #ifdef DEBUG @@ -5010,7 +5025,6 @@ xfs_bmap_del_extent_real( xfs_fileoff_t del_endoff; /* first offset past del */ int do_fx; /* free extent at end of routine */ int error; /* error return value */ - int flags = 0;/* inode logging flags */ struct xfs_bmbt_irec got; /* current extent entry */ xfs_fileoff_t got_endoff; /* first offset past got */ int i; /* temp state */ @@ -5023,6 +5037,8 @@ xfs_bmap_del_extent_real( uint32_t state = xfs_bmap_fork_to_state(whichfork); struct xfs_bmbt_irec old; + *logflagsp = 0; + mp = ip->i_mount; XFS_STATS_INC(mp, xs_del_exlist); @@ -5035,7 +5051,6 @@ xfs_bmap_del_extent_real( ASSERT(got_endoff >= del_endoff); ASSERT(!isnullstartblock(got.br_startblock)); qfield = 0; - error = 0; /* * If it's the case where the directory code is running with no block @@ -5051,13 +5066,13 @@ xfs_bmap_del_extent_real( del->br_startoff > got.br_startoff && del_endoff < got_endoff) return -ENOSPC; - flags = XFS_ILOG_CORE; + *logflagsp = XFS_ILOG_CORE; if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) { if (!(bflags & XFS_BMAPI_REMAP)) { error = xfs_rtfree_blocks(tp, del->br_startblock, del->br_blockcount); if (error) - goto done; + return error; } do_fx = 0; @@ -5072,11 +5087,9 @@ xfs_bmap_del_extent_real( if (cur) { error = xfs_bmbt_lookup_eq(cur, &got, &i); if (error) - goto done; - if (XFS_IS_CORRUPT(mp, i != 1)) { - error = -EFSCORRUPTED; - goto done; - } + return error; + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; } if (got.br_startoff == del->br_startoff) @@ -5093,17 +5106,15 @@ xfs_bmap_del_extent_real( xfs_iext_prev(ifp, icur); ifp->if_nextents--; - flags |= XFS_ILOG_CORE; + *logflagsp |= XFS_ILOG_CORE; if (!cur) { - flags |= xfs_ilog_fext(whichfork); + *logflagsp |= xfs_ilog_fext(whichfork); break; } if ((error = xfs_btree_delete(cur, &i))) - goto done; - if (XFS_IS_CORRUPT(mp, i != 1)) { - error = -EFSCORRUPTED; - goto done; - } + return error; + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; break; case BMAP_LEFT_FILLING: /* @@ -5114,12 +5125,12 @@ xfs_bmap_del_extent_real( got.br_blockcount -= del->br_blockcount; xfs_iext_update_extent(ip, state, icur, &got); if (!cur) { - flags |= xfs_ilog_fext(whichfork); + *logflagsp |= xfs_ilog_fext(whichfork); break; } error = xfs_bmbt_update(cur, &got); if (error) - goto done; + return error; break; case BMAP_RIGHT_FILLING: /* @@ -5128,12 +5139,12 @@ xfs_bmap_del_extent_real( got.br_blockcount -= del->br_blockcount; xfs_iext_update_extent(ip, state, icur, &got); if (!cur) { - flags |= xfs_ilog_fext(whichfork); + *logflagsp |= xfs_ilog_fext(whichfork); break; } error = xfs_bmbt_update(cur, &got); if (error) - goto done; + return error; break; case 0: /* @@ -5150,18 +5161,18 @@ xfs_bmap_del_extent_real( new.br_state = got.br_state; new.br_startblock = del_endblock; - flags |= XFS_ILOG_CORE; + *logflagsp |= XFS_ILOG_CORE; if (cur) { error = xfs_bmbt_update(cur, &got); if (error) - goto done; + return error; error = xfs_btree_increment(cur, 0, &i); if (error) - goto done; + return error; cur->bc_rec.b = new; error = xfs_btree_insert(cur, &i); if (error && error != -ENOSPC) - goto done; + return error; /* * If get no-space back from btree insert, it tried a * split, and we have a zero block reservation. Fix up @@ -5174,33 +5185,28 @@ xfs_bmap_del_extent_real( */ error = xfs_bmbt_lookup_eq(cur, &got, &i); if (error) - goto done; - if (XFS_IS_CORRUPT(mp, i != 1)) { - error = -EFSCORRUPTED; - goto done; - } + return error; + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; /* * Update the btree record back * to the original value. */ error = xfs_bmbt_update(cur, &old); if (error) - goto done; + return error; /* * Reset the extent record back * to the original value. */ xfs_iext_update_extent(ip, state, icur, &old); - flags = 0; - error = -ENOSPC; - goto done; - } - if (XFS_IS_CORRUPT(mp, i != 1)) { - error = -EFSCORRUPTED; - goto done; + *logflagsp = 0; + return -ENOSPC; } + if (XFS_IS_CORRUPT(mp, i != 1)) + return -EFSCORRUPTED; } else - flags |= xfs_ilog_fext(whichfork); + *logflagsp |= xfs_ilog_fext(whichfork); ifp->if_nextents++; xfs_iext_next(ifp, icur); @@ -5218,13 +5224,13 @@ xfs_bmap_del_extent_real( if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) { xfs_refcount_decrease_extent(tp, del); } else { - error = __xfs_free_extent_later(tp, del->br_startblock, + error = xfs_free_extent_later(tp, del->br_startblock, del->br_blockcount, NULL, XFS_AG_RESV_NONE, ((bflags & XFS_BMAPI_NODISCARD) || del->br_state == XFS_EXT_UNWRITTEN)); if (error) - goto done; + return error; } } @@ -5239,9 +5245,7 @@ xfs_bmap_del_extent_real( if (qfield && !(bflags & XFS_BMAPI_REMAP)) xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks); -done: - *logflagsp = flags; - return error; + return 0; } /* @@ -5250,7 +5254,7 @@ done: * that value. If not all extents in the block range can be removed then * *done is set. */ -int /* error */ +static int __xfs_bunmapi( struct xfs_trans *tp, /* transaction pointer */ struct xfs_inode *ip, /* incore inode */ @@ -6102,7 +6106,7 @@ __xfs_bmap_add( bi->bi_bmap = *bmap; xfs_bmap_update_get_group(tp->t_mountp, bi); - xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_BMAP, &bi->bi_list); + xfs_defer_add(tp, &bi->bi_list, &xfs_bmap_update_defer_type); return 0; } @@ -6179,19 +6183,18 @@ xfs_bmap_finish_one( return error; } -/* Check that an inode's extent does not have invalid flags or bad ranges. */ +/* Check that an extent does not have invalid flags or bad ranges. */ xfs_failaddr_t -xfs_bmap_validate_extent( - struct xfs_inode *ip, +xfs_bmap_validate_extent_raw( + struct xfs_mount *mp, + bool rtfile, int whichfork, struct xfs_bmbt_irec *irec) { - struct xfs_mount *mp = ip->i_mount; - if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) return __this_address; - if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) { + if (rtfile && whichfork == XFS_DATA_FORK) { if (!xfs_verify_rtbext(mp, irec->br_startblock, irec->br_blockcount)) return __this_address; @@ -6221,3 +6224,53 @@ xfs_bmap_intent_destroy_cache(void) kmem_cache_destroy(xfs_bmap_intent_cache); xfs_bmap_intent_cache = NULL; } + +/* Check that an inode's extent does not have invalid flags or bad ranges. */ +xfs_failaddr_t +xfs_bmap_validate_extent( + struct xfs_inode *ip, + int whichfork, + struct xfs_bmbt_irec *irec) +{ + return xfs_bmap_validate_extent_raw(ip->i_mount, + XFS_IS_REALTIME_INODE(ip), whichfork, irec); +} + +/* + * Used in xfs_itruncate_extents(). This is the maximum number of extents + * freed from a file in a single transaction. + */ +#define XFS_ITRUNC_MAX_EXTENTS 2 + +/* + * Unmap every extent in part of an inode's fork. We don't do any higher level + * invalidation work at all. + */ +int +xfs_bunmapi_range( + struct xfs_trans **tpp, + struct xfs_inode *ip, + uint32_t flags, + xfs_fileoff_t startoff, + xfs_fileoff_t endoff) +{ + xfs_filblks_t unmap_len = endoff - startoff + 1; + int error = 0; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + + while (unmap_len > 0) { + ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER); + error = __xfs_bunmapi(*tpp, ip, startoff, &unmap_len, flags, + XFS_ITRUNC_MAX_EXTENTS); + if (error) + goto out; + + /* free the just unmapped extents */ + error = xfs_defer_finish(tpp); + if (error) + goto out; + } +out: + return error; +} diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index e33470e397..f6b73f1bad 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -116,6 +116,8 @@ static inline int xfs_bmapi_whichfork(uint32_t bmapi_flags) return XFS_DATA_FORK; } +void xfs_bmap_alloc_account(struct xfs_bmalloca *ap); + /* * Special values for xfs_bmbt_irec_t br_startblock field. */ @@ -190,9 +192,6 @@ int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno, int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags, xfs_extlen_t total, struct xfs_bmbt_irec *mval, int *nmap); -int __xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, - xfs_fileoff_t bno, xfs_filblks_t *rlen, uint32_t flags, - xfs_extnum_t nexts); int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags, xfs_extnum_t nexts, int *done); @@ -263,6 +262,8 @@ static inline uint32_t xfs_bmap_fork_to_state(int whichfork) } } +xfs_failaddr_t xfs_bmap_validate_extent_raw(struct xfs_mount *mp, bool rtfile, + int whichfork, struct xfs_bmbt_irec *irec); xfs_failaddr_t xfs_bmap_validate_extent(struct xfs_inode *ip, int whichfork, struct xfs_bmbt_irec *irec); int xfs_bmap_complain_bad_rec(struct xfs_inode *ip, int whichfork, @@ -271,6 +272,8 @@ int xfs_bmap_complain_bad_rec(struct xfs_inode *ip, int whichfork, int xfs_bmapi_remap(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, xfs_fsblock_t startblock, uint32_t flags); +int xfs_bunmapi_range(struct xfs_trans **tpp, struct xfs_inode *ip, + uint32_t flags, xfs_fileoff_t startoff, xfs_fileoff_t endoff); extern struct kmem_cache *xfs_bmap_intent_cache; diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index bf3f1b36fd..71f2d50f78 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -15,6 +15,7 @@ #include "xfs_trans.h" #include "xfs_alloc.h" #include "xfs_btree.h" +#include "xfs_btree_staging.h" #include "xfs_bmap_btree.h" #include "xfs_bmap.h" #include "xfs_error.h" @@ -272,7 +273,7 @@ xfs_bmbt_free_block( xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo, - XFS_AG_RESV_NONE); + XFS_AG_RESV_NONE, false); if (error) return error; @@ -288,10 +289,7 @@ xfs_bmbt_get_minrecs( int level) { if (level == cur->bc_nlevels - 1) { - struct xfs_ifork *ifp; - - ifp = xfs_ifork_ptr(cur->bc_ino.ip, - cur->bc_ino.whichfork); + struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); return xfs_bmbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes, level == 0) / 2; @@ -306,10 +304,7 @@ xfs_bmbt_get_maxrecs( int level) { if (level == cur->bc_nlevels - 1) { - struct xfs_ifork *ifp; - - ifp = xfs_ifork_ptr(cur->bc_ino.ip, - cur->bc_ino.whichfork); + struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur); return xfs_bmbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes, level == 0); @@ -543,23 +538,19 @@ static const struct xfs_btree_ops xfs_bmbt_ops = { .keys_contiguous = xfs_bmbt_keys_contiguous, }; -/* - * Allocate a new bmap btree cursor. - */ -struct xfs_btree_cur * /* new bmap btree cursor */ -xfs_bmbt_init_cursor( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_inode *ip, /* inode owning the btree */ - int whichfork) /* data or attr fork */ +static struct xfs_btree_cur * +xfs_bmbt_init_common( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_inode *ip, + int whichfork) { - struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); struct xfs_btree_cur *cur; + ASSERT(whichfork != XFS_COW_FORK); cur = xfs_btree_alloc_cursor(mp, tp, XFS_BTNUM_BMAP, mp->m_bm_maxlevels[whichfork], xfs_bmbt_cur_cache); - cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; cur->bc_statoff = XFS_STATS_CALC_INDEX(xs_bmbt_2); cur->bc_ops = &xfs_bmbt_ops; @@ -567,10 +558,30 @@ xfs_bmbt_init_cursor( if (xfs_has_crc(mp)) cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; - cur->bc_ino.forksize = xfs_inode_fork_size(ip, whichfork); cur->bc_ino.ip = ip; cur->bc_ino.allocated = 0; cur->bc_ino.flags = 0; + + return cur; +} + +/* + * Allocate a new bmap btree cursor. + */ +struct xfs_btree_cur * +xfs_bmbt_init_cursor( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_inode *ip, + int whichfork) +{ + struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); + struct xfs_btree_cur *cur; + + cur = xfs_bmbt_init_common(mp, tp, ip, whichfork); + + cur->bc_nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1; + cur->bc_ino.forksize = xfs_inode_fork_size(ip, whichfork); cur->bc_ino.whichfork = whichfork; return cur; @@ -588,6 +599,76 @@ xfs_bmbt_block_maxrecs( } /* + * Allocate a new bmap btree cursor for reloading an inode block mapping data + * structure. Note that callers can use the staged cursor to reload extents + * format inode forks if they rebuild the iext tree and commit the staged + * cursor immediately. + */ +struct xfs_btree_cur * +xfs_bmbt_stage_cursor( + struct xfs_mount *mp, + struct xfs_inode *ip, + struct xbtree_ifakeroot *ifake) +{ + struct xfs_btree_cur *cur; + struct xfs_btree_ops *ops; + + /* data fork always has larger maxheight */ + cur = xfs_bmbt_init_common(mp, NULL, ip, XFS_DATA_FORK); + cur->bc_nlevels = ifake->if_levels; + cur->bc_ino.forksize = ifake->if_fork_size; + + /* Don't let anyone think we're attached to the real fork yet. */ + cur->bc_ino.whichfork = -1; + xfs_btree_stage_ifakeroot(cur, ifake, &ops); + ops->update_cursor = NULL; + return cur; +} + +/* + * Swap in the new inode fork root. Once we pass this point the newly rebuilt + * mappings are in place and we have to kill off any old btree blocks. + */ +void +xfs_bmbt_commit_staged_btree( + struct xfs_btree_cur *cur, + struct xfs_trans *tp, + int whichfork) +{ + struct xbtree_ifakeroot *ifake = cur->bc_ino.ifake; + struct xfs_ifork *ifp; + static const short brootflag[2] = {XFS_ILOG_DBROOT, XFS_ILOG_ABROOT}; + static const short extflag[2] = {XFS_ILOG_DEXT, XFS_ILOG_AEXT}; + int flags = XFS_ILOG_CORE; + + ASSERT(cur->bc_flags & XFS_BTREE_STAGING); + ASSERT(whichfork != XFS_COW_FORK); + + /* + * Free any resources hanging off the real fork, then shallow-copy the + * staging fork's contents into the real fork to transfer everything + * we just built. + */ + ifp = xfs_ifork_ptr(cur->bc_ino.ip, whichfork); + xfs_idestroy_fork(ifp); + memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork)); + + switch (ifp->if_format) { + case XFS_DINODE_FMT_EXTENTS: + flags |= extflag[whichfork]; + break; + case XFS_DINODE_FMT_BTREE: + flags |= brootflag[whichfork]; + break; + default: + ASSERT(0); + break; + } + xfs_trans_log_inode(tp, cur->bc_ino.ip, flags); + xfs_btree_commit_ifakeroot(cur, tp, whichfork, &xfs_bmbt_ops); +} + +/* * Calculate number of records in a bmap btree block. */ int diff --git a/fs/xfs/libxfs/xfs_bmap_btree.h b/fs/xfs/libxfs/xfs_bmap_btree.h index 3e7a40a838..151b8491f6 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.h +++ b/fs/xfs/libxfs/xfs_bmap_btree.h @@ -11,6 +11,7 @@ struct xfs_btree_block; struct xfs_mount; struct xfs_inode; struct xfs_trans; +struct xbtree_ifakeroot; /* * Btree block header size depends on a superblock flag. @@ -106,6 +107,10 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *, struct xfs_trans *, struct xfs_inode *, int); +struct xfs_btree_cur *xfs_bmbt_stage_cursor(struct xfs_mount *mp, + struct xfs_inode *ip, struct xbtree_ifakeroot *ifake); +void xfs_bmbt_commit_staged_btree(struct xfs_btree_cur *cur, + struct xfs_trans *tp, int whichfork); extern unsigned long long xfs_bmbt_calc_size(struct xfs_mount *mp, unsigned long long len); diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 6a6503ab0c..ea8d3659df 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -1330,7 +1330,7 @@ xfs_btree_get_buf_block( * Read in the buffer at the given ptr and return the buffer and * the block pointer within the buffer. */ -STATIC int +int xfs_btree_read_buf_block( struct xfs_btree_cur *cur, const union xfs_btree_ptr *ptr, @@ -5212,3 +5212,29 @@ xfs_btree_destroy_cur_caches(void) xfs_rmapbt_destroy_cur_cache(); xfs_refcountbt_destroy_cur_cache(); } + +/* Move the btree cursor before the first record. */ +int +xfs_btree_goto_left_edge( + struct xfs_btree_cur *cur) +{ + int stat = 0; + int error; + + memset(&cur->bc_rec, 0, sizeof(cur->bc_rec)); + error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, &stat); + if (error) + return error; + if (!stat) + return 0; + + error = xfs_btree_decrement(cur, 0, &stat); + if (error) + return error; + if (stat != 0) { + ASSERT(0); + return -EFSCORRUPTED; + } + + return 0; +} diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index 4d68a58be1..d906324e25 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -700,6 +700,9 @@ void xfs_btree_set_ptr_null(struct xfs_btree_cur *cur, int xfs_btree_get_buf_block(struct xfs_btree_cur *cur, const union xfs_btree_ptr *ptr, struct xfs_btree_block **block, struct xfs_buf **bpp); +int xfs_btree_read_buf_block(struct xfs_btree_cur *cur, + const union xfs_btree_ptr *ptr, int flags, + struct xfs_btree_block **block, struct xfs_buf **bpp); void xfs_btree_set_sibling(struct xfs_btree_cur *cur, struct xfs_btree_block *block, const union xfs_btree_ptr *ptr, int lr); @@ -735,4 +738,6 @@ xfs_btree_alloc_cursor( int __init xfs_btree_init_cur_caches(void); void xfs_btree_destroy_cur_caches(void); +int xfs_btree_goto_left_edge(struct xfs_btree_cur *cur); + #endif /* __XFS_BTREE_H__ */ diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c index dd75e208b5..e276eba87c 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.c +++ b/fs/xfs/libxfs/xfs_btree_staging.c @@ -333,20 +333,41 @@ xfs_btree_commit_ifakeroot( /* * Put a btree block that we're loading onto the ordered list and release it. * The btree blocks will be written to disk when bulk loading is finished. + * If we reach the dirty buffer threshold, flush them to disk before + * continuing. */ -static void +static int xfs_btree_bload_drop_buf( - struct list_head *buffers_list, - struct xfs_buf **bpp) + struct xfs_btree_bload *bbl, + struct list_head *buffers_list, + struct xfs_buf **bpp) { - if (*bpp == NULL) - return; + struct xfs_buf *bp = *bpp; + int error; + + if (!bp) + return 0; - if (!xfs_buf_delwri_queue(*bpp, buffers_list)) - ASSERT(0); + /* + * Mark this buffer XBF_DONE (i.e. uptodate) so that a subsequent + * xfs_buf_read will not pointlessly reread the contents from the disk. + */ + bp->b_flags |= XBF_DONE; - xfs_buf_relse(*bpp); + xfs_buf_delwri_queue_here(bp, buffers_list); + xfs_buf_relse(bp); *bpp = NULL; + bbl->nr_dirty++; + + if (!bbl->max_dirty || bbl->nr_dirty < bbl->max_dirty) + return 0; + + error = xfs_buf_delwri_submit(buffers_list); + if (error) + return error; + + bbl->nr_dirty = 0; + return 0; } /* @@ -384,7 +405,7 @@ xfs_btree_bload_prep_block( ASSERT(*bpp == NULL); /* Allocate a new incore btree root block. */ - new_size = bbl->iroot_size(cur, nr_this_block, priv); + new_size = bbl->iroot_size(cur, level, nr_this_block, priv); ifp->if_broot = kmem_zalloc(new_size, 0); ifp->if_broot_bytes = (int)new_size; @@ -418,7 +439,10 @@ xfs_btree_bload_prep_block( */ if (*blockp) xfs_btree_set_sibling(cur, *blockp, &new_ptr, XFS_BB_RIGHTSIB); - xfs_btree_bload_drop_buf(buffers_list, bpp); + + ret = xfs_btree_bload_drop_buf(bbl, buffers_list, bpp); + if (ret) + return ret; /* Initialize the new btree block. */ xfs_btree_init_block_cur(cur, new_bp, level, nr_this_block); @@ -436,22 +460,19 @@ STATIC int xfs_btree_bload_leaf( struct xfs_btree_cur *cur, unsigned int recs_this_block, - xfs_btree_bload_get_record_fn get_record, + xfs_btree_bload_get_records_fn get_records, struct xfs_btree_block *block, void *priv) { - unsigned int j; + unsigned int j = 1; int ret; /* Fill the leaf block with records. */ - for (j = 1; j <= recs_this_block; j++) { - union xfs_btree_rec *block_rec; - - ret = get_record(cur, priv); - if (ret) + while (j <= recs_this_block) { + ret = get_records(cur, j, block, recs_this_block - j + 1, priv); + if (ret < 0) return ret; - block_rec = xfs_btree_rec_addr(cur, j, block); - cur->bc_ops->init_rec_from_cur(cur, block_rec); + j += ret; } return 0; @@ -485,7 +506,12 @@ xfs_btree_bload_node( ASSERT(!xfs_btree_ptr_is_null(cur, child_ptr)); - ret = xfs_btree_get_buf_block(cur, child_ptr, &child_block, + /* + * Read the lower-level block in case the buffer for it has + * been reclaimed. LRU refs will be set on the block, which is + * desirable if the new btree commits. + */ + ret = xfs_btree_read_buf_block(cur, child_ptr, 0, &child_block, &child_bp); if (ret) return ret; @@ -570,7 +596,14 @@ xfs_btree_bload_level_geometry( unsigned int desired_npb; unsigned int maxnr; - maxnr = cur->bc_ops->get_maxrecs(cur, level); + /* + * Compute the absolute maximum number of records that we can store in + * the ondisk block or inode root. + */ + if (cur->bc_ops->get_dmaxrecs) + maxnr = cur->bc_ops->get_dmaxrecs(cur, level); + else + maxnr = cur->bc_ops->get_maxrecs(cur, level); /* * Compute the number of blocks we need to fill each block with the @@ -764,6 +797,7 @@ xfs_btree_bload( cur->bc_nlevels = bbl->btree_height; xfs_btree_set_ptr_null(cur, &child_ptr); xfs_btree_set_ptr_null(cur, &ptr); + bbl->nr_dirty = 0; xfs_btree_bload_level_geometry(cur, bbl, level, nr_this_level, &avg_per_block, &blocks, &blocks_with_extra); @@ -789,7 +823,7 @@ xfs_btree_bload( trace_xfs_btree_bload_block(cur, level, i, blocks, &ptr, nr_this_block); - ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_record, + ret = xfs_btree_bload_leaf(cur, nr_this_block, bbl->get_records, block, priv); if (ret) goto out; @@ -802,7 +836,10 @@ xfs_btree_bload( xfs_btree_copy_ptrs(cur, &child_ptr, &ptr, 1); } total_blocks += blocks; - xfs_btree_bload_drop_buf(&buffers_list, &bp); + + ret = xfs_btree_bload_drop_buf(bbl, &buffers_list, &bp); + if (ret) + goto out; /* Populate the internal btree nodes. */ for (level = 1; level < cur->bc_nlevels; level++) { @@ -844,7 +881,11 @@ xfs_btree_bload( xfs_btree_copy_ptrs(cur, &first_ptr, &ptr, 1); } total_blocks += blocks; - xfs_btree_bload_drop_buf(&buffers_list, &bp); + + ret = xfs_btree_bload_drop_buf(bbl, &buffers_list, &bp); + if (ret) + goto out; + xfs_btree_copy_ptrs(cur, &child_ptr, &first_ptr, 1); } diff --git a/fs/xfs/libxfs/xfs_btree_staging.h b/fs/xfs/libxfs/xfs_btree_staging.h index f0d2976050..055ea43b1e 100644 --- a/fs/xfs/libxfs/xfs_btree_staging.h +++ b/fs/xfs/libxfs/xfs_btree_staging.h @@ -37,12 +37,6 @@ struct xbtree_ifakeroot { /* Number of bytes available for this fork in the inode. */ unsigned int if_fork_size; - - /* Fork format. */ - unsigned int if_format; - - /* Number of records. */ - unsigned int if_extents; }; /* Cursor interactions with fake roots for inode-rooted btrees. */ @@ -53,19 +47,24 @@ void xfs_btree_commit_ifakeroot(struct xfs_btree_cur *cur, struct xfs_trans *tp, int whichfork, const struct xfs_btree_ops *ops); /* Bulk loading of staged btrees. */ -typedef int (*xfs_btree_bload_get_record_fn)(struct xfs_btree_cur *cur, void *priv); +typedef int (*xfs_btree_bload_get_records_fn)(struct xfs_btree_cur *cur, + unsigned int idx, struct xfs_btree_block *block, + unsigned int nr_wanted, void *priv); typedef int (*xfs_btree_bload_claim_block_fn)(struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, void *priv); typedef size_t (*xfs_btree_bload_iroot_size_fn)(struct xfs_btree_cur *cur, - unsigned int nr_this_level, void *priv); + unsigned int level, unsigned int nr_this_level, void *priv); struct xfs_btree_bload { /* - * This function will be called nr_records times to load records into - * the btree. The function does this by setting the cursor's bc_rec - * field in in-core format. Records must be returned in sort order. + * This function will be called to load @nr_wanted records into the + * btree. The implementation does this by setting the cursor's bc_rec + * field in in-core format and using init_rec_from_cur to set the + * records in the btree block. Records must be returned in sort order. + * The function must return the number of records loaded or the usual + * negative errno. */ - xfs_btree_bload_get_record_fn get_record; + xfs_btree_bload_get_records_fn get_records; /* * This function will be called nr_blocks times to obtain a pointer @@ -113,6 +112,16 @@ struct xfs_btree_bload { * height of the new btree. */ unsigned int btree_height; + + /* + * Flush the new btree block buffer list to disk after this many blocks + * have been formatted. Zero prohibits writing any buffers until all + * blocks have been formatted. + */ + uint16_t max_dirty; + + /* Number of dirty buffers. */ + uint16_t nr_dirty; }; int xfs_btree_bload_compute_geometry(struct xfs_btree_cur *cur, diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index e576560b46..5457188bb4 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -421,6 +421,25 @@ xfs_da3_node_read_mapped( return xfs_da3_node_set_type(tp, *bpp); } +/* + * Copy src directory/attr leaf/node buffer to the dst. + * For v5 file systems make sure the right blkno is stamped in. + */ +void +xfs_da_buf_copy( + struct xfs_buf *dst, + struct xfs_buf *src, + size_t size) +{ + struct xfs_da3_blkinfo *da3 = dst->b_addr; + + memcpy(dst->b_addr, src->b_addr, size); + dst->b_ops = src->b_ops; + xfs_trans_buf_copy_type(dst, src); + if (xfs_has_crc(dst->b_mount)) + da3->blkno = cpu_to_be64(xfs_buf_daddr(dst)); +} + /*======================================================================== * Routines used for growing the Btree. *========================================================================*/ @@ -690,12 +709,6 @@ xfs_da3_root_split( btree = icnodehdr.btree; size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot); level = icnodehdr.level; - - /* - * we are about to copy oldroot to bp, so set up the type - * of bp while we know exactly what it will be. - */ - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF); } else { struct xfs_dir3_icleaf_hdr leafhdr; @@ -707,31 +720,17 @@ xfs_da3_root_split( size = (int)((char *)&leafhdr.ents[leafhdr.count] - (char *)leaf); level = 0; - - /* - * we are about to copy oldroot to bp, so set up the type - * of bp while we know exactly what it will be. - */ - xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF); } /* - * we can copy most of the information in the node from one block to - * another, but for CRC enabled headers we have to make sure that the - * block specific identifiers are kept intact. We update the buffer - * directly for this. + * Copy old root to new buffer and log it. */ - memcpy(node, oldroot, size); - if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC) || - oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { - struct xfs_da3_intnode *node3 = (struct xfs_da3_intnode *)node; - - node3->hdr.info.blkno = cpu_to_be64(xfs_buf_daddr(bp)); - } + xfs_da_buf_copy(bp, blk1->bp, size); xfs_trans_log_buf(tp, bp, 0, size - 1); - bp->b_ops = blk1->bp->b_ops; - xfs_trans_buf_copy_type(bp, blk1->bp); + /* + * Update blk1 to point to new buffer. + */ blk1->bp = bp; blk1->blkno = blkno; @@ -1220,21 +1219,14 @@ xfs_da3_root_join( xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level); /* - * This could be copying a leaf back into the root block in the case of - * there only being a single leaf block left in the tree. Hence we have - * to update the b_ops pointer as well to match the buffer type change - * that could occur. For dir3 blocks we also need to update the block - * number in the buffer header. + * Copy child to root buffer and log it. */ - memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize); - root_blk->bp->b_ops = bp->b_ops; - xfs_trans_buf_copy_type(root_blk->bp, bp); - if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) { - struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr; - da3->blkno = cpu_to_be64(xfs_buf_daddr(root_blk->bp)); - } + xfs_da_buf_copy(root_blk->bp, bp, args->geo->blksize); xfs_trans_log_buf(args->trans, root_blk->bp, 0, args->geo->blksize - 1); + /* + * Now we can drop the child buffer. + */ error = xfs_da_shrink_inode(args, child, bp); return error; } @@ -2317,9 +2309,10 @@ xfs_da3_swap_lastblock( /* * Copy the last block into the dead buffer and log it. */ - memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize); + xfs_da_buf_copy(dead_buf, last_buf, args->geo->blksize); xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1); dead_info = dead_buf->b_addr; + /* * Get values from the moved block. */ diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index ffa3df5b28..706baf36e1 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -219,6 +219,8 @@ int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, const struct xfs_buf_ops *ops); int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, struct xfs_buf *dead_buf); +void xfs_da_buf_copy(struct xfs_buf *dst, struct xfs_buf *src, + size_t size); uint xfs_da_hashname(const uint8_t *name_string, int name_length); enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index f9015f88ec..24f9d1461f 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -578,20 +578,25 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) #define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */ /* - * Entries are packed toward the top as tight as possible. - */ -struct xfs_attr_shortform { - struct xfs_attr_sf_hdr { /* constant-structure header block */ - __be16 totsize; /* total bytes in shortform list */ - __u8 count; /* count of active entries */ - __u8 padding; - } hdr; - struct xfs_attr_sf_entry { - uint8_t namelen; /* actual length of name (no NULL) */ - uint8_t valuelen; /* actual length of value (no NULL) */ - uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */ - uint8_t nameval[]; /* name & value bytes concatenated */ - } list[]; /* variable sized array */ + * Attribute storage when stored inside the inode. + * + * Small attribute lists are packed as tightly as possible so as to fit into the + * literal area of the inode. + * + * These "shortform" attribute forks consist of a single xfs_attr_sf_hdr header + * followed by zero or more xfs_attr_sf_entry structures. + */ +struct xfs_attr_sf_hdr { /* constant-structure header block */ + __be16 totsize; /* total bytes in shortform list */ + __u8 count; /* count of active entries */ + __u8 padding; +}; + +struct xfs_attr_sf_entry { + __u8 namelen; /* actual length of name (no NULL) */ + __u8 valuelen; /* actual length of value (no NULL) */ + __u8 flags; /* flags bits (XFS_ATTR_*) */ + __u8 nameval[]; /* name & value bytes concatenated */ }; typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */ diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c index f71679ce23..66a17910d0 100644 --- a/fs/xfs/libxfs/xfs_defer.c +++ b/fs/xfs/libxfs/xfs_defer.c @@ -26,6 +26,7 @@ #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_attr.h" +#include "xfs_trans_priv.h" static struct kmem_cache *xfs_defer_pending_cache; @@ -181,16 +182,89 @@ static struct kmem_cache *xfs_defer_pending_cache; * Note that the continuation requested between t2 and t3 is likely to * reoccur. */ +STATIC struct xfs_log_item * +xfs_defer_barrier_create_intent( + struct xfs_trans *tp, + struct list_head *items, + unsigned int count, + bool sort) +{ + return NULL; +} -static const struct xfs_defer_op_type *defer_op_types[] = { - [XFS_DEFER_OPS_TYPE_BMAP] = &xfs_bmap_update_defer_type, - [XFS_DEFER_OPS_TYPE_REFCOUNT] = &xfs_refcount_update_defer_type, - [XFS_DEFER_OPS_TYPE_RMAP] = &xfs_rmap_update_defer_type, - [XFS_DEFER_OPS_TYPE_FREE] = &xfs_extent_free_defer_type, - [XFS_DEFER_OPS_TYPE_AGFL_FREE] = &xfs_agfl_free_defer_type, - [XFS_DEFER_OPS_TYPE_ATTR] = &xfs_attr_defer_type, +STATIC void +xfs_defer_barrier_abort_intent( + struct xfs_log_item *intent) +{ + /* empty */ +} + +STATIC struct xfs_log_item * +xfs_defer_barrier_create_done( + struct xfs_trans *tp, + struct xfs_log_item *intent, + unsigned int count) +{ + return NULL; +} + +STATIC int +xfs_defer_barrier_finish_item( + struct xfs_trans *tp, + struct xfs_log_item *done, + struct list_head *item, + struct xfs_btree_cur **state) +{ + ASSERT(0); + return -EFSCORRUPTED; +} + +STATIC void +xfs_defer_barrier_cancel_item( + struct list_head *item) +{ + ASSERT(0); +} + +static const struct xfs_defer_op_type xfs_barrier_defer_type = { + .max_items = 1, + .create_intent = xfs_defer_barrier_create_intent, + .abort_intent = xfs_defer_barrier_abort_intent, + .create_done = xfs_defer_barrier_create_done, + .finish_item = xfs_defer_barrier_finish_item, + .cancel_item = xfs_defer_barrier_cancel_item, }; +/* Create a log intent done item for a log intent item. */ +static inline void +xfs_defer_create_done( + struct xfs_trans *tp, + struct xfs_defer_pending *dfp) +{ + struct xfs_log_item *lip; + + /* If there is no log intent item, there can be no log done item. */ + if (!dfp->dfp_intent) + return; + + /* + * Mark the transaction dirty, even on error. This ensures the + * transaction is aborted, which: + * + * 1.) releases the log intent item and frees the log done item + * 2.) shuts down the filesystem + */ + tp->t_flags |= XFS_TRANS_DIRTY; + lip = dfp->dfp_ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count); + if (!lip) + return; + + tp->t_flags |= XFS_TRANS_HAS_INTENT_DONE; + xfs_trans_add_item(tp, lip); + set_bit(XFS_LI_DIRTY, &lip->li_flags); + dfp->dfp_done = lip; +} + /* * Ensure there's a log intent item associated with this deferred work item if * the operation must be restarted on crash. Returns 1 if there's a log item; @@ -202,18 +276,21 @@ xfs_defer_create_intent( struct xfs_defer_pending *dfp, bool sort) { - const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type]; struct xfs_log_item *lip; if (dfp->dfp_intent) return 1; - lip = ops->create_intent(tp, &dfp->dfp_work, dfp->dfp_count, sort); + lip = dfp->dfp_ops->create_intent(tp, &dfp->dfp_work, dfp->dfp_count, + sort); if (!lip) return 0; if (IS_ERR(lip)) return PTR_ERR(lip); + tp->t_flags |= XFS_TRANS_DIRTY; + xfs_trans_add_item(tp, lip); + set_bit(XFS_LI_DIRTY, &lip->li_flags); dfp->dfp_intent = lip; return 1; } @@ -245,23 +322,50 @@ xfs_defer_create_intents( return ret; } -STATIC void +static inline void xfs_defer_pending_abort( struct xfs_mount *mp, + struct xfs_defer_pending *dfp) +{ + trace_xfs_defer_pending_abort(mp, dfp); + + if (dfp->dfp_intent && !dfp->dfp_done) { + dfp->dfp_ops->abort_intent(dfp->dfp_intent); + dfp->dfp_intent = NULL; + } +} + +static inline void +xfs_defer_pending_cancel_work( + struct xfs_mount *mp, + struct xfs_defer_pending *dfp) +{ + struct list_head *pwi; + struct list_head *n; + + trace_xfs_defer_cancel_list(mp, dfp); + + list_del(&dfp->dfp_list); + list_for_each_safe(pwi, n, &dfp->dfp_work) { + list_del(pwi); + dfp->dfp_count--; + trace_xfs_defer_cancel_item(mp, dfp, pwi); + dfp->dfp_ops->cancel_item(pwi); + } + ASSERT(dfp->dfp_count == 0); + kmem_cache_free(xfs_defer_pending_cache, dfp); +} + +STATIC void +xfs_defer_pending_abort_list( + struct xfs_mount *mp, struct list_head *dop_list) { struct xfs_defer_pending *dfp; - const struct xfs_defer_op_type *ops; /* Abort intent items that don't have a done item. */ - list_for_each_entry(dfp, dop_list, dfp_list) { - ops = defer_op_types[dfp->dfp_type]; - trace_xfs_defer_pending_abort(mp, dfp); - if (dfp->dfp_intent && !dfp->dfp_done) { - ops->abort_intent(dfp->dfp_intent); - dfp->dfp_intent = NULL; - } - } + list_for_each_entry(dfp, dop_list, dfp_list) + xfs_defer_pending_abort(mp, dfp); } /* Abort all the intents that were committed. */ @@ -271,7 +375,7 @@ xfs_defer_trans_abort( struct list_head *dop_pending) { trace_xfs_defer_trans_abort(tp, _RET_IP_); - xfs_defer_pending_abort(tp->t_mountp, dop_pending); + xfs_defer_pending_abort_list(tp->t_mountp, dop_pending); } /* @@ -389,27 +493,31 @@ xfs_defer_cancel_list( { struct xfs_defer_pending *dfp; struct xfs_defer_pending *pli; - struct list_head *pwi; - struct list_head *n; - const struct xfs_defer_op_type *ops; /* * Free the pending items. Caller should already have arranged * for the intent items to be released. */ - list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) { - ops = defer_op_types[dfp->dfp_type]; - trace_xfs_defer_cancel_list(mp, dfp); - list_del(&dfp->dfp_list); - list_for_each_safe(pwi, n, &dfp->dfp_work) { - list_del(pwi); - dfp->dfp_count--; - trace_xfs_defer_cancel_item(mp, dfp, pwi); - ops->cancel_item(pwi); - } - ASSERT(dfp->dfp_count == 0); - kmem_cache_free(xfs_defer_pending_cache, dfp); + list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) + xfs_defer_pending_cancel_work(mp, dfp); +} + +static inline void +xfs_defer_relog_intent( + struct xfs_trans *tp, + struct xfs_defer_pending *dfp) +{ + struct xfs_log_item *lip; + + xfs_defer_create_done(tp, dfp); + + lip = dfp->dfp_ops->relog_intent(tp, dfp->dfp_intent, dfp->dfp_done); + if (lip) { + xfs_trans_add_item(tp, lip); + set_bit(XFS_LI_DIRTY, &lip->li_flags); } + dfp->dfp_done = NULL; + dfp->dfp_intent = lip; } /* @@ -417,7 +525,7 @@ xfs_defer_cancel_list( * done item to release the intent item; and then log a new intent item. * The caller should provide a fresh transaction and roll it after we're done. */ -static int +static void xfs_defer_relog( struct xfs_trans **tpp, struct list_head *dfops) @@ -456,31 +564,28 @@ xfs_defer_relog( trace_xfs_defer_relog_intent((*tpp)->t_mountp, dfp); XFS_STATS_INC((*tpp)->t_mountp, defer_relog); - dfp->dfp_intent = xfs_trans_item_relog(dfp->dfp_intent, *tpp); - } - if ((*tpp)->t_flags & XFS_TRANS_DIRTY) - return xfs_defer_trans_roll(tpp); - return 0; + xfs_defer_relog_intent(*tpp, dfp); + } } /* * Log an intent-done item for the first pending intent, and finish the work * items. */ -static int +int xfs_defer_finish_one( struct xfs_trans *tp, struct xfs_defer_pending *dfp) { - const struct xfs_defer_op_type *ops = defer_op_types[dfp->dfp_type]; + const struct xfs_defer_op_type *ops = dfp->dfp_ops; struct xfs_btree_cur *state = NULL; struct list_head *li, *n; int error; trace_xfs_defer_pending_finish(tp->t_mountp, dfp); - dfp->dfp_done = ops->create_done(tp, dfp->dfp_intent, dfp->dfp_count); + xfs_defer_create_done(tp, dfp); list_for_each_safe(li, n, &dfp->dfp_work) { list_del(li); dfp->dfp_count--; @@ -517,6 +622,24 @@ out: return error; } +/* Move all paused deferred work from @tp to @paused_list. */ +static void +xfs_defer_isolate_paused( + struct xfs_trans *tp, + struct list_head *paused_list) +{ + struct xfs_defer_pending *dfp; + struct xfs_defer_pending *pli; + + list_for_each_entry_safe(dfp, pli, &tp->t_dfops, dfp_list) { + if (!(dfp->dfp_flags & XFS_DEFER_PAUSED)) + continue; + + list_move_tail(&dfp->dfp_list, paused_list); + trace_xfs_defer_isolate_paused(tp->t_mountp, dfp); + } +} + /* * Finish all the pending work. This involves logging intent items for * any work items that wandered in since the last transaction roll (if @@ -532,6 +655,7 @@ xfs_defer_finish_noroll( struct xfs_defer_pending *dfp = NULL; int error = 0; LIST_HEAD(dop_pending); + LIST_HEAD(dop_paused); ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); @@ -550,6 +674,8 @@ xfs_defer_finish_noroll( */ int has_intents = xfs_defer_create_intents(*tp); + xfs_defer_isolate_paused(*tp, &dop_paused); + list_splice_init(&(*tp)->t_dfops, &dop_pending); if (has_intents < 0) { @@ -562,22 +688,33 @@ xfs_defer_finish_noroll( goto out_shutdown; /* Relog intent items to keep the log moving. */ - error = xfs_defer_relog(tp, &dop_pending); - if (error) - goto out_shutdown; + xfs_defer_relog(tp, &dop_pending); + xfs_defer_relog(tp, &dop_paused); + + if ((*tp)->t_flags & XFS_TRANS_DIRTY) { + error = xfs_defer_trans_roll(tp); + if (error) + goto out_shutdown; + } } - dfp = list_first_entry(&dop_pending, struct xfs_defer_pending, - dfp_list); + dfp = list_first_entry_or_null(&dop_pending, + struct xfs_defer_pending, dfp_list); + if (!dfp) + break; error = xfs_defer_finish_one(*tp, dfp); if (error && error != -EAGAIN) goto out_shutdown; } + /* Requeue the paused items in the outgoing transaction. */ + list_splice_tail_init(&dop_paused, &(*tp)->t_dfops); + trace_xfs_defer_finish_done(*tp, _RET_IP_); return 0; out_shutdown: + list_splice_tail_init(&dop_paused, &dop_pending); xfs_defer_trans_abort(*tp, &dop_pending); xfs_force_shutdown((*tp)->t_mountp, SHUTDOWN_CORRUPT_INCORE); trace_xfs_defer_finish_error(*tp, error); @@ -590,6 +727,9 @@ int xfs_defer_finish( struct xfs_trans **tp) { +#ifdef DEBUG + struct xfs_defer_pending *dfp; +#endif int error; /* @@ -609,7 +749,10 @@ xfs_defer_finish( } /* Reset LOWMODE now that we've finished all the dfops. */ - ASSERT(list_empty(&(*tp)->t_dfops)); +#ifdef DEBUG + list_for_each_entry(dfp, &(*tp)->t_dfops, dfp_list) + ASSERT(dfp->dfp_flags & XFS_DEFER_PAUSED); +#endif (*tp)->t_flags &= ~XFS_TRANS_LOWMODE; return 0; } @@ -621,48 +764,165 @@ xfs_defer_cancel( struct xfs_mount *mp = tp->t_mountp; trace_xfs_defer_cancel(tp, _RET_IP_); + xfs_defer_trans_abort(tp, &tp->t_dfops); xfs_defer_cancel_list(mp, &tp->t_dfops); } +/* + * Return the last pending work item attached to this transaction if it matches + * the deferred op type. + */ +static inline struct xfs_defer_pending * +xfs_defer_find_last( + struct xfs_trans *tp, + const struct xfs_defer_op_type *ops) +{ + struct xfs_defer_pending *dfp = NULL; + + /* No dfops at all? */ + if (list_empty(&tp->t_dfops)) + return NULL; + + dfp = list_last_entry(&tp->t_dfops, struct xfs_defer_pending, + dfp_list); + + /* Wrong type? */ + if (dfp->dfp_ops != ops) + return NULL; + return dfp; +} + +/* + * Decide if we can add a deferred work item to the last dfops item attached + * to the transaction. + */ +static inline bool +xfs_defer_can_append( + struct xfs_defer_pending *dfp, + const struct xfs_defer_op_type *ops) +{ + /* Already logged? */ + if (dfp->dfp_intent) + return false; + + /* Paused items cannot absorb more work */ + if (dfp->dfp_flags & XFS_DEFER_PAUSED) + return NULL; + + /* Already full? */ + if (ops->max_items && dfp->dfp_count >= ops->max_items) + return false; + + return true; +} + +/* Create a new pending item at the end of the transaction list. */ +static inline struct xfs_defer_pending * +xfs_defer_alloc( + struct xfs_trans *tp, + const struct xfs_defer_op_type *ops) +{ + struct xfs_defer_pending *dfp; + + dfp = kmem_cache_zalloc(xfs_defer_pending_cache, + GFP_NOFS | __GFP_NOFAIL); + dfp->dfp_ops = ops; + INIT_LIST_HEAD(&dfp->dfp_work); + list_add_tail(&dfp->dfp_list, &tp->t_dfops); + + return dfp; +} + /* Add an item for later deferred processing. */ -void +struct xfs_defer_pending * xfs_defer_add( struct xfs_trans *tp, - enum xfs_defer_ops_type type, - struct list_head *li) + struct list_head *li, + const struct xfs_defer_op_type *ops) { struct xfs_defer_pending *dfp = NULL; - const struct xfs_defer_op_type *ops = defer_op_types[type]; ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); - BUILD_BUG_ON(ARRAY_SIZE(defer_op_types) != XFS_DEFER_OPS_TYPE_MAX); - /* - * Add the item to a pending item at the end of the intake list. - * If the last pending item has the same type, reuse it. Else, - * create a new pending item at the end of the intake list. - */ - if (!list_empty(&tp->t_dfops)) { - dfp = list_last_entry(&tp->t_dfops, - struct xfs_defer_pending, dfp_list); - if (dfp->dfp_type != type || - (ops->max_items && dfp->dfp_count >= ops->max_items)) - dfp = NULL; - } - if (!dfp) { - dfp = kmem_cache_zalloc(xfs_defer_pending_cache, - GFP_NOFS | __GFP_NOFAIL); - dfp->dfp_type = type; - dfp->dfp_intent = NULL; - dfp->dfp_done = NULL; - dfp->dfp_count = 0; - INIT_LIST_HEAD(&dfp->dfp_work); - list_add_tail(&dfp->dfp_list, &tp->t_dfops); - } + dfp = xfs_defer_find_last(tp, ops); + if (!dfp || !xfs_defer_can_append(dfp, ops)) + dfp = xfs_defer_alloc(tp, ops); - list_add_tail(li, &dfp->dfp_work); + xfs_defer_add_item(dfp, li); trace_xfs_defer_add_item(tp->t_mountp, dfp, li); - dfp->dfp_count++; + return dfp; +} + +/* + * Add a defer ops barrier to force two otherwise adjacent deferred work items + * to be tracked separately and have separate log items. + */ +void +xfs_defer_add_barrier( + struct xfs_trans *tp) +{ + struct xfs_defer_pending *dfp; + + ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); + + /* If the last defer op added was a barrier, we're done. */ + dfp = xfs_defer_find_last(tp, &xfs_barrier_defer_type); + if (dfp) + return; + + xfs_defer_alloc(tp, &xfs_barrier_defer_type); + + trace_xfs_defer_add_item(tp->t_mountp, dfp, NULL); +} + +/* + * Create a pending deferred work item to replay the recovered intent item + * and add it to the list. + */ +void +xfs_defer_start_recovery( + struct xfs_log_item *lip, + struct list_head *r_dfops, + const struct xfs_defer_op_type *ops) +{ + struct xfs_defer_pending *dfp; + + dfp = kmem_cache_zalloc(xfs_defer_pending_cache, + GFP_NOFS | __GFP_NOFAIL); + dfp->dfp_ops = ops; + dfp->dfp_intent = lip; + INIT_LIST_HEAD(&dfp->dfp_work); + list_add_tail(&dfp->dfp_list, r_dfops); +} + +/* + * Cancel a deferred work item created to recover a log intent item. @dfp + * will be freed after this function returns. + */ +void +xfs_defer_cancel_recovery( + struct xfs_mount *mp, + struct xfs_defer_pending *dfp) +{ + xfs_defer_pending_abort(mp, dfp); + xfs_defer_pending_cancel_work(mp, dfp); +} + +/* Replay the deferred work item created from a recovered log intent item. */ +int +xfs_defer_finish_recovery( + struct xfs_mount *mp, + struct xfs_defer_pending *dfp, + struct list_head *capture_list) +{ + const struct xfs_defer_op_type *ops = dfp->dfp_ops; + int error; + + /* dfp is freed by recover_work and must not be accessed afterwards */ + error = ops->recover_work(dfp, capture_list); + if (error) + trace_xlog_intent_recovery_failed(mp, ops, error); + return error; } /* @@ -769,7 +1029,7 @@ xfs_defer_ops_capture_abort( { unsigned short i; - xfs_defer_pending_abort(mp, &dfc->dfc_dfops); + xfs_defer_pending_abort_list(mp, &dfc->dfc_dfops); xfs_defer_cancel_list(mp, &dfc->dfc_dfops); for (i = 0; i < dfc->dfc_held.dr_bufs; i++) @@ -938,3 +1198,36 @@ xfs_defer_destroy_item_caches(void) xfs_rmap_intent_destroy_cache(); xfs_defer_destroy_cache(); } + +/* + * Mark a deferred work item so that it will be requeued indefinitely without + * being finished. Caller must ensure there are no data dependencies on this + * work item in the meantime. + */ +void +xfs_defer_item_pause( + struct xfs_trans *tp, + struct xfs_defer_pending *dfp) +{ + ASSERT(!(dfp->dfp_flags & XFS_DEFER_PAUSED)); + + dfp->dfp_flags |= XFS_DEFER_PAUSED; + + trace_xfs_defer_item_pause(tp->t_mountp, dfp); +} + +/* + * Release a paused deferred work item so that it will be finished during the + * next transaction roll. + */ +void +xfs_defer_item_unpause( + struct xfs_trans *tp, + struct xfs_defer_pending *dfp) +{ + ASSERT(dfp->dfp_flags & XFS_DEFER_PAUSED); + + dfp->dfp_flags &= ~XFS_DEFER_PAUSED; + + trace_xfs_defer_item_unpause(tp->t_mountp, dfp); +} diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h index 8788ad5f6a..18a9fb92dd 100644 --- a/fs/xfs/libxfs/xfs_defer.h +++ b/fs/xfs/libxfs/xfs_defer.h @@ -11,19 +11,6 @@ struct xfs_defer_op_type; struct xfs_defer_capture; /* - * Header for deferred operation list. - */ -enum xfs_defer_ops_type { - XFS_DEFER_OPS_TYPE_BMAP, - XFS_DEFER_OPS_TYPE_REFCOUNT, - XFS_DEFER_OPS_TYPE_RMAP, - XFS_DEFER_OPS_TYPE_FREE, - XFS_DEFER_OPS_TYPE_AGFL_FREE, - XFS_DEFER_OPS_TYPE_ATTR, - XFS_DEFER_OPS_TYPE_MAX, -}; - -/* * Save a log intent item and a list of extents, so that we can replay * whatever action had to happen to the extent list and file the log done * item. @@ -33,19 +20,35 @@ struct xfs_defer_pending { struct list_head dfp_work; /* work items */ struct xfs_log_item *dfp_intent; /* log intent item */ struct xfs_log_item *dfp_done; /* log done item */ + const struct xfs_defer_op_type *dfp_ops; unsigned int dfp_count; /* # extent items */ - enum xfs_defer_ops_type dfp_type; + unsigned int dfp_flags; }; -void xfs_defer_add(struct xfs_trans *tp, enum xfs_defer_ops_type type, - struct list_head *h); +/* + * Create a log intent item for this deferred item, but don't actually finish + * the work. Caller must clear this before the final transaction commit. + */ +#define XFS_DEFER_PAUSED (1U << 0) + +#define XFS_DEFER_PENDING_STRINGS \ + { XFS_DEFER_PAUSED, "paused" } + +void xfs_defer_item_pause(struct xfs_trans *tp, struct xfs_defer_pending *dfp); +void xfs_defer_item_unpause(struct xfs_trans *tp, struct xfs_defer_pending *dfp); + +struct xfs_defer_pending *xfs_defer_add(struct xfs_trans *tp, struct list_head *h, + const struct xfs_defer_op_type *ops); int xfs_defer_finish_noroll(struct xfs_trans **tp); int xfs_defer_finish(struct xfs_trans **tp); +int xfs_defer_finish_one(struct xfs_trans *tp, struct xfs_defer_pending *dfp); void xfs_defer_cancel(struct xfs_trans *); void xfs_defer_move(struct xfs_trans *dtp, struct xfs_trans *stp); /* Description of a deferred type. */ struct xfs_defer_op_type { + const char *name; + unsigned int max_items; struct xfs_log_item *(*create_intent)(struct xfs_trans *tp, struct list_head *items, unsigned int count, bool sort); void (*abort_intent)(struct xfs_log_item *intent); @@ -56,7 +59,11 @@ struct xfs_defer_op_type { void (*finish_cleanup)(struct xfs_trans *tp, struct xfs_btree_cur *state, int error); void (*cancel_item)(struct list_head *item); - unsigned int max_items; + int (*recover_work)(struct xfs_defer_pending *dfp, + struct list_head *capture_list); + struct xfs_log_item *(*relog_intent)(struct xfs_trans *tp, + struct xfs_log_item *intent, + struct xfs_log_item *done_item); }; extern const struct xfs_defer_op_type xfs_bmap_update_defer_type; @@ -125,7 +132,25 @@ void xfs_defer_ops_capture_abort(struct xfs_mount *mp, struct xfs_defer_capture *d); void xfs_defer_resources_rele(struct xfs_defer_resources *dres); +void xfs_defer_start_recovery(struct xfs_log_item *lip, + struct list_head *r_dfops, const struct xfs_defer_op_type *ops); +void xfs_defer_cancel_recovery(struct xfs_mount *mp, + struct xfs_defer_pending *dfp); +int xfs_defer_finish_recovery(struct xfs_mount *mp, + struct xfs_defer_pending *dfp, struct list_head *capture_list); + +static inline void +xfs_defer_add_item( + struct xfs_defer_pending *dfp, + struct list_head *work) +{ + list_add_tail(work, &dfp->dfp_work); + dfp->dfp_count++; +} + int __init xfs_defer_init_item_caches(void); void xfs_defer_destroy_item_caches(void); +void xfs_defer_add_barrier(struct xfs_trans *tp); + #endif /* __XFS_DEFER_H__ */ diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c index f5462fd582..a766732815 100644 --- a/fs/xfs/libxfs/xfs_dir2.c +++ b/fs/xfs/libxfs/xfs_dir2.c @@ -196,7 +196,7 @@ xfs_dir_isempty( return 1; if (dp->i_disk_size > xfs_inode_data_fork_size(dp)) return 0; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = dp->i_df.if_data; return !sfp->count; } diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c index 00f960a703..3c256d4cc4 100644 --- a/fs/xfs/libxfs/xfs_dir2_block.c +++ b/fs/xfs/libxfs/xfs_dir2_block.c @@ -1089,7 +1089,7 @@ xfs_dir2_sf_to_block( int newoffset; /* offset from current entry */ unsigned int offset = geo->data_entry_offset; xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */ - xfs_dir2_sf_hdr_t *oldsfp; /* old shortform header */ + struct xfs_dir2_sf_hdr *oldsfp = ifp->if_data; xfs_dir2_sf_hdr_t *sfp; /* shortform header */ __be16 *tagp; /* end of data entry */ struct xfs_name name; @@ -1099,10 +1099,8 @@ xfs_dir2_sf_to_block( ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); ASSERT(dp->i_disk_size >= offsetof(struct xfs_dir2_sf_hdr, parent)); - oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data; - ASSERT(ifp->if_bytes == dp->i_disk_size); - ASSERT(ifp->if_u1.if_data != NULL); + ASSERT(oldsfp != NULL); ASSERT(dp->i_disk_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count)); ASSERT(dp->i_df.if_nextents == 0); diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h index 7404a9ff1a..1db2e60ba8 100644 --- a/fs/xfs/libxfs/xfs_dir2_priv.h +++ b/fs/xfs/libxfs/xfs_dir2_priv.h @@ -175,7 +175,8 @@ extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); extern int xfs_dir2_sf_removename(struct xfs_da_args *args); extern int xfs_dir2_sf_replace(struct xfs_da_args *args); -extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip); +xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *sfp, int64_t size); int xfs_dir2_sf_entsize(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr, int len); void xfs_dir2_sf_put_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr, diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c index 8cd37e6e9d..e1f83fc7b6 100644 --- a/fs/xfs/libxfs/xfs_dir2_sf.c +++ b/fs/xfs/libxfs/xfs_dir2_sf.c @@ -364,25 +364,23 @@ int /* error */ xfs_dir2_sf_addname( xfs_da_args_t *args) /* operation arguments */ { - xfs_inode_t *dp; /* incore directory inode */ + struct xfs_inode *dp = args->dp; + struct xfs_dir2_sf_hdr *sfp = dp->i_df.if_data; int error; /* error return value */ int incr_isize; /* total change in size */ int new_isize; /* size after adding name */ int objchange; /* changing to 8-byte inodes */ xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */ int pick; /* which algorithm to use */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ trace_xfs_dir2_sf_addname(args); ASSERT(xfs_dir2_sf_lookup(args) == -ENOENT); - dp = args->dp; ASSERT(dp->i_df.if_format == XFS_DINODE_FMT_LOCAL); ASSERT(dp->i_disk_size >= offsetof(struct xfs_dir2_sf_hdr, parent)); ASSERT(dp->i_df.if_bytes == dp->i_disk_size); - ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(sfp != NULL); ASSERT(dp->i_disk_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); /* * Compute entry (and change in) size. @@ -462,20 +460,17 @@ xfs_dir2_sf_addname_easy( { struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; - int byteoff; /* byte offset in sf dir */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + struct xfs_dir2_sf_hdr *sfp = dp->i_df.if_data; + int byteoff = (int)((char *)sfep - (char *)sfp); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; - byteoff = (int)((char *)sfep - (char *)sfp); /* * Grow the in-inode space. */ - xfs_idata_realloc(dp, xfs_dir2_sf_entsize(mp, sfp, args->namelen), + sfp = xfs_idata_realloc(dp, xfs_dir2_sf_entsize(mp, sfp, args->namelen), XFS_DATA_FORK); /* * Need to set up again due to realloc of the inode data. */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff); /* * Fill in the new entry. @@ -528,11 +523,10 @@ xfs_dir2_sf_addname_hard( /* * Copy the old directory to the stack buffer. */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; old_isize = (int)dp->i_disk_size; buf = kmem_alloc(old_isize, 0); oldsfp = (xfs_dir2_sf_hdr_t *)buf; - memcpy(oldsfp, sfp, old_isize); + memcpy(oldsfp, dp->i_df.if_data, old_isize); /* * Loop over the old directory finding the place we're going * to insert the new entry. @@ -556,11 +550,8 @@ xfs_dir2_sf_addname_hard( * the data. */ xfs_idata_realloc(dp, -old_isize, XFS_DATA_FORK); - xfs_idata_realloc(dp, new_isize, XFS_DATA_FORK); - /* - * Reset the pointer since the buffer was reallocated. - */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = xfs_idata_realloc(dp, new_isize, XFS_DATA_FORK); + /* * Copy the first part of the directory, including the header. */ @@ -610,11 +601,10 @@ xfs_dir2_sf_addname_pick( int i; /* entry number */ xfs_dir2_data_aoff_t offset; /* data block offset */ xfs_dir2_sf_entry_t *sfep; /* shortform entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ + struct xfs_dir2_sf_hdr *sfp = dp->i_df.if_data; int size; /* entry's data size */ int used; /* data bytes used */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; size = xfs_dir2_data_entsize(mp, args->namelen); offset = args->geo->data_first_offset; sfep = xfs_dir2_sf_firstentry(sfp); @@ -673,14 +663,13 @@ xfs_dir2_sf_check( { struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; + struct xfs_dir2_sf_hdr *sfp = dp->i_df.if_data; int i; /* entry number */ int i8count; /* number of big inode#s */ xfs_ino_t ino; /* entry inode number */ int offset; /* data offset */ xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; offset = args->geo->data_first_offset; ino = xfs_dir2_sf_get_parent_ino(sfp); i8count = ino > XFS_DIR2_MAX_SHORT_INUM; @@ -707,11 +696,10 @@ xfs_dir2_sf_check( /* Verify the consistency of an inline directory. */ xfs_failaddr_t xfs_dir2_sf_verify( - struct xfs_inode *ip) + struct xfs_mount *mp, + struct xfs_dir2_sf_hdr *sfp, + int64_t size) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); - struct xfs_dir2_sf_hdr *sfp; struct xfs_dir2_sf_entry *sfep; struct xfs_dir2_sf_entry *next_sfep; char *endp; @@ -719,15 +707,9 @@ xfs_dir2_sf_verify( int i; int i8count; int offset; - int64_t size; int error; uint8_t filetype; - ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); - - sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data; - size = ifp->if_bytes; - /* * Give up if the directory is way too short. */ @@ -834,15 +816,13 @@ xfs_dir2_sf_create( ASSERT(dp->i_df.if_bytes == 0); i8count = pino > XFS_DIR2_MAX_SHORT_INUM; size = xfs_dir2_sf_hdr_size(i8count); + /* - * Make a buffer for the data. - */ - xfs_idata_realloc(dp, size, XFS_DATA_FORK); - /* - * Fill in the header, + * Make a buffer for the data and fill in the header. */ - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = xfs_idata_realloc(dp, size, XFS_DATA_FORK); sfp->i8count = i8count; + /* * Now can put in the inode number, since i8count is set. */ @@ -864,9 +844,9 @@ xfs_dir2_sf_lookup( { struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; + struct xfs_dir2_sf_hdr *sfp = dp->i_df.if_data; int i; /* entry index */ xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ enum xfs_dacmp cmp; /* comparison result */ xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */ @@ -877,8 +857,7 @@ xfs_dir2_sf_lookup( ASSERT(dp->i_df.if_format == XFS_DINODE_FMT_LOCAL); ASSERT(dp->i_disk_size >= offsetof(struct xfs_dir2_sf_hdr, parent)); ASSERT(dp->i_df.if_bytes == dp->i_disk_size); - ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(sfp != NULL); ASSERT(dp->i_disk_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); /* * Special case for . @@ -940,13 +919,13 @@ xfs_dir2_sf_removename( { struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; + struct xfs_dir2_sf_hdr *sfp = dp->i_df.if_data; int byteoff; /* offset of removed entry */ int entsize; /* this entry's size */ int i; /* shortform entry index */ int newsize; /* new inode size */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ trace_xfs_dir2_sf_removename(args); @@ -954,8 +933,7 @@ xfs_dir2_sf_removename( oldsize = (int)dp->i_disk_size; ASSERT(oldsize >= offsetof(struct xfs_dir2_sf_hdr, parent)); ASSERT(dp->i_df.if_bytes == oldsize); - ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(sfp != NULL); ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count)); /* * Loop over the old directory entries. @@ -992,11 +970,12 @@ xfs_dir2_sf_removename( */ sfp->count--; dp->i_disk_size = newsize; + /* * Reallocate, making it smaller. */ - xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK); + /* * Are we changing inode number size? */ @@ -1019,13 +998,12 @@ xfs_dir2_sf_replace_needblock( struct xfs_inode *dp, xfs_ino_t inum) { + struct xfs_dir2_sf_hdr *sfp = dp->i_df.if_data; int newsize; - struct xfs_dir2_sf_hdr *sfp; if (dp->i_df.if_format != XFS_DINODE_FMT_LOCAL) return false; - sfp = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data; newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF; return inum > XFS_DIR2_MAX_SHORT_INUM && @@ -1041,19 +1019,18 @@ xfs_dir2_sf_replace( { struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; + struct xfs_dir2_sf_hdr *sfp = dp->i_df.if_data; int i; /* entry index */ xfs_ino_t ino=0; /* entry old inode number */ int i8elevated; /* sf_toino8 set i8count=1 */ xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ - xfs_dir2_sf_hdr_t *sfp; /* shortform structure */ trace_xfs_dir2_sf_replace(args); ASSERT(dp->i_df.if_format == XFS_DINODE_FMT_LOCAL); ASSERT(dp->i_disk_size >= offsetof(struct xfs_dir2_sf_hdr, parent)); ASSERT(dp->i_df.if_bytes == dp->i_disk_size); - ASSERT(dp->i_df.if_u1.if_data != NULL); - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + ASSERT(sfp != NULL); ASSERT(dp->i_disk_size >= xfs_dir2_sf_hdr_size(sfp->i8count)); /* @@ -1076,7 +1053,7 @@ xfs_dir2_sf_replace( */ xfs_dir2_sf_toino8(args); i8elevated = 1; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = dp->i_df.if_data; } else i8elevated = 0; @@ -1157,11 +1134,11 @@ xfs_dir2_sf_toino4( { struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; + struct xfs_dir2_sf_hdr *oldsfp = dp->i_df.if_data; char *buf; /* old dir's buffer */ int i; /* entry index */ int newsize; /* new inode size */ xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ - xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* new sf entry */ xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ @@ -1175,7 +1152,6 @@ xfs_dir2_sf_toino4( */ oldsize = dp->i_df.if_bytes; buf = kmem_alloc(oldsize, 0); - oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; ASSERT(oldsfp->i8count == 1); memcpy(buf, oldsfp, oldsize); /* @@ -1188,7 +1164,7 @@ xfs_dir2_sf_toino4( * Reset our pointers, the data has moved. */ oldsfp = (xfs_dir2_sf_hdr_t *)buf; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = dp->i_df.if_data; /* * Fill in the new header. */ @@ -1230,11 +1206,11 @@ xfs_dir2_sf_toino8( { struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; + struct xfs_dir2_sf_hdr *oldsfp = dp->i_df.if_data; char *buf; /* old dir's buffer */ int i; /* entry index */ int newsize; /* new inode size */ xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ - xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */ int oldsize; /* old inode size */ xfs_dir2_sf_entry_t *sfep; /* new sf entry */ xfs_dir2_sf_hdr_t *sfp; /* new sf directory */ @@ -1248,7 +1224,6 @@ xfs_dir2_sf_toino8( */ oldsize = dp->i_df.if_bytes; buf = kmem_alloc(oldsize, 0); - oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; ASSERT(oldsfp->i8count == 0); memcpy(buf, oldsfp, oldsize); /* @@ -1261,7 +1236,7 @@ xfs_dir2_sf_toino8( * Reset our pointers, the data has moved. */ oldsfp = (xfs_dir2_sf_hdr_t *)buf; - sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; + sfp = dp->i_df.if_data; /* * Fill in the new header. */ diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 9a88aba158..382ab1e71c 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1008,7 +1008,7 @@ enum xfs_dinode_fmt { * Return pointers to the data or attribute forks. */ #define XFS_DFORK_DPTR(dip) \ - ((char *)dip + xfs_dinode_size(dip->di_version)) + ((void *)dip + xfs_dinode_size(dip->di_version)) #define XFS_DFORK_APTR(dip) \ (XFS_DFORK_DPTR(dip) + XFS_DFORK_BOFF(dip)) #define XFS_DFORK_PTR(dip,w) \ @@ -1156,20 +1156,6 @@ static inline bool xfs_dinode_has_large_extent_counts( #define XFS_DFL_RTEXTSIZE (64 * 1024) /* 64kB */ #define XFS_MIN_RTEXTSIZE (4 * 1024) /* 4kB */ -#define XFS_BLOCKSIZE(mp) ((mp)->m_sb.sb_blocksize) -#define XFS_BLOCKMASK(mp) ((mp)->m_blockmask) - -/* - * RT bit manipulation macros. - */ -#define XFS_RTMIN(a,b) ((a) < (b) ? (a) : (b)) -#define XFS_RTMAX(a,b) ((a) > (b) ? (a) : (b)) - -#define XFS_RTLOBIT(w) xfs_lowbit32(w) -#define XFS_RTHIBIT(w) xfs_highbit32(w) - -#define XFS_RTBLOCKLOG(b) xfs_highbit64(b) - /* * Dquot and dquot block format definitions */ @@ -1272,6 +1258,9 @@ static inline time64_t xfs_dq_bigtime_to_unix(uint32_t ondisk_seconds) #define XFS_DQ_GRACE_MIN ((int64_t)0) #define XFS_DQ_GRACE_MAX ((int64_t)U32_MAX) +/* Maximum id value for a quota record */ +#define XFS_DQ_ID_MAX (U32_MAX) + /* * This is the main portion of the on-disk representation of quota information * for a user. We pad this with some more expansion room to construct the on diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index 99e796256c..6296993ff8 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -68,6 +68,11 @@ struct xfs_fsop_geom; #define XFS_SICK_INO_SYMLINK (1 << 6) /* symbolic link remote target */ #define XFS_SICK_INO_PARENT (1 << 7) /* parent pointers */ +#define XFS_SICK_INO_BMBTD_ZAPPED (1 << 8) /* data fork erased */ +#define XFS_SICK_INO_BMBTA_ZAPPED (1 << 9) /* attr fork erased */ +#define XFS_SICK_INO_DIR_ZAPPED (1 << 10) /* directory erased */ +#define XFS_SICK_INO_SYMLINK_ZAPPED (1 << 11) /* symlink erased */ + /* Primary evidence of health problems in a given group. */ #define XFS_SICK_FS_PRIMARY (XFS_SICK_FS_COUNTERS | \ XFS_SICK_FS_UQUOTA | \ @@ -97,6 +102,11 @@ struct xfs_fsop_geom; XFS_SICK_INO_SYMLINK | \ XFS_SICK_INO_PARENT) +#define XFS_SICK_INO_ZAPPED (XFS_SICK_INO_BMBTD_ZAPPED | \ + XFS_SICK_INO_BMBTA_ZAPPED | \ + XFS_SICK_INO_DIR_ZAPPED | \ + XFS_SICK_INO_SYMLINK_ZAPPED) + /* These functions must be provided by the xfs implementation. */ void xfs_fs_mark_sick(struct xfs_mount *mp, unsigned int mask); diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index b83e54c709..2361a22035 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -95,18 +95,28 @@ xfs_inobt_btrec_to_irec( irec->ir_free = be64_to_cpu(rec->inobt.ir_free); } +/* Compute the freecount of an incore inode record. */ +uint8_t +xfs_inobt_rec_freecount( + const struct xfs_inobt_rec_incore *irec) +{ + uint64_t realfree = irec->ir_free; + + if (xfs_inobt_issparse(irec->ir_holemask)) + realfree &= xfs_inobt_irec_to_allocmask(irec); + return hweight64(realfree); +} + /* Simple checks for inode records. */ xfs_failaddr_t xfs_inobt_check_irec( - struct xfs_btree_cur *cur, + struct xfs_perag *pag, const struct xfs_inobt_rec_incore *irec) { - uint64_t realfree; - /* Record has to be properly aligned within the AG. */ - if (!xfs_verify_agino(cur->bc_ag.pag, irec->ir_startino)) + if (!xfs_verify_agino(pag, irec->ir_startino)) return __this_address; - if (!xfs_verify_agino(cur->bc_ag.pag, + if (!xfs_verify_agino(pag, irec->ir_startino + XFS_INODES_PER_CHUNK - 1)) return __this_address; if (irec->ir_count < XFS_INODES_PER_HOLEMASK_BIT || @@ -115,12 +125,7 @@ xfs_inobt_check_irec( if (irec->ir_freecount > XFS_INODES_PER_CHUNK) return __this_address; - /* if there are no holes, return the first available offset */ - if (!xfs_inobt_issparse(irec->ir_holemask)) - realfree = irec->ir_free; - else - realfree = irec->ir_free & xfs_inobt_irec_to_allocmask(irec); - if (hweight64(realfree) != irec->ir_freecount) + if (xfs_inobt_rec_freecount(irec) != irec->ir_freecount) return __this_address; return NULL; @@ -164,7 +169,7 @@ xfs_inobt_get_rec( return error; xfs_inobt_btrec_to_irec(mp, rec, irec); - fa = xfs_inobt_check_irec(cur, irec); + fa = xfs_inobt_check_irec(cur->bc_ag.pag, irec); if (fa) return xfs_inobt_complain_bad_rec(cur, fa, irec); @@ -1854,7 +1859,7 @@ xfs_difree_inode_chunk( return xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES, - XFS_AG_RESV_NONE); + XFS_AG_RESV_NONE, false); } /* holemask is only 16-bits (fits in an unsigned long) */ @@ -1900,7 +1905,8 @@ xfs_difree_inode_chunk( ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); error = xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, - &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE); + &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE, + false); if (error) return error; @@ -2739,7 +2745,7 @@ xfs_ialloc_count_inodes_rec( xfs_failaddr_t fa; xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec); - fa = xfs_inobt_check_irec(cur, &irec); + fa = xfs_inobt_check_irec(cur->bc_ag.pag, &irec); if (fa) return xfs_inobt_complain_bad_rec(cur, fa, &irec); diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index fe824bb04a..f1412183bb 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -79,6 +79,7 @@ int xfs_inobt_lookup(struct xfs_btree_cur *cur, xfs_agino_t ino, */ int xfs_inobt_get_rec(struct xfs_btree_cur *cur, xfs_inobt_rec_incore_t *rec, int *stat); +uint8_t xfs_inobt_rec_freecount(const struct xfs_inobt_rec_incore *irec); /* * Inode chunk initialisation routine @@ -93,7 +94,7 @@ union xfs_btree_rec; void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, const union xfs_btree_rec *rec, struct xfs_inobt_rec_incore *irec); -xfs_failaddr_t xfs_inobt_check_irec(struct xfs_btree_cur *cur, +xfs_failaddr_t xfs_inobt_check_irec(struct xfs_perag *pag, const struct xfs_inobt_rec_incore *irec); int xfs_ialloc_has_inodes_at_extent(struct xfs_btree_cur *cur, xfs_agblock_t bno, xfs_extlen_t len, diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 9258f01c00..42a5e1f227 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -161,7 +161,7 @@ __xfs_inobt_free_block( xfs_inobt_mod_blockcount(cur, -1); fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); return xfs_free_extent_later(cur->bc_tp, fsbno, 1, - &XFS_RMAP_OINFO_INOBT, resv); + &XFS_RMAP_OINFO_INOBT, resv, false); } STATIC int diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 773cf43494..f4e6b200cd 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -158,7 +158,7 @@ static void * xfs_iext_find_first_leaf( struct xfs_ifork *ifp) { - struct xfs_iext_node *node = ifp->if_u1.if_root; + struct xfs_iext_node *node = ifp->if_data; int height; if (!ifp->if_height) @@ -176,7 +176,7 @@ static void * xfs_iext_find_last_leaf( struct xfs_ifork *ifp) { - struct xfs_iext_node *node = ifp->if_u1.if_root; + struct xfs_iext_node *node = ifp->if_data; int height, i; if (!ifp->if_height) @@ -306,7 +306,7 @@ xfs_iext_find_level( xfs_fileoff_t offset, int level) { - struct xfs_iext_node *node = ifp->if_u1.if_root; + struct xfs_iext_node *node = ifp->if_data; int height, i; if (!ifp->if_height) @@ -402,12 +402,12 @@ xfs_iext_grow( int i; if (ifp->if_height == 1) { - struct xfs_iext_leaf *prev = ifp->if_u1.if_root; + struct xfs_iext_leaf *prev = ifp->if_data; node->keys[0] = xfs_iext_leaf_key(prev, 0); node->ptrs[0] = prev; } else { - struct xfs_iext_node *prev = ifp->if_u1.if_root; + struct xfs_iext_node *prev = ifp->if_data; ASSERT(ifp->if_height > 1); @@ -418,7 +418,7 @@ xfs_iext_grow( for (i = 1; i < KEYS_PER_NODE; i++) node->keys[i] = XFS_IEXT_KEY_INVALID; - ifp->if_u1.if_root = node; + ifp->if_data = node; ifp->if_height++; } @@ -430,7 +430,7 @@ xfs_iext_update_node( int level, void *ptr) { - struct xfs_iext_node *node = ifp->if_u1.if_root; + struct xfs_iext_node *node = ifp->if_data; int height, i; for (height = ifp->if_height; height > level; height--) { @@ -583,11 +583,11 @@ xfs_iext_alloc_root( { ASSERT(ifp->if_bytes == 0); - ifp->if_u1.if_root = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS); + ifp->if_data = kmem_zalloc(sizeof(struct xfs_iext_rec), KM_NOFS); ifp->if_height = 1; /* now that we have a node step into it */ - cur->leaf = ifp->if_u1.if_root; + cur->leaf = ifp->if_data; cur->pos = 0; } @@ -603,9 +603,9 @@ xfs_iext_realloc_root( if (new_size / sizeof(struct xfs_iext_rec) == RECS_PER_LEAF) new_size = NODE_SIZE; - new = krealloc(ifp->if_u1.if_root, new_size, GFP_NOFS | __GFP_NOFAIL); + new = krealloc(ifp->if_data, new_size, GFP_NOFS | __GFP_NOFAIL); memset(new + ifp->if_bytes, 0, new_size - ifp->if_bytes); - ifp->if_u1.if_root = new; + ifp->if_data = new; cur->leaf = new; } @@ -622,13 +622,11 @@ static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp) } void -xfs_iext_insert( - struct xfs_inode *ip, +xfs_iext_insert_raw( + struct xfs_ifork *ifp, struct xfs_iext_cursor *cur, - struct xfs_bmbt_irec *irec, - int state) + struct xfs_bmbt_irec *irec) { - struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); xfs_fileoff_t offset = irec->br_startoff; struct xfs_iext_leaf *new = NULL; int nr_entries, i; @@ -662,12 +660,23 @@ xfs_iext_insert( xfs_iext_set(cur_rec(cur), irec); ifp->if_bytes += sizeof(struct xfs_iext_rec); - trace_xfs_iext_insert(ip, cur, state, _RET_IP_); - if (new) xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2); } +void +xfs_iext_insert( + struct xfs_inode *ip, + struct xfs_iext_cursor *cur, + struct xfs_bmbt_irec *irec, + int state) +{ + struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state); + + xfs_iext_insert_raw(ifp, cur, irec); + trace_xfs_iext_insert(ip, cur, state, _RET_IP_); +} + static struct xfs_iext_node * xfs_iext_rebalance_node( struct xfs_iext_node *parent, @@ -777,8 +786,8 @@ again: * If we are at the root and only one entry is left we can just * free this node and update the root pointer. */ - ASSERT(node == ifp->if_u1.if_root); - ifp->if_u1.if_root = node->ptrs[0]; + ASSERT(node == ifp->if_data); + ifp->if_data = node->ptrs[0]; ifp->if_height--; kmem_free(node); } @@ -854,8 +863,8 @@ xfs_iext_free_last_leaf( struct xfs_ifork *ifp) { ifp->if_height--; - kmem_free(ifp->if_u1.if_root); - ifp->if_u1.if_root = NULL; + kmem_free(ifp->if_data); + ifp->if_data = NULL; } void @@ -872,7 +881,7 @@ xfs_iext_remove( trace_xfs_iext_remove(ip, cur, state, _RET_IP_); ASSERT(ifp->if_height > 0); - ASSERT(ifp->if_u1.if_root != NULL); + ASSERT(ifp->if_data != NULL); ASSERT(xfs_iext_valid(ifp, cur)); xfs_iext_inc_seq(ifp); @@ -1042,9 +1051,9 @@ void xfs_iext_destroy( struct xfs_ifork *ifp) { - xfs_iext_destroy_node(ifp->if_u1.if_root, ifp->if_height); + xfs_iext_destroy_node(ifp->if_data, ifp->if_height); ifp->if_bytes = 0; ifp->if_height = 0; - ifp->if_u1.if_root = NULL; + ifp->if_data = NULL; } diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 5a2e7ddfa7..f4569e18a8 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -50,12 +50,15 @@ xfs_init_local_fork( mem_size++; if (size) { - ifp->if_u1.if_data = kmem_alloc(mem_size, KM_NOFS); - memcpy(ifp->if_u1.if_data, data, size); + char *new_data = kmem_alloc(mem_size, KM_NOFS); + + memcpy(new_data, data, size); if (zero_terminate) - ifp->if_u1.if_data[size] = '\0'; + new_data[size] = '\0'; + + ifp->if_data = new_data; } else { - ifp->if_u1.if_data = NULL; + ifp->if_data = NULL; } ifp->if_bytes = size; @@ -125,7 +128,7 @@ xfs_iformat_extents( } ifp->if_bytes = 0; - ifp->if_u1.if_root = NULL; + ifp->if_data = NULL; ifp->if_height = 0; if (size) { dp = (xfs_bmbt_rec_t *) XFS_DFORK_PTR(dip, whichfork); @@ -212,7 +215,7 @@ xfs_iformat_btree( ifp->if_broot, size); ifp->if_bytes = 0; - ifp->if_u1.if_root = NULL; + ifp->if_data = NULL; ifp->if_height = 0; return 0; } @@ -276,10 +279,9 @@ static uint16_t xfs_dfork_attr_shortform_size( struct xfs_dinode *dip) { - struct xfs_attr_shortform *atp = - (struct xfs_attr_shortform *)XFS_DFORK_APTR(dip); + struct xfs_attr_sf_hdr *sf = XFS_DFORK_APTR(dip); - return be16_to_cpu(atp->hdr.totsize); + return be16_to_cpu(sf->totsize); } void @@ -493,7 +495,7 @@ xfs_iroot_realloc( * byte_diff -- the change in the number of bytes, positive or negative, * requested for the if_data array. */ -void +void * xfs_idata_realloc( struct xfs_inode *ip, int64_t byte_diff, @@ -505,21 +507,18 @@ xfs_idata_realloc( ASSERT(new_size >= 0); ASSERT(new_size <= xfs_inode_fork_size(ip, whichfork)); - if (byte_diff == 0) - return; - - if (new_size == 0) { - kmem_free(ifp->if_u1.if_data); - ifp->if_u1.if_data = NULL; - ifp->if_bytes = 0; - return; + if (byte_diff) { + ifp->if_data = krealloc(ifp->if_data, new_size, + GFP_NOFS | __GFP_NOFAIL); + if (new_size == 0) + ifp->if_data = NULL; + ifp->if_bytes = new_size; } - ifp->if_u1.if_data = krealloc(ifp->if_u1.if_data, new_size, - GFP_NOFS | __GFP_NOFAIL); - ifp->if_bytes = new_size; + return ifp->if_data; } +/* Free all memory and reset a fork back to its initial state. */ void xfs_idestroy_fork( struct xfs_ifork *ifp) @@ -531,8 +530,8 @@ xfs_idestroy_fork( switch (ifp->if_format) { case XFS_DINODE_FMT_LOCAL: - kmem_free(ifp->if_u1.if_data); - ifp->if_u1.if_data = NULL; + kmem_free(ifp->if_data); + ifp->if_data = NULL; break; case XFS_DINODE_FMT_EXTENTS: case XFS_DINODE_FMT_BTREE: @@ -625,9 +624,9 @@ xfs_iflush_fork( case XFS_DINODE_FMT_LOCAL: if ((iip->ili_fields & dataflag[whichfork]) && (ifp->if_bytes > 0)) { - ASSERT(ifp->if_u1.if_data != NULL); + ASSERT(ifp->if_data != NULL); ASSERT(ifp->if_bytes <= xfs_inode_fork_size(ip, whichfork)); - memcpy(cp, ifp->if_u1.if_data, ifp->if_bytes); + memcpy(cp, ifp->if_data, ifp->if_bytes); } break; @@ -702,19 +701,27 @@ xfs_ifork_verify_local_data( xfs_failaddr_t fa = NULL; switch (VFS_I(ip)->i_mode & S_IFMT) { - case S_IFDIR: - fa = xfs_dir2_sf_verify(ip); + case S_IFDIR: { + struct xfs_mount *mp = ip->i_mount; + struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); + struct xfs_dir2_sf_hdr *sfp = ifp->if_data; + + fa = xfs_dir2_sf_verify(mp, sfp, ifp->if_bytes); break; - case S_IFLNK: - fa = xfs_symlink_shortform_verify(ip); + } + case S_IFLNK: { + struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); + + fa = xfs_symlink_shortform_verify(ifp->if_data, ifp->if_bytes); break; + } default: break; } if (fa) { xfs_inode_verifier_error(ip, -EFSCORRUPTED, "data fork", - ip->i_df.if_u1.if_data, ip->i_df.if_bytes, fa); + ip->i_df.if_data, ip->i_df.if_bytes, fa); return -EFSCORRUPTED; } @@ -729,14 +736,17 @@ xfs_ifork_verify_local_attr( struct xfs_ifork *ifp = &ip->i_af; xfs_failaddr_t fa; - if (!xfs_inode_has_attr_fork(ip)) + if (!xfs_inode_has_attr_fork(ip)) { fa = __this_address; - else - fa = xfs_attr_shortform_verify(ip); + } else { + struct xfs_ifork *ifp = &ip->i_af; + ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); + fa = xfs_attr_shortform_verify(ifp->if_data, ifp->if_bytes); + } if (fa) { xfs_inode_verifier_error(ip, -EFSCORRUPTED, "attr fork", - ifp->if_u1.if_data, ifp->if_bytes, fa); + ifp->if_data, ifp->if_bytes, fa); return -EFSCORRUPTED; } diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index 96d307784c..96303249d2 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -13,14 +13,12 @@ struct xfs_dinode; * File incore extent information, present for each of data & attr forks. */ struct xfs_ifork { - int64_t if_bytes; /* bytes in if_u1 */ + int64_t if_bytes; /* bytes in if_data */ struct xfs_btree_block *if_broot; /* file's incore btree root */ unsigned int if_seq; /* fork mod counter */ int if_height; /* height of the extent tree */ - union { - void *if_root; /* extent tree root */ - char *if_data; /* inline file data */ - } if_u1; + void *if_data; /* extent tree root or + inline data */ xfs_extnum_t if_nextents; /* # of extents in this fork */ short if_broot_bytes; /* bytes allocated for root */ int8_t if_format; /* format of this fork */ @@ -170,7 +168,7 @@ int xfs_iformat_attr_fork(struct xfs_inode *, struct xfs_dinode *); void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, struct xfs_inode_log_item *, int); void xfs_idestroy_fork(struct xfs_ifork *ifp); -void xfs_idata_realloc(struct xfs_inode *ip, int64_t byte_diff, +void * xfs_idata_realloc(struct xfs_inode *ip, int64_t byte_diff, int whichfork); void xfs_iroot_realloc(struct xfs_inode *, int, int); int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int); @@ -180,6 +178,9 @@ void xfs_init_local_fork(struct xfs_inode *ip, int whichfork, const void *data, int64_t size); xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp); +void xfs_iext_insert_raw(struct xfs_ifork *ifp, + struct xfs_iext_cursor *cur, + struct xfs_bmbt_irec *irec); void xfs_iext_insert(struct xfs_inode *, struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *, int); void xfs_iext_remove(struct xfs_inode *, struct xfs_iext_cursor *, diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h index a5100a11fa..9fe7a9564b 100644 --- a/fs/xfs/libxfs/xfs_log_recover.h +++ b/fs/xfs/libxfs/xfs_log_recover.h @@ -11,6 +11,7 @@ * define how recovery should work for that type of log item. */ struct xlog_recover_item; +struct xfs_defer_op_type; /* Sorting hat for log items as they're read in. */ enum xlog_recover_reorder { @@ -153,4 +154,11 @@ xlog_recover_resv(const struct xfs_trans_res *r) return ret; } +struct xfs_defer_pending; + +void xlog_recover_intent_item(struct xlog *log, struct xfs_log_item *lip, + xfs_lsn_t lsn, const struct xfs_defer_op_type *ops); +int xlog_recover_finish_intent(struct xfs_trans *tp, + struct xfs_defer_pending *dfp); + #endif /* __XFS_LOG_RECOVER_H__ */ diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h new file mode 100644 index 0000000000..81885a6a02 --- /dev/null +++ b/fs/xfs/libxfs/xfs_ondisk.h @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2016 Oracle. + * All Rights Reserved. + */ +#ifndef __XFS_ONDISK_H +#define __XFS_ONDISK_H + +#define XFS_CHECK_STRUCT_SIZE(structname, size) \ + static_assert(sizeof(structname) == (size), \ + "XFS: sizeof(" #structname ") is wrong, expected " #size) + +#define XFS_CHECK_OFFSET(structname, member, off) \ + static_assert(offsetof(structname, member) == (off), \ + "XFS: offsetof(" #structname ", " #member ") is wrong, " \ + "expected " #off) + +#define XFS_CHECK_VALUE(value, expected) \ + static_assert((value) == (expected), \ + "XFS: value of " #value " is wrong, expected " #expected) + +static inline void __init +xfs_check_ondisk_structs(void) +{ + /* ag/file structures */ + XFS_CHECK_STRUCT_SIZE(struct xfs_acl, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_acl_entry, 12); + XFS_CHECK_STRUCT_SIZE(struct xfs_agf, 224); + XFS_CHECK_STRUCT_SIZE(struct xfs_agfl, 36); + XFS_CHECK_STRUCT_SIZE(struct xfs_agi, 344); + XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_key, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_rec, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_bmdr_block, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block_shdr, 48); + XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block_lhdr, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block, 72); + XFS_CHECK_STRUCT_SIZE(struct xfs_dinode, 176); + XFS_CHECK_STRUCT_SIZE(struct xfs_disk_dquot, 104); + XFS_CHECK_STRUCT_SIZE(struct xfs_dqblk, 136); + XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 264); + XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56); + XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_key, 4); + XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_rec, 12); + XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key, 20); + XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec, 24); + XFS_CHECK_STRUCT_SIZE(xfs_timestamp_t, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_legacy_timestamp, 8); + XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t, 8); + XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t, 4); + XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t, 8); + XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t, 4); + XFS_CHECK_STRUCT_SIZE(xfs_refcount_ptr_t, 4); + XFS_CHECK_STRUCT_SIZE(xfs_rmap_ptr_t, 4); + + /* dir/attr trees */ + XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr, 80); + XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leafblock, 80); + XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_rmt_hdr, 56); + XFS_CHECK_STRUCT_SIZE(struct xfs_da3_blkinfo, 56); + XFS_CHECK_STRUCT_SIZE(struct xfs_da3_intnode, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_da3_node_hdr, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_blk_hdr, 48); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_data_hdr, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_free, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_free_hdr, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf_hdr, 64); + XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_entry_t, 8); + XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_hdr_t, 32); + XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_map_t, 4); + XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_local_t, 4); + + /* realtime structures */ + XFS_CHECK_STRUCT_SIZE(union xfs_rtword_raw, 4); + XFS_CHECK_STRUCT_SIZE(union xfs_suminfo_raw, 4); + + /* + * m68k has problems with xfs_attr_leaf_name_remote_t, but we pad it to + * 4 bytes anyway so it's not obviously a problem. Hence for the moment + * we don't check this structure. This can be re-instated when the attr + * definitions are updated to use c99 VLA definitions. + * + XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_remote_t, 12); + */ + + XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, valuelen, 0); + XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, namelen, 2); + XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, nameval, 3); + XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, valueblk, 0); + XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, valuelen, 4); + XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, namelen, 8); + XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, name, 9); + XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_attr_sf_hdr, 4); + XFS_CHECK_OFFSET(struct xfs_attr_sf_hdr, totsize, 0); + XFS_CHECK_OFFSET(struct xfs_attr_sf_hdr, count, 2); + XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, namelen, 0); + XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, valuelen, 1); + XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, flags, 2); + XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, nameval, 3); + XFS_CHECK_STRUCT_SIZE(xfs_da_blkinfo_t, 12); + XFS_CHECK_STRUCT_SIZE(xfs_da_intnode_t, 16); + XFS_CHECK_STRUCT_SIZE(xfs_da_node_entry_t, 8); + XFS_CHECK_STRUCT_SIZE(xfs_da_node_hdr_t, 16); + XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_free_t, 4); + XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_hdr_t, 16); + XFS_CHECK_OFFSET(xfs_dir2_data_unused_t, freetag, 0); + XFS_CHECK_OFFSET(xfs_dir2_data_unused_t, length, 2); + XFS_CHECK_STRUCT_SIZE(xfs_dir2_free_hdr_t, 16); + XFS_CHECK_STRUCT_SIZE(xfs_dir2_free_t, 16); + XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_entry_t, 8); + XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_hdr_t, 16); + XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_t, 16); + XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_tail_t, 4); + XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_entry_t, 3); + XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, namelen, 0); + XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, offset, 1); + XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, name, 3); + XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_hdr_t, 10); + + /* log structures */ + XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88); + XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24); + XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_extent_32, 12); + XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode, 176); + XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log, 28); + XFS_CHECK_STRUCT_SIZE(xfs_log_timestamp_t, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_log_legacy_timestamp, 8); + XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32, 52); + XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56); + XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20); + XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_attri_log_format, 40); + XFS_CHECK_STRUCT_SIZE(struct xfs_attrd_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_bui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_bud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_cui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_cud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_rui_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_rud_log_format, 16); + XFS_CHECK_STRUCT_SIZE(struct xfs_map_extent, 32); + XFS_CHECK_STRUCT_SIZE(struct xfs_phys_extent, 16); + + XFS_CHECK_OFFSET(struct xfs_bui_log_format, bui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_cui_log_format, cui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_rui_log_format, rui_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format, efi_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16); + XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16); + + /* + * The v5 superblock format extended several v4 header structures with + * additional data. While new fields are only accessible on v5 + * superblocks, it's important that the v5 structures place original v4 + * fields/headers in the correct location on-disk. For example, we must + * be able to find magic values at the same location in certain blocks + * regardless of superblock version. + * + * The following checks ensure that various v5 data structures place the + * subset of v4 metadata associated with the same type of block at the + * start of the on-disk block. If there is no data structure definition + * for certain types of v4 blocks, traverse down to the first field of + * common metadata (e.g., magic value) and make sure it is at offset + * zero. + */ + XFS_CHECK_OFFSET(struct xfs_dir3_leaf, hdr.info.hdr, 0); + XFS_CHECK_OFFSET(struct xfs_da3_intnode, hdr.info.hdr, 0); + XFS_CHECK_OFFSET(struct xfs_dir3_data_hdr, hdr.magic, 0); + XFS_CHECK_OFFSET(struct xfs_dir3_free, hdr.hdr.magic, 0); + XFS_CHECK_OFFSET(struct xfs_attr3_leafblock, hdr.info.hdr, 0); + + XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat, 192); + XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24); + XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64); + XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64); + + /* + * Make sure the incore inode timestamp range corresponds to hand + * converted values based on the ondisk format specification. + */ + XFS_CHECK_VALUE(XFS_BIGTIME_TIME_MIN - XFS_BIGTIME_EPOCH_OFFSET, + XFS_LEGACY_TIME_MIN); + XFS_CHECK_VALUE(XFS_BIGTIME_TIME_MAX - XFS_BIGTIME_EPOCH_OFFSET, + 16299260424LL); + + /* Do the same with the incore quota expiration range. */ + XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MIN << XFS_DQ_BIGTIME_SHIFT, 4); + XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MAX << XFS_DQ_BIGTIME_SHIFT, + 16299260424LL); +} + +#endif /* __XFS_ONDISK_H */ diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 646b3fa362..6709a7f8ba 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -123,11 +123,9 @@ xfs_refcount_btrec_to_irec( /* Simple checks for refcount records. */ xfs_failaddr_t xfs_refcount_check_irec( - struct xfs_btree_cur *cur, + struct xfs_perag *pag, const struct xfs_refcount_irec *irec) { - struct xfs_perag *pag = cur->bc_ag.pag; - if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) return __this_address; @@ -179,7 +177,7 @@ xfs_refcount_get_rec( return error; xfs_refcount_btrec_to_irec(rec, irec); - fa = xfs_refcount_check_irec(cur, irec); + fa = xfs_refcount_check_irec(cur->bc_ag.pag, irec); if (fa) return xfs_refcount_complain_bad_rec(cur, fa, irec); @@ -1153,7 +1151,7 @@ xfs_refcount_adjust_extents( tmp.rc_startblock); error = xfs_free_extent_later(cur->bc_tp, fsbno, tmp.rc_blockcount, NULL, - XFS_AG_RESV_NONE); + XFS_AG_RESV_NONE, false); if (error) goto out_error; } @@ -1215,7 +1213,7 @@ xfs_refcount_adjust_extents( ext.rc_startblock); error = xfs_free_extent_later(cur->bc_tp, fsbno, ext.rc_blockcount, NULL, - XFS_AG_RESV_NONE); + XFS_AG_RESV_NONE, false); if (error) goto out_error; } @@ -1458,7 +1456,7 @@ __xfs_refcount_add( ri->ri_blockcount = blockcount; xfs_refcount_update_get_group(tp->t_mountp, ri); - xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list); + xfs_defer_add(tp, &ri->ri_list, &xfs_refcount_update_defer_type); } /* @@ -1899,7 +1897,7 @@ xfs_refcount_recover_extent( INIT_LIST_HEAD(&rr->rr_list); xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec); - if (xfs_refcount_check_irec(cur, &rr->rr_rrec) != NULL || + if (xfs_refcount_check_irec(cur->bc_ag.pag, &rr->rr_rrec) != NULL || XFS_IS_CORRUPT(cur->bc_mp, rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) { kfree(rr); @@ -1985,7 +1983,7 @@ xfs_refcount_recover_cow_leftovers( /* Free the block. */ error = xfs_free_extent_later(tp, fsb, rr->rr_rrec.rc_blockcount, NULL, - XFS_AG_RESV_NONE); + XFS_AG_RESV_NONE, false); if (error) goto out_trans; @@ -2033,6 +2031,47 @@ xfs_refcount_has_records( return xfs_btree_has_records(cur, &low, &high, NULL, outcome); } +struct xfs_refcount_query_range_info { + xfs_refcount_query_range_fn fn; + void *priv; +}; + +/* Format btree record and pass to our callback. */ +STATIC int +xfs_refcount_query_range_helper( + struct xfs_btree_cur *cur, + const union xfs_btree_rec *rec, + void *priv) +{ + struct xfs_refcount_query_range_info *query = priv; + struct xfs_refcount_irec irec; + xfs_failaddr_t fa; + + xfs_refcount_btrec_to_irec(rec, &irec); + fa = xfs_refcount_check_irec(cur->bc_ag.pag, &irec); + if (fa) + return xfs_refcount_complain_bad_rec(cur, fa, &irec); + + return query->fn(cur, &irec, query->priv); +} + +/* Find all refcount records between two keys. */ +int +xfs_refcount_query_range( + struct xfs_btree_cur *cur, + const struct xfs_refcount_irec *low_rec, + const struct xfs_refcount_irec *high_rec, + xfs_refcount_query_range_fn fn, + void *priv) +{ + union xfs_btree_irec low_brec = { .rc = *low_rec }; + union xfs_btree_irec high_brec = { .rc = *high_rec }; + struct xfs_refcount_query_range_info query = { .priv = priv, .fn = fn }; + + return xfs_btree_query_range(cur, &low_brec, &high_brec, + xfs_refcount_query_range_helper, &query); +} + int __init xfs_refcount_intent_init_cache(void) { diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 783cd89ca1..9b56768a59 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -117,7 +117,7 @@ extern int xfs_refcount_has_records(struct xfs_btree_cur *cur, union xfs_btree_rec; extern void xfs_refcount_btrec_to_irec(const union xfs_btree_rec *rec, struct xfs_refcount_irec *irec); -xfs_failaddr_t xfs_refcount_check_irec(struct xfs_btree_cur *cur, +xfs_failaddr_t xfs_refcount_check_irec(struct xfs_perag *pag, const struct xfs_refcount_irec *irec); extern int xfs_refcount_insert(struct xfs_btree_cur *cur, struct xfs_refcount_irec *irec, int *stat); @@ -127,4 +127,14 @@ extern struct kmem_cache *xfs_refcount_intent_cache; int __init xfs_refcount_intent_init_cache(void); void xfs_refcount_intent_destroy_cache(void); +typedef int (*xfs_refcount_query_range_fn)( + struct xfs_btree_cur *cur, + const struct xfs_refcount_irec *rec, + void *priv); + +int xfs_refcount_query_range(struct xfs_btree_cur *cur, + const struct xfs_refcount_irec *low_rec, + const struct xfs_refcount_irec *high_rec, + xfs_refcount_query_range_fn fn, void *priv); + #endif /* __XFS_REFCOUNT_H__ */ diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 5c3987d8dc..0d80bd9914 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -112,7 +112,7 @@ xfs_refcountbt_free_block( be32_add_cpu(&agf->agf_refcount_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); return xfs_free_extent_later(cur->bc_tp, fsbno, 1, - &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA); + &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA, false); } STATIC int @@ -226,7 +226,18 @@ xfs_refcountbt_verify( level = be16_to_cpu(block->bb_level); if (pag && xfs_perag_initialised_agf(pag)) { - if (level >= pag->pagf_refcount_level) + unsigned int maxlevel = pag->pagf_refcount_level; + +#ifdef CONFIG_XFS_ONLINE_REPAIR + /* + * Online repair could be rewriting the refcount btree, so + * we'll validate against the larger of either tree while this + * is going on. + */ + maxlevel = max_t(unsigned int, maxlevel, + pag->pagf_repair_refcount_level); +#endif + if (level >= maxlevel) return __this_address; } else if (level >= mp->m_refc_maxlevels) return __this_address; diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index fbb0b26374..76bf7f48cb 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -2567,7 +2567,7 @@ __xfs_rmap_add( ri->ri_bmap = *bmap; xfs_rmap_update_get_group(tp->t_mountp, ri); - xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list); + xfs_defer_add(tp, &ri->ri_list, &xfs_rmap_update_defer_type); } /* Map an extent into a file. */ diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index c269d70431..e31663cb7b 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -184,7 +184,7 @@ xfs_rtfind_back( * Calculate first (leftmost) bit number to look at, * and mask for all the relevant bits in this word. */ - firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0); + firstbit = max_t(xfs_srtblock_t, bit - len + 1, 0); mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) << firstbit; /* @@ -195,7 +195,7 @@ xfs_rtfind_back( /* * Different. Mark where we are and return. */ - i = bit - XFS_RTHIBIT(wdiff); + i = bit - xfs_highbit32(wdiff); *rtx = start - i + 1; return 0; } @@ -233,7 +233,7 @@ xfs_rtfind_back( /* * Different, mark where we are and return. */ - i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); + i += XFS_NBWORD - 1 - xfs_highbit32(wdiff); *rtx = start - i + 1; return 0; } @@ -272,7 +272,7 @@ xfs_rtfind_back( /* * Different, mark where we are and return. */ - i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); + i += XFS_NBWORD - 1 - xfs_highbit32(wdiff); *rtx = start - i + 1; return 0; } else @@ -338,7 +338,7 @@ xfs_rtfind_forw( * Calculate last (rightmost) bit number to look at, * and mask for all the relevant bits in this word. */ - lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); + lastbit = min(bit + len, XFS_NBWORD); mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; /* * Calculate the difference between the value there @@ -348,7 +348,7 @@ xfs_rtfind_forw( /* * Different. Mark where we are and return. */ - i = XFS_RTLOBIT(wdiff) - bit; + i = xfs_lowbit32(wdiff) - bit; *rtx = start + i - 1; return 0; } @@ -386,7 +386,7 @@ xfs_rtfind_forw( /* * Different, mark where we are and return. */ - i += XFS_RTLOBIT(wdiff); + i += xfs_lowbit32(wdiff); *rtx = start + i - 1; return 0; } @@ -423,7 +423,7 @@ xfs_rtfind_forw( /* * Different, mark where we are and return. */ - i += XFS_RTLOBIT(wdiff); + i += xfs_lowbit32(wdiff); *rtx = start + i - 1; return 0; } else @@ -452,71 +452,59 @@ xfs_trans_log_rtsummary( } /* - * Read and/or modify the summary information for a given extent size, - * bitmap block combination. - * Keeps track of a current summary block, so we don't keep reading - * it from the buffer cache. - * - * Summary information is returned in *sum if specified. - * If no delta is specified, returns summary only. + * Modify the summary information for a given extent size, bitmap block + * combination. */ int -xfs_rtmodify_summary_int( +xfs_rtmodify_summary( struct xfs_rtalloc_args *args, int log, /* log2 of extent size */ xfs_fileoff_t bbno, /* bitmap block number */ - int delta, /* change to make to summary info */ - xfs_suminfo_t *sum) /* out: summary info for this block */ + int delta) /* in/out: summary block number */ { struct xfs_mount *mp = args->mp; - int error; - xfs_fileoff_t sb; /* summary fsblock */ - xfs_rtsumoff_t so; /* index into the summary file */ + xfs_rtsumoff_t so = xfs_rtsumoffs(mp, log, bbno); unsigned int infoword; + xfs_suminfo_t val; + int error; - /* - * Compute entry number in the summary file. - */ - so = xfs_rtsumoffs(mp, log, bbno); - /* - * Compute the block number in the summary file. - */ - sb = xfs_rtsumoffs_to_block(mp, so); - - error = xfs_rtsummary_read_buf(args, sb); + error = xfs_rtsummary_read_buf(args, xfs_rtsumoffs_to_block(mp, so)); if (error) return error; - /* - * Point to the summary information, modify/log it, and/or copy it out. - */ infoword = xfs_rtsumoffs_to_infoword(mp, so); - if (delta) { - xfs_suminfo_t val = xfs_suminfo_add(args, infoword, delta); - - if (mp->m_rsum_cache) { - if (val == 0 && log + 1 == mp->m_rsum_cache[bbno]) - mp->m_rsum_cache[bbno] = log; - if (val != 0 && log >= mp->m_rsum_cache[bbno]) - mp->m_rsum_cache[bbno] = log + 1; - } - xfs_trans_log_rtsummary(args, infoword); - if (sum) - *sum = val; - } else if (sum) { - *sum = xfs_suminfo_get(args, infoword); + val = xfs_suminfo_add(args, infoword, delta); + + if (mp->m_rsum_cache) { + if (val == 0 && log + 1 == mp->m_rsum_cache[bbno]) + mp->m_rsum_cache[bbno] = log; + if (val != 0 && log >= mp->m_rsum_cache[bbno]) + mp->m_rsum_cache[bbno] = log + 1; } + + xfs_trans_log_rtsummary(args, infoword); return 0; } +/* + * Read and return the summary information for a given extent size, bitmap block + * combination. + */ int -xfs_rtmodify_summary( +xfs_rtget_summary( struct xfs_rtalloc_args *args, int log, /* log2 of extent size */ xfs_fileoff_t bbno, /* bitmap block number */ - int delta) /* in/out: summary block number */ + xfs_suminfo_t *sum) /* out: summary info for this block */ { - return xfs_rtmodify_summary_int(args, log, bbno, delta, NULL); + struct xfs_mount *mp = args->mp; + xfs_rtsumoff_t so = xfs_rtsumoffs(mp, log, bbno); + int error; + + error = xfs_rtsummary_read_buf(args, xfs_rtsumoffs_to_block(mp, so)); + if (!error) + *sum = xfs_suminfo_get(args, xfs_rtsumoffs_to_infoword(mp, so)); + return error; } /* Log rtbitmap block from the word @from to the byte before @next. */ @@ -585,7 +573,7 @@ xfs_rtmodify_range( /* * Compute first bit not changed and mask of relevant bits. */ - lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); + lastbit = min(bit + len, XFS_NBWORD); mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; /* * Set/clear the active bits. @@ -720,7 +708,7 @@ xfs_rtfree_range( */ if (preblock < start) { error = xfs_rtmodify_summary(args, - XFS_RTBLOCKLOG(start - preblock), + xfs_highbit64(start - preblock), xfs_rtx_to_rbmblock(mp, preblock), -1); if (error) { return error; @@ -732,7 +720,7 @@ xfs_rtfree_range( */ if (postblock > end) { error = xfs_rtmodify_summary(args, - XFS_RTBLOCKLOG(postblock - end), + xfs_highbit64(postblock - end), xfs_rtx_to_rbmblock(mp, end + 1), -1); if (error) { return error; @@ -743,7 +731,7 @@ xfs_rtfree_range( * (new) free extent. */ return xfs_rtmodify_summary(args, - XFS_RTBLOCKLOG(postblock + 1 - preblock), + xfs_highbit64(postblock + 1 - preblock), xfs_rtx_to_rbmblock(mp, preblock), 1); } @@ -799,7 +787,7 @@ xfs_rtcheck_range( /* * Compute first bit not examined. */ - lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); + lastbit = min(bit + len, XFS_NBWORD); /* * Mask of relevant bits. */ @@ -812,7 +800,7 @@ xfs_rtcheck_range( /* * Different, compute first wrong bit and return. */ - i = XFS_RTLOBIT(wdiff) - bit; + i = xfs_lowbit32(wdiff) - bit; *new = start + i; *stat = 0; return 0; @@ -851,7 +839,7 @@ xfs_rtcheck_range( /* * Different, compute first wrong bit and return. */ - i += XFS_RTLOBIT(wdiff); + i += xfs_lowbit32(wdiff); *new = start + i; *stat = 0; return 0; @@ -889,7 +877,7 @@ xfs_rtcheck_range( /* * Different, compute first wrong bit and return. */ - i += XFS_RTLOBIT(wdiff); + i += xfs_lowbit32(wdiff); *new = start + i; *stat = 0; return 0; diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h index c0637057d6..152a66750a 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.h +++ b/fs/xfs/libxfs/xfs_rtbitmap.h @@ -321,8 +321,8 @@ int xfs_rtfind_forw(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock); int xfs_rtmodify_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, xfs_rtxlen_t len, int val); -int xfs_rtmodify_summary_int(struct xfs_rtalloc_args *args, int log, - xfs_fileoff_t bbno, int delta, xfs_suminfo_t *sum); +int xfs_rtget_summary(struct xfs_rtalloc_args *args, int log, + xfs_fileoff_t bbno, xfs_suminfo_t *sum); int xfs_rtmodify_summary(struct xfs_rtalloc_args *args, int log, xfs_fileoff_t bbno, int delta); int xfs_rtfree_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start, diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 1f74d0cd16..5bb6e2bd6d 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -25,6 +25,7 @@ #include "xfs_da_format.h" #include "xfs_health.h" #include "xfs_ag.h" +#include "xfs_rtbitmap.h" /* * Physical superblock buffer manipulations. Shared with libxfs in userspace. @@ -508,8 +509,9 @@ xfs_validate_sb_common( rbmblocks = howmany_64(sbp->sb_rextents, NBBY * sbp->sb_blocksize); - if (sbp->sb_rextents != rexts || - sbp->sb_rextslog != xfs_highbit32(sbp->sb_rextents) || + if (!xfs_validate_rtextents(rexts) || + sbp->sb_rextents != rexts || + sbp->sb_rextslog != xfs_compute_rextslog(rexts) || sbp->sb_rbmblocks != rbmblocks) { xfs_notice(mp, "realtime geometry sanity check failed"); @@ -1375,3 +1377,17 @@ xfs_validate_stripe_geometry( } return true; } + +/* + * Compute the maximum level number of the realtime summary file, as defined by + * mkfs. The historic use of highbit32 on a 64-bit quantity prohibited correct + * use of rt volumes with more than 2^32 extents. + */ +uint8_t +xfs_compute_rextslog( + xfs_rtbxlen_t rtextents) +{ + if (!rtextents) + return 0; + return xfs_highbit64(rtextents); +} diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 19134b23c1..2e8e8d63d4 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -38,4 +38,6 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp, __s64 sunit, __s64 swidth, int sectorsize, bool silent); +uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); + #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index c4381388c0..4220d3584c 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -139,7 +139,7 @@ bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset, uint32_t size, struct xfs_buf *bp); void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, struct xfs_inode *ip, struct xfs_ifork *ifp); -xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip); +xfs_failaddr_t xfs_symlink_shortform_verify(void *sfp, int64_t size); /* Computed inode geometry for the filesystem. */ struct xfs_ino_geometry { diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c index bdc777b9ec..160aa20aa4 100644 --- a/fs/xfs/libxfs/xfs_symlink_remote.c +++ b/fs/xfs/libxfs/xfs_symlink_remote.c @@ -175,7 +175,7 @@ xfs_symlink_local_to_remote( if (!xfs_has_crc(mp)) { bp->b_ops = NULL; - memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); + memcpy(bp->b_addr, ifp->if_data, ifp->if_bytes); xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1); return; } @@ -191,7 +191,7 @@ xfs_symlink_local_to_remote( buf = bp->b_addr; buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp); - memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes); + memcpy(buf, ifp->if_data, ifp->if_bytes); xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) + ifp->if_bytes - 1); } @@ -202,15 +202,11 @@ xfs_symlink_local_to_remote( */ xfs_failaddr_t xfs_symlink_shortform_verify( - struct xfs_inode *ip) + void *sfp, + int64_t size) { - struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); - char *sfp = (char *)ifp->if_u1.if_data; - int size = ifp->if_bytes; char *endp = sfp + size; - ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); - /* * Zero length symlinks should never occur in memory as they are * never allowed to exist on disk. diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 533200c4cc..62e02d5380 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -51,7 +51,6 @@ typedef void * xfs_failaddr_t; #define NULLRFSBLOCK ((xfs_rfsblock_t)-1) #define NULLRTBLOCK ((xfs_rtblock_t)-1) #define NULLFILEOFF ((xfs_fileoff_t)-1) -#define NULLRTEXTNO ((xfs_rtxnum_t)-1) #define NULLAGBLOCK ((xfs_agblock_t)-1) #define NULLAGNUMBER ((xfs_agnumber_t)-1) @@ -208,6 +207,13 @@ enum xfs_ag_resv_type { XFS_AG_RESV_AGFL, XFS_AG_RESV_METADATA, XFS_AG_RESV_RMAPBT, + + /* + * Don't increase fdblocks when freeing extent. This is a pony for + * the bnobt repair functions to re-free the free space without + * altering fdblocks. If you think you need this you're wrong. + */ + XFS_AG_RESV_IGNORE, }; /* Results of scanning a btree keyspace to check occupancy. */ @@ -245,4 +251,16 @@ bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off); bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off, xfs_fileoff_t len); +/* Do we support an rt volume having this number of rtextents? */ +static inline bool +xfs_validate_rtextents( + xfs_rtbxlen_t rtextents) +{ + /* No runt rt volumes */ + if (rtextents == 0) + return false; + + return true; +} + #endif /* __XFS_TYPES_H__ */ |