diff options
Diffstat (limited to 'fs/xfs/xfs_buf_mem.c')
-rw-r--r-- | fs/xfs/xfs_buf_mem.c | 270 |
1 files changed, 270 insertions, 0 deletions
diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c new file mode 100644 index 0000000000..9bb2d24de7 --- /dev/null +++ b/fs/xfs/xfs_buf_mem.c @@ -0,0 +1,270 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2023-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_buf.h" +#include "xfs_buf_mem.h" +#include "xfs_trace.h" +#include <linux/shmem_fs.h> +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_buf_item.h" +#include "xfs_error.h" + +/* + * Buffer Cache for In-Memory Files + * ================================ + * + * Online fsck wants to create ephemeral ordered recordsets. The existing + * btree infrastructure can do this, but we need the buffer cache to target + * memory instead of block devices. + * + * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those + * requirements. Therefore, the xmbuf mechanism uses an unlinked shmem file to + * store our staging data. This file is not installed in the file descriptor + * table so that user programs cannot access the data, which means that the + * xmbuf must be freed with xmbuf_destroy. + * + * xmbufs assume that the caller will handle all required concurrency + * management; standard vfs locks (freezer and inode) are not taken. Reads + * and writes are satisfied directly from the page cache. + * + * The only supported block size is PAGE_SIZE, and we cannot use highmem. + */ + +/* + * shmem files used to back an in-memory buffer cache must not be exposed to + * userspace. Upper layers must coordinate access to the one handle returned + * by the constructor, so establish a separate lock class for xmbufs to avoid + * confusing lockdep. + */ +static struct lock_class_key xmbuf_i_mutex_key; + +/* + * Allocate a buffer cache target for a memory-backed file and set up the + * buffer target. + */ +int +xmbuf_alloc( + struct xfs_mount *mp, + const char *descr, + struct xfs_buftarg **btpp) +{ + struct file *file; + struct inode *inode; + struct xfs_buftarg *btp; + int error; + + btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL); + if (!btp) + return -ENOMEM; + + file = shmem_kernel_file_setup(descr, 0, 0); + if (IS_ERR(file)) { + error = PTR_ERR(file); + goto out_free_btp; + } + inode = file_inode(file); + + /* private file, private locking */ + lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key); + + /* + * We don't want to bother with kmapping data during repair, so don't + * allow highmem pages to back this mapping. + */ + mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL); + + /* ensure all writes are below EOF to avoid pagecache zeroing */ + i_size_write(inode, inode->i_sb->s_maxbytes); + + error = xfs_buf_cache_init(btp->bt_cache); + if (error) + goto out_file; + + /* Initialize buffer target */ + btp->bt_mount = mp; + btp->bt_dev = (dev_t)-1U; + btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */ + btp->bt_file = file; + btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE; + btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1; + + error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr); + if (error) + goto out_bcache; + + trace_xmbuf_create(btp); + + *btpp = btp; + return 0; + +out_bcache: + xfs_buf_cache_destroy(btp->bt_cache); +out_file: + fput(file); +out_free_btp: + kfree(btp); + return error; +} + +/* Free a buffer cache target for a memory-backed buffer cache. */ +void +xmbuf_free( + struct xfs_buftarg *btp) +{ + ASSERT(xfs_buftarg_is_mem(btp)); + ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0); + + trace_xmbuf_free(btp); + + xfs_destroy_buftarg(btp); + xfs_buf_cache_destroy(btp->bt_cache); + fput(btp->bt_file); + kfree(btp); +} + +/* Directly map a shmem page into the buffer cache. */ +int +xmbuf_map_page( + struct xfs_buf *bp) +{ + struct inode *inode = file_inode(bp->b_target->bt_file); + struct folio *folio = NULL; + struct page *page; + loff_t pos = BBTOB(xfs_buf_daddr(bp)); + int error; + + ASSERT(xfs_buftarg_is_mem(bp->b_target)); + + if (bp->b_map_count != 1) + return -ENOMEM; + if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE) + return -ENOMEM; + if (offset_in_page(pos) != 0) { + ASSERT(offset_in_page(pos)); + return -ENOMEM; + } + + error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, SGP_CACHE); + if (error) + return error; + + if (filemap_check_wb_err(inode->i_mapping, 0)) { + folio_unlock(folio); + folio_put(folio); + return -EIO; + } + + page = folio_file_page(folio, pos >> PAGE_SHIFT); + + /* + * Mark the page dirty so that it won't be reclaimed once we drop the + * (potentially last) reference in xmbuf_unmap_page. + */ + set_page_dirty(page); + unlock_page(page); + + bp->b_addr = page_address(page); + bp->b_pages = bp->b_page_array; + bp->b_pages[0] = page; + bp->b_page_count = 1; + return 0; +} + +/* Unmap a shmem page that was mapped into the buffer cache. */ +void +xmbuf_unmap_page( + struct xfs_buf *bp) +{ + struct page *page = bp->b_pages[0]; + + ASSERT(xfs_buftarg_is_mem(bp->b_target)); + + put_page(page); + + bp->b_addr = NULL; + bp->b_pages[0] = NULL; + bp->b_pages = NULL; + bp->b_page_count = 0; +} + +/* Is this a valid daddr within the buftarg? */ +bool +xmbuf_verify_daddr( + struct xfs_buftarg *btp, + xfs_daddr_t daddr) +{ + struct inode *inode = file_inode(btp->bt_file); + + ASSERT(xfs_buftarg_is_mem(btp)); + + return daddr < (inode->i_sb->s_maxbytes >> BBSHIFT); +} + +/* Discard the page backing this buffer. */ +static void +xmbuf_stale( + struct xfs_buf *bp) +{ + struct inode *inode = file_inode(bp->b_target->bt_file); + loff_t pos; + + ASSERT(xfs_buftarg_is_mem(bp->b_target)); + + pos = BBTOB(xfs_buf_daddr(bp)); + shmem_truncate_range(inode, pos, pos + BBTOB(bp->b_length) - 1); +} + +/* + * Finalize a buffer -- discard the backing page if it's stale, or run the + * write verifier to detect problems. + */ +int +xmbuf_finalize( + struct xfs_buf *bp) +{ + xfs_failaddr_t fa; + int error = 0; + + if (bp->b_flags & XBF_STALE) { + xmbuf_stale(bp); + return 0; + } + + /* + * Although this btree is ephemeral, validate the buffer structure so + * that we can detect memory corruption errors and software bugs. + */ + fa = bp->b_ops->verify_struct(bp); + if (fa) { + error = -EFSCORRUPTED; + xfs_verifier_error(bp, error, fa); + } + + return error; +} + +/* + * Detach this xmbuf buffer from the transaction by any means necessary. + * All buffers are direct-mapped, so they do not need bwrite. + */ +void +xmbuf_trans_bdetach( + struct xfs_trans *tp, + struct xfs_buf *bp) +{ + struct xfs_buf_log_item *bli = bp->b_log_item; + + ASSERT(bli != NULL); + + bli->bli_flags &= ~(XFS_BLI_DIRTY | XFS_BLI_ORDERED | + XFS_BLI_LOGGED | XFS_BLI_STALE); + clear_bit(XFS_LI_DIRTY, &bli->bli_item.li_flags); + + while (bp->b_log_item != NULL) + xfs_trans_bdetach(tp, bp); +} |