From 8b0a8165cdad0f4133837d753649ef4682e42c3b Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 7 Aug 2024 15:11:40 +0200 Subject: Merging upstream version 6.9.7. Signed-off-by: Daniel Baumann --- fs/xfs/scrub/xfile.c | 345 ++++++++++++++++++--------------------------------- 1 file changed, 119 insertions(+), 226 deletions(-) (limited to 'fs/xfs/scrub/xfile.c') diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c index 090c3ead43..8cdd863db5 100644 --- a/fs/xfs/scrub/xfile.c +++ b/fs/xfs/scrub/xfile.c @@ -34,13 +34,6 @@ * xfiles assume that the caller will handle all required concurrency * management; standard vfs locks (freezer and inode) are not taken. Reads * and writes are satisfied directly from the page cache. - * - * NOTE: The current shmemfs implementation has a quirk that in-kernel reads - * of a hole cause a page to be mapped into the file. If you are going to - * create a sparse xfile, please be careful about reading from uninitialized - * parts of the file. These pages are !Uptodate and will eventually be - * reclaimed if not written, but in the short term this boosts memory - * consumption. */ /* @@ -62,38 +55,27 @@ xfile_create( { struct inode *inode; struct xfile *xf; - int error = -ENOMEM; + int error; xf = kmalloc(sizeof(struct xfile), XCHK_GFP_FLAGS); if (!xf) return -ENOMEM; - xf->file = shmem_file_setup(description, isize, 0); - if (!xf->file) - goto out_xfile; + xf->file = shmem_kernel_file_setup(description, isize, VM_NORESERVE); if (IS_ERR(xf->file)) { error = PTR_ERR(xf->file); goto out_xfile; } - /* - * We want a large sparse file that we can pread, pwrite, and seek. - * xfile users are responsible for keeping the xfile hidden away from - * all other callers, so we skip timestamp updates and security checks. - * Make the inode only accessible by root, just in case the xfile ever - * escapes. - */ - xf->file->f_mode |= FMODE_PREAD | FMODE_PWRITE | FMODE_NOCMTIME | - FMODE_LSEEK; - xf->file->f_flags |= O_RDWR | O_LARGEFILE | O_NOATIME; inode = file_inode(xf->file); - inode->i_flags |= S_PRIVATE | S_NOCMTIME | S_NOATIME; - inode->i_mode &= ~0177; - inode->i_uid = GLOBAL_ROOT_UID; - inode->i_gid = GLOBAL_ROOT_GID; - lockdep_set_class(&inode->i_rwsem, &xfile_i_mutex_key); + /* + * We don't want to bother with kmapping data during repair, so don't + * allow highmem pages to back this mapping. + */ + mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL); + trace_xfile_create(xf); *xfilep = xf; @@ -118,164 +100,128 @@ xfile_destroy( } /* - * Read a memory object directly from the xfile's page cache. Unlike regular - * pread, we return -E2BIG and -EFBIG for reads that are too large or at too - * high an offset, instead of truncating the read. Otherwise, we return - * bytes read or an error code, like regular pread. + * Load an object. Since we're treating this file as "memory", any error or + * short IO is treated as a failure to allocate memory. 
*/ -ssize_t -xfile_pread( +int +xfile_load( struct xfile *xf, void *buf, size_t count, loff_t pos) { struct inode *inode = file_inode(xf->file); - struct address_space *mapping = inode->i_mapping; - struct page *page = NULL; - ssize_t read = 0; unsigned int pflags; - int error = 0; if (count > MAX_RW_COUNT) - return -E2BIG; + return -ENOMEM; if (inode->i_sb->s_maxbytes - pos < count) - return -EFBIG; + return -ENOMEM; - trace_xfile_pread(xf, pos, count); + trace_xfile_load(xf, pos, count); pflags = memalloc_nofs_save(); while (count > 0) { - void *p, *kaddr; + struct folio *folio; unsigned int len; + unsigned int offset; - len = min_t(ssize_t, count, PAGE_SIZE - offset_in_page(pos)); - - /* - * In-kernel reads of a shmem file cause it to allocate a page - * if the mapping shows a hole. Therefore, if we hit ENOMEM - * we can continue by zeroing the caller's buffer. - */ - page = shmem_read_mapping_page_gfp(mapping, pos >> PAGE_SHIFT, - __GFP_NOWARN); - if (IS_ERR(page)) { - error = PTR_ERR(page); - if (error != -ENOMEM) - break; - - memset(buf, 0, len); - goto advance; - } - - if (PageUptodate(page)) { + if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, + SGP_READ) < 0) + break; + if (!folio) { /* - * xfile pages must never be mapped into userspace, so - * we skip the dcache flush. + * No data stored at this offset, just zero the output + * buffer until the next page boundary. */ - kaddr = kmap_local_page(page); - p = kaddr + offset_in_page(pos); - memcpy(buf, p, len); - kunmap_local(kaddr); - } else { + len = min_t(ssize_t, count, + PAGE_SIZE - offset_in_page(pos)); memset(buf, 0, len); - } - put_page(page); + } else { + if (filemap_check_wb_err(inode->i_mapping, 0)) { + folio_unlock(folio); + folio_put(folio); + break; + } + + offset = offset_in_folio(folio, pos); + len = min_t(ssize_t, count, folio_size(folio) - offset); + memcpy(buf, folio_address(folio) + offset, len); -advance: + folio_unlock(folio); + folio_put(folio); + } count -= len; pos += len; buf += len; - read += len; } memalloc_nofs_restore(pflags); - if (read > 0) - return read; - return error; + if (count) + return -ENOMEM; + return 0; } /* - * Write a memory object directly to the xfile's page cache. Unlike regular - * pwrite, we return -E2BIG and -EFBIG for writes that are too large or at too - * high an offset, instead of truncating the write. Otherwise, we return - * bytes written or an error code, like regular pwrite. + * Store an object. Since we're treating this file as "memory", any error or + * short IO is treated as a failure to allocate memory. */ -ssize_t -xfile_pwrite( +int +xfile_store( struct xfile *xf, const void *buf, size_t count, loff_t pos) { struct inode *inode = file_inode(xf->file); - struct address_space *mapping = inode->i_mapping; - const struct address_space_operations *aops = mapping->a_ops; - struct page *page = NULL; - ssize_t written = 0; unsigned int pflags; - int error = 0; if (count > MAX_RW_COUNT) - return -E2BIG; + return -ENOMEM; if (inode->i_sb->s_maxbytes - pos < count) - return -EFBIG; + return -ENOMEM; - trace_xfile_pwrite(xf, pos, count); + trace_xfile_store(xf, pos, count); + + /* + * Increase the file size first so that shmem_get_folio(..., SGP_CACHE), + * actually allocates a folio instead of erroring out. 
+ */ + if (pos + count > i_size_read(inode)) + i_size_write(inode, pos + count); pflags = memalloc_nofs_save(); while (count > 0) { - void *fsdata = NULL; - void *p, *kaddr; + struct folio *folio; unsigned int len; - int ret; - - len = min_t(ssize_t, count, PAGE_SIZE - offset_in_page(pos)); - - /* - * We call write_begin directly here to avoid all the freezer - * protection lock-taking that happens in the normal path. - * shmem doesn't support fs freeze, but lockdep doesn't know - * that and will trip over that. - */ - error = aops->write_begin(NULL, mapping, pos, len, &page, - &fsdata); - if (error) - break; + unsigned int offset; - /* - * xfile pages must never be mapped into userspace, so we skip - * the dcache flush. If the page is not uptodate, zero it - * before writing data. - */ - kaddr = kmap_local_page(page); - if (!PageUptodate(page)) { - memset(kaddr, 0, PAGE_SIZE); - SetPageUptodate(page); - } - p = kaddr + offset_in_page(pos); - memcpy(p, buf, len); - kunmap_local(kaddr); - - ret = aops->write_end(NULL, mapping, pos, len, len, page, - fsdata); - if (ret < 0) { - error = ret; + if (shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, + SGP_CACHE) < 0) + break; + if (filemap_check_wb_err(inode->i_mapping, 0)) { + folio_unlock(folio); + folio_put(folio); break; } - written += ret; - if (ret != len) - break; + offset = offset_in_folio(folio, pos); + len = min_t(ssize_t, count, folio_size(folio) - offset); + memcpy(folio_address(folio) + offset, buf, len); + + folio_mark_dirty(folio); + folio_unlock(folio); + folio_put(folio); - count -= ret; - pos += ret; - buf += ret; + count -= len; + pos += len; + buf += len; } memalloc_nofs_restore(pflags); - if (written > 0) - return written; - return error; + if (count) + return -ENOMEM; + return 0; } /* Find the next written area in the xfile data for a given offset. */ @@ -291,129 +237,76 @@ xfile_seek_data( return ret; } -/* Query stat information for an xfile. */ -int -xfile_stat( - struct xfile *xf, - struct xfile_stat *statbuf) -{ - struct kstat ks; - int error; - - error = vfs_getattr_nosec(&xf->file->f_path, &ks, - STATX_SIZE | STATX_BLOCKS, AT_STATX_DONT_SYNC); - if (error) - return error; - - statbuf->size = ks.size; - statbuf->bytes = ks.blocks << SECTOR_SHIFT; - return 0; -} - /* - * Grab the (locked) page for a memory object. The object cannot span a page - * boundary. Returns 0 (and a locked page) if successful, -ENOTBLK if we - * cannot grab the page, or the usual negative errno. + * Grab the (locked) folio for a memory object. The object cannot span a folio + * boundary. Returns the locked folio if successful, NULL if there was no + * folio or it didn't cover the range requested, or an ERR_PTR on failure. 
*/ -int -xfile_get_page( +struct folio * +xfile_get_folio( struct xfile *xf, loff_t pos, - unsigned int len, - struct xfile_page *xfpage) + size_t len, + unsigned int flags) { struct inode *inode = file_inode(xf->file); - struct address_space *mapping = inode->i_mapping; - const struct address_space_operations *aops = mapping->a_ops; - struct page *page = NULL; - void *fsdata = NULL; - loff_t key = round_down(pos, PAGE_SIZE); + struct folio *folio = NULL; unsigned int pflags; int error; if (inode->i_sb->s_maxbytes - pos < len) - return -ENOMEM; - if (len > PAGE_SIZE - offset_in_page(pos)) - return -ENOTBLK; - - trace_xfile_get_page(xf, pos, len); + return ERR_PTR(-ENOMEM); - pflags = memalloc_nofs_save(); + trace_xfile_get_folio(xf, pos, len); /* - * We call write_begin directly here to avoid all the freezer - * protection lock-taking that happens in the normal path. shmem - * doesn't support fs freeze, but lockdep doesn't know that and will - * trip over that. + * Increase the file size first so that shmem_get_folio(..., SGP_CACHE), + * actually allocates a folio instead of erroring out. */ - error = aops->write_begin(NULL, mapping, key, PAGE_SIZE, &page, - &fsdata); + if ((flags & XFILE_ALLOC) && pos + len > i_size_read(inode)) + i_size_write(inode, pos + len); + + pflags = memalloc_nofs_save(); + error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, + (flags & XFILE_ALLOC) ? SGP_CACHE : SGP_READ); + memalloc_nofs_restore(pflags); if (error) - goto out_pflags; + return ERR_PTR(error); - /* We got the page, so make sure we push out EOF. */ - if (i_size_read(inode) < pos + len) - i_size_write(inode, pos + len); + if (!folio) + return NULL; - /* - * If the page isn't up to date, fill it with zeroes before we hand it - * to the caller and make sure the backing store will hold on to them. - */ - if (!PageUptodate(page)) { - void *kaddr; + if (len > folio_size(folio) - offset_in_folio(folio, pos)) { + folio_unlock(folio); + folio_put(folio); + return NULL; + } - kaddr = kmap_local_page(page); - memset(kaddr, 0, PAGE_SIZE); - kunmap_local(kaddr); - SetPageUptodate(page); + if (filemap_check_wb_err(inode->i_mapping, 0)) { + folio_unlock(folio); + folio_put(folio); + return ERR_PTR(-EIO); } /* - * Mark each page dirty so that the contents are written to some - * backing store when we drop this buffer, and take an extra reference - * to prevent the xfile page from being swapped or removed from the - * page cache by reclaim if the caller unlocks the page. + * Mark the folio dirty so that it won't be reclaimed once we drop the + * (potentially last) reference in xfile_put_folio. */ - set_page_dirty(page); - get_page(page); - - xfpage->page = page; - xfpage->fsdata = fsdata; - xfpage->pos = key; -out_pflags: - memalloc_nofs_restore(pflags); - return error; + if (flags & XFILE_ALLOC) + folio_set_dirty(folio); + return folio; } /* - * Release the (locked) page for a memory object. Returns 0 or a negative - * errno. + * Release the (locked) folio for a memory object. */ -int -xfile_put_page( +void +xfile_put_folio( struct xfile *xf, - struct xfile_page *xfpage) + struct folio *folio) { - struct inode *inode = file_inode(xf->file); - struct address_space *mapping = inode->i_mapping; - const struct address_space_operations *aops = mapping->a_ops; - unsigned int pflags; - int ret; - - trace_xfile_put_page(xf, xfpage->pos, PAGE_SIZE); - - /* Give back the reference that we took in xfile_get_page. 
*/
-	put_page(xfpage->page);
+	trace_xfile_put_folio(xf, folio_pos(folio), folio_size(folio));
 
-	pflags = memalloc_nofs_save();
-	ret = aops->write_end(NULL, mapping, xfpage->pos, PAGE_SIZE, PAGE_SIZE,
-			      xfpage->page, xfpage->fsdata);
-	memalloc_nofs_restore(pflags);
-	memset(xfpage, 0, sizeof(struct xfile_page));
-
-	if (ret < 0)
-		return ret;
-	if (ret != PAGE_SIZE)
-		return -EIO;
-	return 0;
+	folio_unlock(folio);
+	folio_put(folio);
 }
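
Editor's note: the patch replaces the pread/pwrite-style byte-count API with
xfile_load/xfile_store, which return 0 or a negative errno and collapse every
failure (including short I/O) to -ENOMEM. As a quick orientation for callers,
here is a minimal, hypothetical round-trip sketch. It is not part of the patch:
the xexample_* names are invented, the xfile_create/xfile_destroy signatures
are taken from fs/xfs/scrub/xfile.h as of this kernel version, and the usual
xfs include chain is elided.

/* Hypothetical usage sketch -- not part of the patch. */
#include "scrub/xfile.h"

struct xexample_rec {
	uint64_t		key;
	uint64_t		value;
};

static int
xexample_roundtrip(void)
{
	struct xexample_rec	rec = { .key = 1, .value = 42 };
	struct xexample_rec	out;
	struct xfile		*xf;
	int			error;

	/* isize is only an initial size; xfile_store extends i_size. */
	error = xfile_create("xexample records", 0, &xf);
	if (error)
		return error;

	/* Store at a sparse offset; backing folios allocate on demand. */
	error = xfile_store(xf, &rec, sizeof(rec), 37 * sizeof(rec));
	if (error)
		goto out_destroy;

	/* Loading from a hole zeroes the buffer rather than failing. */
	error = xfile_load(xf, &out, sizeof(out), 0);
	if (error)
		goto out_destroy;

	/* Read back the record; any short read surfaces as -ENOMEM. */
	error = xfile_load(xf, &out, sizeof(out), 37 * sizeof(rec));
out_destroy:
	xfile_destroy(xf);
	return error;
}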
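
Likewise, xfile_get_page/xfile_put_page become xfile_get_folio/xfile_put_folio,
handing the caller the locked folio directly. A hypothetical in-place update
might look like the sketch below; again this is illustrative only. XFILE_ALLOC
is the flag introduced alongside this interface, the object must not span a
folio boundary, and folio_address() is safe here because xfile_create pins the
mapping to GFP_KERNEL (no highmem).

/* Hypothetical usage sketch -- not part of the patch. */
static int
xexample_bump_counter(
	struct xfile		*xf,
	loff_t			pos)
{
	struct folio		*folio;
	uint64_t		*p;

	/*
	 * XFILE_ALLOC extends i_size, allocates a backing folio, and
	 * marks it dirty so it is not reclaimed once the (potentially
	 * last) reference is dropped.  Without the flag, a hole or a
	 * range crossing the folio boundary yields a NULL return.
	 */
	folio = xfile_get_folio(xf, pos, sizeof(*p), XFILE_ALLOC);
	if (IS_ERR(folio))
		return PTR_ERR(folio);
	if (!folio)
		return -ENODATA;

	/* The folio comes back locked; update in place, then release. */
	p = folio_address(folio) + offset_in_folio(folio, pos);
	(*p)++;

	/* Unlocks and drops the reference taken by xfile_get_folio. */
	xfile_put_folio(xf, folio);
	return 0;
}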