diff options
Diffstat (limited to 'fs/erofs')
-rw-r--r-- | fs/erofs/compress.h | 7 | ||||
-rw-r--r-- | fs/erofs/data.c | 6 | ||||
-rw-r--r-- | fs/erofs/decompressor_deflate.c | 58 | ||||
-rw-r--r-- | fs/erofs/decompressor_lzma.c | 3 | ||||
-rw-r--r-- | fs/erofs/fscache.c | 282 | ||||
-rw-r--r-- | fs/erofs/inode.c | 14 | ||||
-rw-r--r-- | fs/erofs/internal.h | 10 | ||||
-rw-r--r-- | fs/erofs/super.c | 19 | ||||
-rw-r--r-- | fs/erofs/utils.c | 2 | ||||
-rw-r--r-- | fs/erofs/zdata.c | 287 |
10 files changed, 362 insertions, 326 deletions
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h index 7cc5841577..333587ba61 100644 --- a/fs/erofs/compress.h +++ b/fs/erofs/compress.h @@ -81,13 +81,6 @@ static inline bool z_erofs_put_shortlivedpage(struct page **pagepool, return true; } -#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) -static inline bool erofs_page_is_managed(const struct erofs_sb_info *sbi, - struct page *page) -{ - return page->mapping == MNGD_MAPPING(sbi); -} - int z_erofs_fixup_insize(struct z_erofs_decompress_req *rq, const char *padbuf, unsigned int padbufsize); extern const struct z_erofs_decompressor erofs_decompressors[]; diff --git a/fs/erofs/data.c b/fs/erofs/data.c index 3d9721b3fa..52524bd969 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -220,7 +220,7 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) up_read(&devs->rwsem); return 0; } - map->m_bdev = dif->bdev_handle ? dif->bdev_handle->bdev : NULL; + map->m_bdev = dif->bdev_file ? file_bdev(dif->bdev_file) : NULL; map->m_daxdev = dif->dax_dev; map->m_dax_part_off = dif->dax_part_off; map->m_fscache = dif->fscache; @@ -238,8 +238,8 @@ int erofs_map_dev(struct super_block *sb, struct erofs_map_dev *map) if (map->m_pa >= startoff && map->m_pa < startoff + length) { map->m_pa -= startoff; - map->m_bdev = dif->bdev_handle ? - dif->bdev_handle->bdev : NULL; + map->m_bdev = dif->bdev_file ? + file_bdev(dif->bdev_file) : NULL; map->m_daxdev = dif->dax_dev; map->m_dax_part_off = dif->dax_part_off; map->m_fscache = dif->fscache; diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c index b98872058a..3a3461561a 100644 --- a/fs/erofs/decompressor_deflate.c +++ b/fs/erofs/decompressor_deflate.c @@ -46,39 +46,15 @@ int __init z_erofs_deflate_init(void) /* by default, use # of possible CPUs instead */ if (!z_erofs_deflate_nstrms) z_erofs_deflate_nstrms = num_possible_cpus(); - - for (; z_erofs_deflate_avail_strms < z_erofs_deflate_nstrms; - ++z_erofs_deflate_avail_strms) { - struct z_erofs_deflate *strm; - - strm = kzalloc(sizeof(*strm), GFP_KERNEL); - if (!strm) - goto out_failed; - - /* XXX: in-kernel zlib cannot shrink windowbits currently */ - strm->z.workspace = vmalloc(zlib_inflate_workspacesize()); - if (!strm->z.workspace) { - kfree(strm); - goto out_failed; - } - - spin_lock(&z_erofs_deflate_lock); - strm->next = z_erofs_deflate_head; - z_erofs_deflate_head = strm; - spin_unlock(&z_erofs_deflate_lock); - } return 0; - -out_failed: - erofs_err(NULL, "failed to allocate zlib workspace"); - z_erofs_deflate_exit(); - return -ENOMEM; } int z_erofs_load_deflate_config(struct super_block *sb, struct erofs_super_block *dsb, void *data, int size) { struct z_erofs_deflate_cfgs *dfl = data; + static DEFINE_MUTEX(deflate_resize_mutex); + static bool inited; if (!dfl || size < sizeof(struct z_erofs_deflate_cfgs)) { erofs_err(sb, "invalid deflate cfgs, size=%u", size); @@ -89,9 +65,36 @@ int z_erofs_load_deflate_config(struct super_block *sb, erofs_err(sb, "unsupported windowbits %u", dfl->windowbits); return -EOPNOTSUPP; } + mutex_lock(&deflate_resize_mutex); + if (!inited) { + for (; z_erofs_deflate_avail_strms < z_erofs_deflate_nstrms; + ++z_erofs_deflate_avail_strms) { + struct z_erofs_deflate *strm; + + strm = kzalloc(sizeof(*strm), GFP_KERNEL); + if (!strm) + goto failed; + /* XXX: in-kernel zlib cannot customize windowbits */ + strm->z.workspace = vmalloc(zlib_inflate_workspacesize()); + if (!strm->z.workspace) { + kfree(strm); + goto failed; + } + spin_lock(&z_erofs_deflate_lock); + strm->next = z_erofs_deflate_head; + z_erofs_deflate_head = strm; + spin_unlock(&z_erofs_deflate_lock); + } + inited = true; + } + mutex_unlock(&deflate_resize_mutex); erofs_info(sb, "EXPERIMENTAL DEFLATE feature in use. Use at your own risk!"); return 0; +failed: + mutex_unlock(&deflate_resize_mutex); + z_erofs_deflate_exit(); + return -ENOMEM; } int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, @@ -212,9 +215,6 @@ again: if (rq->out[no] != rq->in[j]) continue; - - DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb), - rq->in[j])); tmppage = erofs_allocpage(pgpl, rq->gfp); if (!tmppage) { err = -ENOMEM; diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 6ca357d83c..4b28dc130c 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -258,9 +258,6 @@ again: if (rq->out[no] != rq->in[j]) continue; - - DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb), - rq->in[j])); tmppage = erofs_allocpage(pgpl, rq->gfp); if (!tmppage) { err = -ENOMEM; diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c index 122a4753ec..62da538d91 100644 --- a/fs/erofs/fscache.c +++ b/fs/erofs/fscache.c @@ -25,9 +25,15 @@ static struct file_system_type erofs_anon_fs_type = { .kill_sb = kill_anon_super, }; -struct erofs_fscache_request { - struct erofs_fscache_request *primary; - struct netfs_cache_resources cache_resources; +struct erofs_fscache_io { + struct netfs_cache_resources cres; + struct iov_iter iter; + netfs_io_terminated_t end_io; + void *private; + refcount_t ref; +}; + +struct erofs_fscache_rq { struct address_space *mapping; /* The mapping being accessed */ loff_t start; /* Start position */ size_t len; /* Length of the request */ @@ -36,44 +42,17 @@ struct erofs_fscache_request { refcount_t ref; }; -static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping, - loff_t start, size_t len) -{ - struct erofs_fscache_request *req; - - req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL); - if (!req) - return ERR_PTR(-ENOMEM); - - req->mapping = mapping; - req->start = start; - req->len = len; - refcount_set(&req->ref, 1); - - return req; -} - -static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary, - size_t len) +static bool erofs_fscache_io_put(struct erofs_fscache_io *io) { - struct erofs_fscache_request *req; - - /* use primary request for the first submission */ - if (!primary->submitted) { - refcount_inc(&primary->ref); - return primary; - } - - req = erofs_fscache_req_alloc(primary->mapping, - primary->start + primary->submitted, len); - if (!IS_ERR(req)) { - req->primary = primary; - refcount_inc(&primary->ref); - } - return req; + if (!refcount_dec_and_test(&io->ref)) + return false; + if (io->cres.ops) + io->cres.ops->end_operation(&io->cres); + kfree(io); + return true; } -static void erofs_fscache_req_complete(struct erofs_fscache_request *req) +static void erofs_fscache_req_complete(struct erofs_fscache_rq *req) { struct folio *folio; bool failed = req->error; @@ -93,120 +72,196 @@ static void erofs_fscache_req_complete(struct erofs_fscache_request *req) rcu_read_unlock(); } -static void erofs_fscache_req_put(struct erofs_fscache_request *req) +static void erofs_fscache_req_put(struct erofs_fscache_rq *req) { - if (refcount_dec_and_test(&req->ref)) { - if (req->cache_resources.ops) - req->cache_resources.ops->end_operation(&req->cache_resources); - if (!req->primary) - erofs_fscache_req_complete(req); - else - erofs_fscache_req_put(req->primary); - kfree(req); - } + if (!refcount_dec_and_test(&req->ref)) + return; + erofs_fscache_req_complete(req); + kfree(req); +} + +static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *mapping, + loff_t start, size_t len) +{ + struct erofs_fscache_rq *req = kzalloc(sizeof(*req), GFP_KERNEL); + + if (!req) + return NULL; + req->mapping = mapping; + req->start = start; + req->len = len; + refcount_set(&req->ref, 1); + return req; } -static void erofs_fscache_subreq_complete(void *priv, +static void erofs_fscache_req_io_put(struct erofs_fscache_io *io) +{ + struct erofs_fscache_rq *req = io->private; + + if (erofs_fscache_io_put(io)) + erofs_fscache_req_put(req); +} + +static void erofs_fscache_req_end_io(void *priv, ssize_t transferred_or_error, bool was_async) { - struct erofs_fscache_request *req = priv; + struct erofs_fscache_io *io = priv; + struct erofs_fscache_rq *req = io->private; - if (IS_ERR_VALUE(transferred_or_error)) { - if (req->primary) - req->primary->error = transferred_or_error; - else - req->error = transferred_or_error; - } - erofs_fscache_req_put(req); + if (IS_ERR_VALUE(transferred_or_error)) + req->error = transferred_or_error; + erofs_fscache_req_io_put(io); +} + +static struct erofs_fscache_io *erofs_fscache_req_io_alloc(struct erofs_fscache_rq *req) +{ + struct erofs_fscache_io *io = kzalloc(sizeof(*io), GFP_KERNEL); + + if (!io) + return NULL; + io->end_io = erofs_fscache_req_end_io; + io->private = req; + refcount_inc(&req->ref); + refcount_set(&io->ref, 1); + return io; } /* - * Read data from fscache (cookie, pstart, len), and fill the read data into - * page cache described by (req->mapping, lstart, len). @pstart describeis the - * start physical address in the cache file. + * Read data from fscache described by cookie at pstart physical address + * offset, and fill the read data into buffer described by io->iter. */ -static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie, - struct erofs_fscache_request *req, loff_t pstart, size_t len) +static int erofs_fscache_read_io_async(struct fscache_cookie *cookie, + loff_t pstart, struct erofs_fscache_io *io) { enum netfs_io_source source; - struct super_block *sb = req->mapping->host->i_sb; - struct netfs_cache_resources *cres = &req->cache_resources; - struct iov_iter iter; - loff_t lstart = req->start + req->submitted; - size_t done = 0; + struct netfs_cache_resources *cres = &io->cres; + struct iov_iter *iter = &io->iter; int ret; - DBG_BUGON(len > req->len - req->submitted); - ret = fscache_begin_read_operation(cres, cookie); if (ret) return ret; - while (done < len) { - loff_t sstart = pstart + done; - size_t slen = len - done; + while (iov_iter_count(iter)) { + size_t orig_count = iov_iter_count(iter), len = orig_count; unsigned long flags = 1 << NETFS_SREQ_ONDEMAND; source = cres->ops->prepare_ondemand_read(cres, - sstart, &slen, LLONG_MAX, &flags, 0); - if (WARN_ON(slen == 0)) + pstart, &len, LLONG_MAX, &flags, 0); + if (WARN_ON(len == 0)) source = NETFS_INVALID_READ; if (source != NETFS_READ_FROM_CACHE) { - erofs_err(sb, "failed to fscache prepare_read (source %d)", source); + erofs_err(NULL, "prepare_ondemand_read failed (source %d)", source); return -EIO; } - refcount_inc(&req->ref); - iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages, - lstart + done, slen); - - ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL, - erofs_fscache_subreq_complete, req); + iov_iter_truncate(iter, len); + refcount_inc(&io->ref); + ret = fscache_read(cres, pstart, iter, NETFS_READ_HOLE_FAIL, + io->end_io, io); if (ret == -EIOCBQUEUED) ret = 0; if (ret) { - erofs_err(sb, "failed to fscache_read (ret %d)", ret); + erofs_err(NULL, "fscache_read failed (ret %d)", ret); return ret; } + if (WARN_ON(iov_iter_count(iter))) + return -EIO; - done += slen; + iov_iter_reexpand(iter, orig_count - len); + pstart += len; } - DBG_BUGON(done != len); return 0; } -static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio) +struct erofs_fscache_bio { + struct erofs_fscache_io io; + struct bio bio; /* w/o bdev to share bio_add_page/endio() */ + struct bio_vec bvecs[BIO_MAX_VECS]; +}; + +static void erofs_fscache_bio_endio(void *priv, + ssize_t transferred_or_error, bool was_async) { + struct erofs_fscache_bio *io = priv; + + if (IS_ERR_VALUE(transferred_or_error)) + io->bio.bi_status = errno_to_blk_status(transferred_or_error); + io->bio.bi_end_io(&io->bio); + BUILD_BUG_ON(offsetof(struct erofs_fscache_bio, io) != 0); + erofs_fscache_io_put(&io->io); +} + +struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev) +{ + struct erofs_fscache_bio *io; + + io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL); + bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ); + io->io.private = mdev->m_fscache->cookie; + io->io.end_io = erofs_fscache_bio_endio; + refcount_set(&io->io.ref, 1); + return &io->bio; +} + +void erofs_fscache_submit_bio(struct bio *bio) +{ + struct erofs_fscache_bio *io = container_of(bio, + struct erofs_fscache_bio, bio); int ret; + + iov_iter_bvec(&io->io.iter, ITER_DEST, io->bvecs, bio->bi_vcnt, + bio->bi_iter.bi_size); + ret = erofs_fscache_read_io_async(io->io.private, + bio->bi_iter.bi_sector << 9, &io->io); + erofs_fscache_io_put(&io->io); + if (!ret) + return; + bio->bi_status = errno_to_blk_status(ret); + bio->bi_end_io(bio); +} + +static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio) +{ struct erofs_fscache *ctx = folio->mapping->host->i_private; - struct erofs_fscache_request *req; + int ret = -ENOMEM; + struct erofs_fscache_rq *req; + struct erofs_fscache_io *io; req = erofs_fscache_req_alloc(folio->mapping, folio_pos(folio), folio_size(folio)); - if (IS_ERR(req)) { + if (!req) { folio_unlock(folio); - return PTR_ERR(req); + return ret; } - ret = erofs_fscache_read_folios_async(ctx->cookie, req, - folio_pos(folio), folio_size(folio)); + io = erofs_fscache_req_io_alloc(req); + if (!io) { + req->error = ret; + goto out; + } + iov_iter_xarray(&io->iter, ITER_DEST, &folio->mapping->i_pages, + folio_pos(folio), folio_size(folio)); + + ret = erofs_fscache_read_io_async(ctx->cookie, folio_pos(folio), io); if (ret) req->error = ret; + erofs_fscache_req_io_put(io); +out: erofs_fscache_req_put(req); return ret; } -static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary) +static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req) { - struct address_space *mapping = primary->mapping; + struct address_space *mapping = req->mapping; struct inode *inode = mapping->host; struct super_block *sb = inode->i_sb; - struct erofs_fscache_request *req; + struct erofs_fscache_io *io; struct erofs_map_blocks map; struct erofs_map_dev mdev; - struct iov_iter iter; - loff_t pos = primary->start + primary->submitted; + loff_t pos = req->start + req->submitted; size_t count; int ret; @@ -217,6 +272,7 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary) if (map.m_flags & EROFS_MAP_META) { struct erofs_buf buf = __EROFS_BUF_INITIALIZER; + struct iov_iter iter; erofs_blk_t blknr; size_t offset, size; void *src; @@ -237,15 +293,17 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary) } iov_iter_zero(PAGE_SIZE - size, &iter); erofs_put_metabuf(&buf); - primary->submitted += PAGE_SIZE; + req->submitted += PAGE_SIZE; return 0; } - count = primary->len - primary->submitted; + count = req->len - req->submitted; if (!(map.m_flags & EROFS_MAP_MAPPED)) { + struct iov_iter iter; + iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count); iov_iter_zero(count, &iter); - primary->submitted += count; + req->submitted += count; return 0; } @@ -260,18 +318,19 @@ static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary) if (ret) return ret; - req = erofs_fscache_req_chain(primary, count); - if (IS_ERR(req)) - return PTR_ERR(req); + io = erofs_fscache_req_io_alloc(req); + if (!io) + return -ENOMEM; + iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count); + ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie, + mdev.m_pa + (pos - map.m_la), io); + erofs_fscache_req_io_put(io); - ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie, - req, mdev.m_pa + (pos - map.m_la), count); - erofs_fscache_req_put(req); - primary->submitted += count; + req->submitted += count; return ret; } -static int erofs_fscache_data_read(struct erofs_fscache_request *req) +static int erofs_fscache_data_read(struct erofs_fscache_rq *req) { int ret; @@ -280,20 +339,19 @@ static int erofs_fscache_data_read(struct erofs_fscache_request *req) if (ret) req->error = ret; } while (!ret && req->submitted < req->len); - return ret; } static int erofs_fscache_read_folio(struct file *file, struct folio *folio) { - struct erofs_fscache_request *req; + struct erofs_fscache_rq *req; int ret; req = erofs_fscache_req_alloc(folio->mapping, folio_pos(folio), folio_size(folio)); - if (IS_ERR(req)) { + if (!req) { folio_unlock(folio); - return PTR_ERR(req); + return -ENOMEM; } ret = erofs_fscache_data_read(req); @@ -303,14 +361,14 @@ static int erofs_fscache_read_folio(struct file *file, struct folio *folio) static void erofs_fscache_readahead(struct readahead_control *rac) { - struct erofs_fscache_request *req; + struct erofs_fscache_rq *req; if (!readahead_count(rac)) return; req = erofs_fscache_req_alloc(rac->mapping, readahead_pos(rac), readahead_length(rac)); - if (IS_ERR(req)) + if (!req) return; /* The request completion will drop refs on the folios. */ diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 36e638e8b5..0eb0e6f933 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -259,14 +259,12 @@ static int erofs_fill_inode(struct inode *inode) if (erofs_inode_is_data_compressed(vi->datalayout)) { #ifdef CONFIG_EROFS_FS_ZIP - if (!erofs_is_fscache_mode(inode->i_sb)) { - DO_ONCE_LITE_IF(inode->i_sb->s_blocksize != PAGE_SIZE, - erofs_info, inode->i_sb, - "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!"); - inode->i_mapping->a_ops = &z_erofs_aops; - err = 0; - goto out_unlock; - } + DO_ONCE_LITE_IF(inode->i_blkbits != PAGE_SHIFT, + erofs_info, inode->i_sb, + "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!"); + inode->i_mapping->a_ops = &z_erofs_aops; + err = 0; + goto out_unlock; #endif err = -EOPNOTSUPP; goto out_unlock; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index c69174675c..d28ccfc035 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -49,7 +49,7 @@ typedef u32 erofs_blk_t; struct erofs_device_info { char *path; struct erofs_fscache *fscache; - struct bdev_handle *bdev_handle; + struct file *bdev_file; struct dax_device *dax_dev; u64 dax_part_off; @@ -459,8 +459,8 @@ int __init erofs_init_shrinker(void); void erofs_exit_shrinker(void); int __init z_erofs_init_zip_subsystem(void); void z_erofs_exit_zip_subsystem(void); -int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, - struct erofs_workgroup *egrp); +int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi, + struct erofs_workgroup *egrp); int z_erofs_map_blocks_iter(struct inode *inode, struct erofs_map_blocks *map, int flags); void *erofs_get_pcpubuf(unsigned int requiredpages); @@ -505,6 +505,8 @@ void erofs_fscache_unregister_fs(struct super_block *sb); struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, char *name, unsigned int flags); void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache); +struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev); +void erofs_fscache_submit_bio(struct bio *bio); #else static inline int erofs_fscache_register_fs(struct super_block *sb) { @@ -522,6 +524,8 @@ struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb, static inline void erofs_fscache_unregister_cookie(struct erofs_fscache *fscache) { } +static inline struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev) { return NULL; } +static inline void erofs_fscache_submit_bio(struct bio *bio) {} #endif #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ diff --git a/fs/erofs/super.c b/fs/erofs/super.c index a2fa745585..30b49b2eee 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -177,7 +177,7 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_fscache *fscache; struct erofs_deviceslot *dis; - struct bdev_handle *bdev_handle; + struct file *bdev_file; void *ptr; ptr = erofs_read_metabuf(buf, sb, erofs_blknr(sb, *pos), EROFS_KMAP); @@ -201,12 +201,12 @@ static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, return PTR_ERR(fscache); dif->fscache = fscache; } else if (!sbi->devs->flatdev) { - bdev_handle = bdev_open_by_path(dif->path, BLK_OPEN_READ, + bdev_file = bdev_file_open_by_path(dif->path, BLK_OPEN_READ, sb->s_type, NULL); - if (IS_ERR(bdev_handle)) - return PTR_ERR(bdev_handle); - dif->bdev_handle = bdev_handle; - dif->dax_dev = fs_dax_get_by_bdev(bdev_handle->bdev, + if (IS_ERR(bdev_file)) + return PTR_ERR(bdev_file); + dif->bdev_file = bdev_file; + dif->dax_dev = fs_dax_get_by_bdev(file_bdev(bdev_file), &dif->dax_part_off, NULL, NULL); } @@ -430,7 +430,6 @@ static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode) switch (mode) { case EROFS_MOUNT_DAX_ALWAYS: - warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); set_opt(&sbi->opt, DAX_ALWAYS); clear_opt(&sbi->opt, DAX_NEVER); return true; @@ -728,8 +727,8 @@ static int erofs_release_device_info(int id, void *ptr, void *data) struct erofs_device_info *dif = ptr; fs_put_dax(dif->dax_dev, NULL); - if (dif->bdev_handle) - bdev_release(dif->bdev_handle); + if (dif->bdev_file) + fput(dif->bdev_file); erofs_fscache_unregister_cookie(dif->fscache); dif->fscache = NULL; kfree(dif->path); @@ -843,7 +842,7 @@ static int __init erofs_module_init(void) erofs_inode_cachep = kmem_cache_create("erofs_inode", sizeof(struct erofs_inode), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT, + SLAB_RECLAIM_ACCOUNT | SLAB_ACCOUNT, erofs_inode_init_once); if (!erofs_inode_cachep) return -ENOMEM; diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c index e146d09151..518bdd69c8 100644 --- a/fs/erofs/utils.c +++ b/fs/erofs/utils.c @@ -129,7 +129,7 @@ static bool erofs_try_to_release_workgroup(struct erofs_sb_info *sbi, * the XArray. Otherwise some cached pages could be still attached to * the orphan old workgroup when the new one is available in the tree. */ - if (erofs_try_to_free_all_cached_pages(sbi, grp)) + if (erofs_try_to_free_all_cached_folios(sbi, grp)) goto out; /* diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index ff0aa72b0d..3216b920d3 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -19,7 +19,10 @@ typedef void *z_erofs_next_pcluster_t; struct z_erofs_bvec { - struct page *page; + union { + struct page *page; + struct folio *folio; + }; int offset; unsigned int end; }; @@ -116,47 +119,46 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl) return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT; } +#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) +static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo) +{ + return fo->mapping == MNGD_MAPPING(sbi); +} + /* - * bit 30: I/O error occurred on this page - * bit 0 - 29: remaining parts to complete this page + * bit 30: I/O error occurred on this folio + * bit 0 - 29: remaining parts to complete this folio */ -#define Z_EROFS_PAGE_EIO (1 << 30) +#define Z_EROFS_FOLIO_EIO (1 << 30) -static inline void z_erofs_onlinepage_init(struct page *page) +static void z_erofs_onlinefolio_init(struct folio *folio) { union { atomic_t o; - unsigned long v; + void *v; } u = { .o = ATOMIC_INIT(1) }; - set_page_private(page, u.v); - smp_wmb(); - SetPagePrivate(page); + folio->private = u.v; /* valid only if file-backed folio is locked */ } -static inline void z_erofs_onlinepage_split(struct page *page) +static void z_erofs_onlinefolio_split(struct folio *folio) { - atomic_inc((atomic_t *)&page->private); + atomic_inc((atomic_t *)&folio->private); } -static void z_erofs_onlinepage_endio(struct page *page, int err) +static void z_erofs_onlinefolio_end(struct folio *folio, int err) { int orig, v; - DBG_BUGON(!PagePrivate(page)); - do { - orig = atomic_read((atomic_t *)&page->private); - v = (orig - 1) | (err ? Z_EROFS_PAGE_EIO : 0); - } while (atomic_cmpxchg((atomic_t *)&page->private, orig, v) != orig); - - if (!(v & ~Z_EROFS_PAGE_EIO)) { - set_page_private(page, 0); - ClearPagePrivate(page); - if (!(v & Z_EROFS_PAGE_EIO)) - SetPageUptodate(page); - unlock_page(page); - } + orig = atomic_read((atomic_t *)&folio->private); + v = (orig - 1) | (err ? Z_EROFS_FOLIO_EIO : 0); + } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig); + + if (v & ~Z_EROFS_FOLIO_EIO) + return; + folio->private = 0; + folio_end_read(folio, !(v & Z_EROFS_FOLIO_EIO)); } #define Z_EROFS_ONSTACK_PAGES 32 @@ -572,17 +574,13 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) for (i = 0; i < pclusterpages; ++i) { struct page *page, *newpage; - void *t; /* mark pages just found for debugging */ /* Inaccurate check w/o locking to avoid unneeded lookups */ if (READ_ONCE(pcl->compressed_bvecs[i].page)) continue; page = find_get_page(mc, pcl->obj.index + i); - if (page) { - t = (void *)((unsigned long)page | 1); - newpage = NULL; - } else { + if (!page) { /* I/O is needed, no possible to decompress directly */ standalone = false; if (!shouldalloc) @@ -596,11 +594,10 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) if (!newpage) continue; set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); - t = (void *)((unsigned long)newpage | 1); } spin_lock(&pcl->obj.lockref.lock); if (!pcl->compressed_bvecs[i].page) { - pcl->compressed_bvecs[i].page = t; + pcl->compressed_bvecs[i].page = page ? page : newpage; spin_unlock(&pcl->obj.lockref.lock); continue; } @@ -620,9 +617,9 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; } -/* called by erofs_shrinker to get rid of all compressed_pages */ -int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, - struct erofs_workgroup *grp) +/* called by erofs_shrinker to get rid of all cached compressed bvecs */ +int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi, + struct erofs_workgroup *grp) { struct z_erofs_pcluster *const pcl = container_of(grp, struct z_erofs_pcluster, obj); @@ -630,27 +627,22 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, int i; DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); - /* - * refcount of workgroup is now freezed as 0, - * therefore no need to worry about available decompression users. - */ + /* There is no actice user since the pcluster is now freezed */ for (i = 0; i < pclusterpages; ++i) { - struct page *page = pcl->compressed_bvecs[i].page; + struct folio *folio = pcl->compressed_bvecs[i].folio; - if (!page) + if (!folio) continue; - /* block other users from reclaiming or migrating the page */ - if (!trylock_page(page)) + /* Avoid reclaiming or migrating this folio */ + if (!folio_trylock(folio)) return -EBUSY; - if (!erofs_page_is_managed(sbi, page)) + if (!erofs_folio_is_managed(sbi, folio)) continue; - - /* barrier is implied in the following 'unlock_page' */ - WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); - detach_page_private(page); - unlock_page(page); + pcl->compressed_bvecs[i].folio = NULL; + folio_detach_private(folio); + folio_unlock(folio); } return 0; } @@ -667,20 +659,17 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp) ret = false; spin_lock(&pcl->obj.lockref.lock); - if (pcl->obj.lockref.count > 0) - goto out; - - DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); - for (i = 0; i < pclusterpages; ++i) { - if (pcl->compressed_bvecs[i].page == &folio->page) { - WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); - ret = true; - break; + if (pcl->obj.lockref.count <= 0) { + DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); + for (i = 0; i < pclusterpages; ++i) { + if (pcl->compressed_bvecs[i].folio == folio) { + pcl->compressed_bvecs[i].folio = NULL; + folio_detach_private(folio); + ret = true; + break; + } } } - if (ret) - folio_detach_private(folio); -out: spin_unlock(&pcl->obj.lockref.lock); return ret; } @@ -962,20 +951,20 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page, return 0; } -static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, - struct page *page, bool ra) +static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *fe, + struct folio *folio, bool ra) { struct inode *const inode = fe->inode; struct erofs_map_blocks *const map = &fe->map; - const loff_t offset = page_offset(page); - const unsigned int bs = i_blocksize(inode); + const loff_t offset = folio_pos(folio); + const unsigned int bs = i_blocksize(inode), fs = folio_size(folio); bool tight = true, exclusive; unsigned int cur, end, len, split; int err = 0; - z_erofs_onlinepage_init(page); + z_erofs_onlinefolio_init(folio); split = 0; - end = PAGE_SIZE; + end = fs; repeat: if (offset + end - 1 < map->m_la || offset + end - 1 >= map->m_la + map->m_llen) { @@ -992,7 +981,7 @@ repeat: ++split; if (!(map->m_flags & EROFS_MAP_MAPPED)) { - zero_user_segment(page, cur, end); + folio_zero_segment(folio, cur, end); tight = false; goto next_part; } @@ -1001,8 +990,8 @@ repeat: erofs_off_t fpos = offset + cur - map->m_la; len = min_t(unsigned int, map->m_llen - fpos, end - cur); - err = z_erofs_read_fragment(inode->i_sb, page, cur, cur + len, - EROFS_I(inode)->z_fragmentoff + fpos); + err = z_erofs_read_fragment(inode->i_sb, &folio->page, cur, + cur + len, EROFS_I(inode)->z_fragmentoff + fpos); if (err) goto out; tight = false; @@ -1017,25 +1006,25 @@ repeat: } /* - * Ensure the current partial page belongs to this submit chain rather + * Ensure the current partial folio belongs to this submit chain rather * than other concurrent submit chains or the noio(bypass) chain since - * those chains are handled asynchronously thus the page cannot be used + * those chains are handled asynchronously thus the folio cannot be used * for inplace I/O or bvpage (should be processed in a strict order.) */ tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE); - exclusive = (!cur && ((split <= 1) || (tight && bs == PAGE_SIZE))); + exclusive = (!cur && ((split <= 1) || (tight && bs == fs))); if (cur) tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED); err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) { - .page = page, + .page = &folio->page, .offset = offset - map->m_la, .end = end, }), exclusive); if (err) goto out; - z_erofs_onlinepage_split(page); + z_erofs_onlinefolio_split(folio); if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) fe->pcl->multibases = true; if (fe->pcl->length < offset + end - map->m_la) { @@ -1056,7 +1045,7 @@ next_part: goto repeat; out: - z_erofs_onlinepage_endio(page, err); + z_erofs_onlinefolio_end(folio, err); return err; } @@ -1159,7 +1148,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be, cur += len; } kunmap_local(dst); - z_erofs_onlinepage_endio(bvi->bvec.page, err); + z_erofs_onlinefolio_end(page_folio(bvi->bvec.page), err); list_del(p); kfree(bvi); } @@ -1210,7 +1199,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be, be->compressed_pages[i] = page; if (z_erofs_is_inline_pcluster(pcl) || - erofs_page_is_managed(EROFS_SB(be->sb), page)) { + erofs_folio_is_managed(EROFS_SB(be->sb), page_folio(page))) { if (!PageUptodate(page)) err = -EIO; continue; @@ -1295,7 +1284,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, /* consider shortlived pages added when decompressing */ page = be->compressed_pages[i]; - if (!page || erofs_page_is_managed(sbi, page)) + if (!page || + erofs_folio_is_managed(sbi, page_folio(page))) continue; (void)z_erofs_put_shortlivedpage(be->pagepool, page); WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); @@ -1316,7 +1306,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, /* recycle all individual short-lived pages */ if (z_erofs_put_shortlivedpage(be->pagepool, page)) continue; - z_erofs_onlinepage_endio(page, err); + z_erofs_onlinefolio_end(page_folio(page), err); } if (be->decompressed_pages != be->onstack_pages) @@ -1430,38 +1420,34 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, struct z_erofs_bvec zbv; struct address_space *mapping; struct page *page; - int justfound, bs = i_blocksize(f->inode); + int bs = i_blocksize(f->inode); - /* Except for inplace pages, the entire page can be used for I/Os */ + /* Except for inplace folios, the entire folio can be used for I/Os */ bvec->bv_offset = 0; bvec->bv_len = PAGE_SIZE; repeat: spin_lock(&pcl->obj.lockref.lock); zbv = pcl->compressed_bvecs[nr]; - page = zbv.page; - justfound = (unsigned long)page & 1UL; - page = (struct page *)((unsigned long)page & ~1UL); - pcl->compressed_bvecs[nr].page = page; spin_unlock(&pcl->obj.lockref.lock); - if (!page) - goto out_allocpage; + if (!zbv.folio) + goto out_allocfolio; - bvec->bv_page = page; - DBG_BUGON(z_erofs_is_shortlived_page(page)); + bvec->bv_page = &zbv.folio->page; + DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page)); /* - * Handle preallocated cached pages. We tried to allocate such pages + * Handle preallocated cached folios. We tried to allocate such folios * without triggering direct reclaim. If allocation failed, inplace - * file-backed pages will be used instead. + * file-backed folios will be used instead. */ - if (page->private == Z_EROFS_PREALLOCATED_PAGE) { - set_page_private(page, 0); + if (zbv.folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) { + zbv.folio->private = 0; tocache = true; goto out_tocache; } - mapping = READ_ONCE(page->mapping); + mapping = READ_ONCE(zbv.folio->mapping); /* - * File-backed pages for inplace I/Os are all locked steady, + * File-backed folios for inplace I/Os are all locked steady, * therefore it is impossible for `mapping` to be NULL. */ if (mapping && mapping != mc) { @@ -1471,26 +1457,21 @@ repeat: return; } - lock_page(page); - /* only true if page reclaim goes wrong, should never happen */ - DBG_BUGON(justfound && PagePrivate(page)); - - /* the cached page is still in managed cache */ - if (page->mapping == mc) { + folio_lock(zbv.folio); + if (zbv.folio->mapping == mc) { /* - * The cached page is still available but without a valid - * `->private` pcluster hint. Let's reconnect them. + * The cached folio is still in managed cache but without + * a valid `->private` pcluster hint. Let's reconnect them. */ - if (!PagePrivate(page)) { - DBG_BUGON(!justfound); - /* compressed_bvecs[] already takes a ref */ - attach_page_private(page, pcl); - put_page(page); + if (!folio_test_private(zbv.folio)) { + folio_attach_private(zbv.folio, pcl); + /* compressed_bvecs[] already takes a ref before */ + folio_put(zbv.folio); } /* no need to submit if it is already up-to-date */ - if (PageUptodate(page)) { - unlock_page(page); + if (folio_test_uptodate(zbv.folio)) { + folio_unlock(zbv.folio); bvec->bv_page = NULL; } return; @@ -1500,34 +1481,32 @@ repeat: * It has been truncated, so it's unsafe to reuse this one. Let's * allocate a new page for compressed data. */ - DBG_BUGON(page->mapping); - DBG_BUGON(!justfound); - + DBG_BUGON(zbv.folio->mapping); tocache = true; - unlock_page(page); - put_page(page); -out_allocpage: + folio_unlock(zbv.folio); + folio_put(zbv.folio); +out_allocfolio: page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL); spin_lock(&pcl->obj.lockref.lock); - if (pcl->compressed_bvecs[nr].page) { + if (pcl->compressed_bvecs[nr].folio) { erofs_pagepool_add(&f->pagepool, page); spin_unlock(&pcl->obj.lockref.lock); cond_resched(); goto repeat; } - pcl->compressed_bvecs[nr].page = page; + pcl->compressed_bvecs[nr].folio = zbv.folio = page_folio(page); spin_unlock(&pcl->obj.lockref.lock); bvec->bv_page = page; out_tocache: if (!tocache || bs != PAGE_SIZE || - add_to_page_cache_lru(page, mc, pcl->obj.index + nr, gfp)) { - /* turn into a temporary shortlived page (1 ref) */ - set_page_private(page, Z_EROFS_SHORTLIVED_PAGE); + filemap_add_folio(mc, zbv.folio, pcl->obj.index + nr, gfp)) { + /* turn into a temporary shortlived folio (1 ref) */ + zbv.folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE; return; } - attach_page_private(page, pcl); + folio_attach_private(zbv.folio, pcl); /* drop a refcount added by allocpage (then 2 refs in total here) */ - put_page(page); + folio_put(zbv.folio); } static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb, @@ -1582,28 +1561,29 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, qtail[JQ_BYPASS] = &pcl->next; } -static void z_erofs_submissionqueue_endio(struct bio *bio) +static void z_erofs_endio(struct bio *bio) { struct z_erofs_decompressqueue *q = bio->bi_private; blk_status_t err = bio->bi_status; - struct bio_vec *bvec; - struct bvec_iter_all iter_all; + struct folio_iter fi; - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *page = bvec->bv_page; + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; - DBG_BUGON(PageUptodate(page)); - DBG_BUGON(z_erofs_page_is_invalidated(page)); - if (erofs_page_is_managed(EROFS_SB(q->sb), page)) { - if (!err) - SetPageUptodate(page); - unlock_page(page); - } + DBG_BUGON(folio_test_uptodate(folio)); + DBG_BUGON(z_erofs_page_is_invalidated(&folio->page)); + if (!erofs_folio_is_managed(EROFS_SB(q->sb), folio)) + continue; + + if (!err) + folio_mark_uptodate(folio); + folio_unlock(folio); } if (err) q->eio = true; z_erofs_decompress_kickoff(q, -1); - bio_put(bio); + if (bio->bi_bdev) + bio_put(bio); } static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, @@ -1617,7 +1597,6 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, z_erofs_next_pcluster_t owned_head = f->owned_head; /* bio is NULL initially, so no need to initialize last_{index,bdev} */ erofs_off_t last_pa; - struct block_device *last_bdev; unsigned int nr_bios = 0; struct bio *bio = NULL; unsigned long pflags; @@ -1664,9 +1643,13 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, continue; if (bio && (cur != last_pa || - last_bdev != mdev.m_bdev)) { -submit_bio_retry: - submit_bio(bio); + bio->bi_bdev != mdev.m_bdev)) { +io_retry: + if (!erofs_is_fscache_mode(sb)) + submit_bio(bio); + else + erofs_fscache_submit_bio(bio); + if (memstall) { psi_memstall_leave(&pflags); memstall = 0; @@ -1681,15 +1664,16 @@ submit_bio_retry: } if (!bio) { - bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS, - REQ_OP_READ, GFP_NOIO); - bio->bi_end_io = z_erofs_submissionqueue_endio; + bio = erofs_is_fscache_mode(sb) ? + erofs_fscache_bio_alloc(&mdev) : + bio_alloc(mdev.m_bdev, BIO_MAX_VECS, + REQ_OP_READ, GFP_NOIO); + bio->bi_end_io = z_erofs_endio; bio->bi_iter.bi_sector = cur >> 9; bio->bi_private = q[JQ_SUBMIT]; if (readahead) bio->bi_opf |= REQ_RAHEAD; ++nr_bios; - last_bdev = mdev.m_bdev; } if (cur + bvec.bv_len > end) @@ -1697,7 +1681,7 @@ submit_bio_retry: DBG_BUGON(bvec.bv_len < sb->s_blocksize); if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len, bvec.bv_offset)) - goto submit_bio_retry; + goto io_retry; last_pa = cur + bvec.bv_len; bypass = false; @@ -1710,7 +1694,10 @@ submit_bio_retry: } while (owned_head != Z_EROFS_PCLUSTER_TAIL); if (bio) { - submit_bio(bio); + if (!erofs_is_fscache_mode(sb)) + submit_bio(bio); + else + erofs_fscache_submit_bio(bio); if (memstall) psi_memstall_leave(&pflags); } @@ -1795,7 +1782,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, if (PageUptodate(page)) unlock_page(page); else - (void)z_erofs_do_read_page(f, page, !!rac); + z_erofs_scan_folio(f, page_folio(page), !!rac); put_page(page); } @@ -1816,7 +1803,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio) f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT; z_erofs_pcluster_readmore(&f, NULL, true); - err = z_erofs_do_read_page(&f, &folio->page, false); + err = z_erofs_scan_folio(&f, folio, false); z_erofs_pcluster_readmore(&f, NULL, false); z_erofs_pcluster_end(&f); @@ -1857,7 +1844,7 @@ static void z_erofs_readahead(struct readahead_control *rac) folio = head; head = folio_get_private(folio); - err = z_erofs_do_read_page(&f, &folio->page, true); + err = z_erofs_scan_folio(&f, folio, true); if (err && err != -EINTR) erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu", folio->index, EROFS_I(inode)->nid); |