Diffstat (limited to 'fs/erofs')
-rw-r--r--  fs/erofs/Kconfig                |   7
-rw-r--r--  fs/erofs/compress.h             |   5
-rw-r--r--  fs/erofs/decompressor.c         |  93
-rw-r--r--  fs/erofs/decompressor_deflate.c |  21
-rw-r--r--  fs/erofs/decompressor_lzma.c    |  17
-rw-r--r--  fs/erofs/fscache.c              |   8
-rw-r--r--  fs/erofs/inode.c                |   8
-rw-r--r--  fs/erofs/super.c                |  10
-rw-r--r--  fs/erofs/utils.c                |   2
-rw-r--r--  fs/erofs/zdata.c                | 328
10 files changed, 258 insertions, 241 deletions
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index 1d318f8523..fffd391934 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -114,8 +114,11 @@ config EROFS_FS_ZIP_DEFLATE
config EROFS_FS_ONDEMAND
bool "EROFS fscache-based on-demand read support"
- depends on CACHEFILES_ONDEMAND && (EROFS_FS=m && FSCACHE || EROFS_FS=y && FSCACHE=y)
- default n
+ depends on EROFS_FS
+ select NETFS_SUPPORT
+ select FSCACHE
+ select CACHEFILES
+ select CACHEFILES_ONDEMAND
help
This permits EROFS to use fscache-backed data blobs with on-demand
read support.
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index 279933e007..7cc5841577 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -11,13 +11,12 @@
struct z_erofs_decompress_req {
struct super_block *sb;
struct page **in, **out;
-
unsigned short pageofs_in, pageofs_out;
unsigned int inputsize, outputsize;
- /* indicate the algorithm will be used for decompression */
- unsigned int alg;
+ unsigned int alg; /* the algorithm for decompression */
bool inplace_io, partial_decoding, fillgaps;
+ gfp_t gfp; /* allocation flags for extra temporary buffers */
};
struct z_erofs_decompressor {
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 066ddc03b7..2ec9b2bb62 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -111,8 +111,9 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx,
victim = availables[--top];
get_page(victim);
} else {
- victim = erofs_allocpage(pagepool,
- GFP_KERNEL | __GFP_NOFAIL);
+ victim = erofs_allocpage(pagepool, rq->gfp);
+ if (!victim)
+ return -ENOMEM;
set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
}
rq->out[i] = victim;
@@ -247,15 +248,9 @@ static int z_erofs_lz4_decompress_mem(struct z_erofs_lz4_decompress_ctx *ctx,
if (ret != rq->outputsize) {
erofs_err(rq->sb, "failed to decompress %d in[%u, %u] out[%u]",
ret, rq->inputsize, inputmargin, rq->outputsize);
-
- print_hex_dump(KERN_DEBUG, "[ in]: ", DUMP_PREFIX_OFFSET,
- 16, 1, src + inputmargin, rq->inputsize, true);
- print_hex_dump(KERN_DEBUG, "[out]: ", DUMP_PREFIX_OFFSET,
- 16, 1, out, rq->outputsize, true);
-
if (ret >= 0)
memset(out + ret, 0, rq->outputsize - ret);
- ret = -EIO;
+ ret = -EFSCORRUPTED;
} else {
ret = 0;
}
@@ -320,43 +315,59 @@ dstmap_out:
static int z_erofs_transform_plain(struct z_erofs_decompress_req *rq,
struct page **pagepool)
{
- const unsigned int inpages = PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
- const unsigned int outpages =
+ const unsigned int nrpages_in =
+ PAGE_ALIGN(rq->pageofs_in + rq->inputsize) >> PAGE_SHIFT;
+ const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
- const unsigned int righthalf = min_t(unsigned int, rq->outputsize,
- PAGE_SIZE - rq->pageofs_out);
- const unsigned int lefthalf = rq->outputsize - righthalf;
- const unsigned int interlaced_offset =
- rq->alg == Z_EROFS_COMPRESSION_SHIFTED ? 0 : rq->pageofs_out;
- u8 *src;
-
- if (outpages > 2 && rq->alg == Z_EROFS_COMPRESSION_SHIFTED) {
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
+ const unsigned int bs = rq->sb->s_blocksize;
+ unsigned int cur = 0, ni = 0, no, pi, po, insz, cnt;
+ u8 *kin;
- if (rq->out[0] == *rq->in) {
- DBG_BUGON(rq->pageofs_out);
- return 0;
+ if (rq->outputsize > rq->inputsize)
+ return -EOPNOTSUPP;
+ if (rq->alg == Z_EROFS_COMPRESSION_INTERLACED) {
+ cur = bs - (rq->pageofs_out & (bs - 1));
+ pi = (rq->pageofs_in + rq->inputsize - cur) & ~PAGE_MASK;
+ cur = min(cur, rq->outputsize);
+ if (cur && rq->out[0]) {
+ kin = kmap_local_page(rq->in[nrpages_in - 1]);
+ if (rq->out[0] == rq->in[nrpages_in - 1]) {
+ memmove(kin + rq->pageofs_out, kin + pi, cur);
+ flush_dcache_page(rq->out[0]);
+ } else {
+ memcpy_to_page(rq->out[0], rq->pageofs_out,
+ kin + pi, cur);
+ }
+ kunmap_local(kin);
+ }
+ rq->outputsize -= cur;
}
- src = kmap_local_page(rq->in[inpages - 1]) + rq->pageofs_in;
- if (rq->out[0])
- memcpy_to_page(rq->out[0], rq->pageofs_out,
- src + interlaced_offset, righthalf);
-
- if (outpages > inpages) {
- DBG_BUGON(!rq->out[outpages - 1]);
- if (rq->out[outpages - 1] != rq->in[inpages - 1]) {
- memcpy_to_page(rq->out[outpages - 1], 0, src +
- (interlaced_offset ? 0 : righthalf),
- lefthalf);
- } else if (!interlaced_offset) {
- memmove(src, src + righthalf, lefthalf);
- flush_dcache_page(rq->in[inpages - 1]);
- }
+ for (; rq->outputsize; rq->pageofs_in = 0, cur += PAGE_SIZE, ni++) {
+ insz = min(PAGE_SIZE - rq->pageofs_in, rq->outputsize);
+ rq->outputsize -= insz;
+ if (!rq->in[ni])
+ continue;
+ kin = kmap_local_page(rq->in[ni]);
+ pi = 0;
+ do {
+ no = (rq->pageofs_out + cur + pi) >> PAGE_SHIFT;
+ po = (rq->pageofs_out + cur + pi) & ~PAGE_MASK;
+ DBG_BUGON(no >= nrpages_out);
+ cnt = min(insz - pi, PAGE_SIZE - po);
+ if (rq->out[no] == rq->in[ni]) {
+ memmove(kin + po,
+ kin + rq->pageofs_in + pi, cnt);
+ flush_dcache_page(rq->out[no]);
+ } else if (rq->out[no]) {
+ memcpy_to_page(rq->out[no], po,
+ kin + rq->pageofs_in + pi, cnt);
+ }
+ pi += cnt;
+ } while (pi < insz);
+ kunmap_local(kin);
}
- kunmap_local(src);
+ DBG_BUGON(ni > nrpages_in);
return 0;
}
diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c
index daf3c1bdea..b98872058a 100644
--- a/fs/erofs/decompressor_deflate.c
+++ b/fs/erofs/decompressor_deflate.c
@@ -70,7 +70,7 @@ int __init z_erofs_deflate_init(void)
return 0;
out_failed:
- pr_err("failed to allocate zlib workspace\n");
+ erofs_err(NULL, "failed to allocate zlib workspace");
z_erofs_deflate_exit();
return -ENOMEM;
}
@@ -95,7 +95,7 @@ int z_erofs_load_deflate_config(struct super_block *sb,
}
int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
- struct page **pagepool)
+ struct page **pgpl)
{
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
@@ -158,8 +158,12 @@ again:
strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs);
outsz -= strm->z.avail_out;
if (!rq->out[no]) {
- rq->out[no] = erofs_allocpage(pagepool,
- GFP_KERNEL | __GFP_NOFAIL);
+ rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
+ if (!rq->out[no]) {
+ kout = NULL;
+ err = -ENOMEM;
+ break;
+ }
set_page_private(rq->out[no],
Z_EROFS_SHORTLIVED_PAGE);
}
@@ -211,8 +215,11 @@ again:
DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb),
rq->in[j]));
- tmppage = erofs_allocpage(pagepool,
- GFP_KERNEL | __GFP_NOFAIL);
+ tmppage = erofs_allocpage(pgpl, rq->gfp);
+ if (!tmppage) {
+ err = -ENOMEM;
+ goto failed;
+ }
set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
copy_highpage(tmppage, rq->in[j]);
rq->in[j] = tmppage;
@@ -230,7 +237,7 @@ again:
break;
}
}
-
+failed:
if (zlib_inflateEnd(&strm->z) != Z_OK && !err)
err = -EIO;
if (kout)
diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c
index 2dd14f99c1..6ca357d83c 100644
--- a/fs/erofs/decompressor_lzma.c
+++ b/fs/erofs/decompressor_lzma.c
@@ -148,7 +148,7 @@ again:
}
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
- struct page **pagepool)
+ struct page **pgpl)
{
const unsigned int nrpages_out =
PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
@@ -215,8 +215,11 @@ again:
PAGE_SIZE - pageofs);
outlen -= strm->buf.out_size;
if (!rq->out[no] && rq->fillgaps) { /* deduped */
- rq->out[no] = erofs_allocpage(pagepool,
- GFP_KERNEL | __GFP_NOFAIL);
+ rq->out[no] = erofs_allocpage(pgpl, rq->gfp);
+ if (!rq->out[no]) {
+ err = -ENOMEM;
+ break;
+ }
set_page_private(rq->out[no],
Z_EROFS_SHORTLIVED_PAGE);
}
@@ -258,8 +261,11 @@ again:
DBG_BUGON(erofs_page_is_managed(EROFS_SB(rq->sb),
rq->in[j]));
- tmppage = erofs_allocpage(pagepool,
- GFP_KERNEL | __GFP_NOFAIL);
+ tmppage = erofs_allocpage(pgpl, rq->gfp);
+ if (!tmppage) {
+ err = -ENOMEM;
+ goto failed;
+ }
set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
copy_highpage(tmppage, rq->in[j]);
rq->in[j] = tmppage;
@@ -277,6 +283,7 @@ again:
break;
}
}
+failed:
if (no < nrpages_out && strm->buf.out)
kunmap(rq->out[no]);
if (ni < nrpages_in)
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index afc37c9029..122a4753ec 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -178,10 +178,10 @@ static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
int ret;
- struct erofs_fscache *ctx = folio_mapping(folio)->host->i_private;
+ struct erofs_fscache *ctx = folio->mapping->host->i_private;
struct erofs_fscache_request *req;
- req = erofs_fscache_req_alloc(folio_mapping(folio),
+ req = erofs_fscache_req_alloc(folio->mapping,
folio_pos(folio), folio_size(folio));
if (IS_ERR(req)) {
folio_unlock(folio);
@@ -289,7 +289,7 @@ static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
struct erofs_fscache_request *req;
int ret;
- req = erofs_fscache_req_alloc(folio_mapping(folio),
+ req = erofs_fscache_req_alloc(folio->mapping,
folio_pos(folio), folio_size(folio));
if (IS_ERR(req)) {
folio_unlock(folio);
@@ -473,7 +473,7 @@ static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
inode->i_blkbits = EROFS_SB(sb)->blkszbits;
inode->i_private = ctx;
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index 14a79d3226..36e638e8b5 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -60,7 +60,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
} else {
const unsigned int gotten = sb->s_blocksize - *ofs;
- copied = kmalloc(vi->inode_isize, GFP_NOFS);
+ copied = kmalloc(vi->inode_isize, GFP_KERNEL);
if (!copied) {
err = -ENOMEM;
goto err_out;
@@ -259,8 +259,10 @@ static int erofs_fill_inode(struct inode *inode)
if (erofs_inode_is_data_compressed(vi->datalayout)) {
#ifdef CONFIG_EROFS_FS_ZIP
- if (!erofs_is_fscache_mode(inode->i_sb) &&
- inode->i_sb->s_blocksize_bits == PAGE_SHIFT) {
+ if (!erofs_is_fscache_mode(inode->i_sb)) {
+ DO_ONCE_LITE_IF(inode->i_sb->s_blocksize != PAGE_SIZE,
+ erofs_info, inode->i_sb,
+ "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!");
inode->i_mapping->a_ops = &z_erofs_aops;
err = 0;
goto out_unlock;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 8a82d5cec1..24788c230b 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -27,7 +27,10 @@ void _erofs_err(struct super_block *sb, const char *func, const char *fmt, ...)
vaf.fmt = fmt;
vaf.va = &args;
- pr_err("(device %s): %s: %pV", sb->s_id, func, &vaf);
+ if (sb)
+ pr_err("(device %s): %s: %pV", sb->s_id, func, &vaf);
+ else
+ pr_err("%s: %pV", func, &vaf);
va_end(args);
}
@@ -41,7 +44,10 @@ void _erofs_info(struct super_block *sb, const char *func, const char *fmt, ...)
vaf.fmt = fmt;
vaf.va = &args;
- pr_info("(device %s): %pV", sb->s_id, &vaf);
+ if (sb)
+ pr_info("(device %s): %pV", sb->s_id, &vaf);
+ else
+ pr_info("%pV", &vaf);
va_end(args);
}
diff --git a/fs/erofs/utils.c b/fs/erofs/utils.c
index 5dea308764..e146d09151 100644
--- a/fs/erofs/utils.c
+++ b/fs/erofs/utils.c
@@ -81,7 +81,7 @@ struct erofs_workgroup *erofs_insert_workgroup(struct super_block *sb,
repeat:
xa_lock(&sbi->managed_pslots);
pre = __xa_cmpxchg(&sbi->managed_pslots, grp->index,
- NULL, grp, GFP_NOFS);
+ NULL, grp, GFP_KERNEL);
if (pre) {
if (xa_is_err(pre)) {
pre = ERR_PTR(xa_err(pre));
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 1c0e6167d8..ff0aa72b0d 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -56,6 +56,9 @@ struct z_erofs_pcluster {
/* L: total number of bvecs */
unsigned int vcnt;
+ /* I: pcluster size (compressed size) in bytes */
+ unsigned int pclustersize;
+
/* I: page offset of start position of decompression */
unsigned short pageofs_out;
@@ -70,14 +73,6 @@ struct z_erofs_pcluster {
struct rcu_head rcu;
};
- union {
- /* I: physical cluster size in pages */
- unsigned short pclusterpages;
-
- /* I: tailpacking inline compressed size */
- unsigned short tailpacking_size;
- };
-
/* I: compression algorithm format */
unsigned char algorithmformat;
@@ -87,6 +82,9 @@ struct z_erofs_pcluster {
/* L: indicate several pageofs_outs or not */
bool multibases;
+ /* L: whether extra buffer allocations are best-effort */
+ bool besteffort;
+
/* A: compressed bvecs (can be cached or inplaced pages) */
struct z_erofs_bvec compressed_bvecs[];
};
@@ -115,9 +113,7 @@ static inline bool z_erofs_is_inline_pcluster(struct z_erofs_pcluster *pcl)
static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl)
{
- if (z_erofs_is_inline_pcluster(pcl))
- return 1;
- return pcl->pclusterpages;
+ return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT;
}
/*
@@ -237,7 +233,7 @@ static int z_erofs_bvec_enqueue(struct z_erofs_bvec_iter *iter,
struct page *nextpage = *candidate_bvpage;
if (!nextpage) {
- nextpage = erofs_allocpage(pagepool, GFP_NOFS);
+ nextpage = erofs_allocpage(pagepool, GFP_KERNEL);
if (!nextpage)
return -ENOMEM;
set_page_private(nextpage, Z_EROFS_SHORTLIVED_PAGE);
@@ -298,21 +294,21 @@ static int z_erofs_create_pcluster_pool(void)
return 0;
}
-static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int nrpages)
+static struct z_erofs_pcluster *z_erofs_alloc_pcluster(unsigned int size)
{
- int i;
+ unsigned int nrpages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ struct z_erofs_pcluster_slab *pcs = pcluster_pool;
- for (i = 0; i < ARRAY_SIZE(pcluster_pool); ++i) {
- struct z_erofs_pcluster_slab *pcs = pcluster_pool + i;
+ for (; pcs < pcluster_pool + ARRAY_SIZE(pcluster_pool); ++pcs) {
struct z_erofs_pcluster *pcl;
if (nrpages > pcs->maxpages)
continue;
- pcl = kmem_cache_zalloc(pcs->slab, GFP_NOFS);
+ pcl = kmem_cache_zalloc(pcs->slab, GFP_KERNEL);
if (!pcl)
return ERR_PTR(-ENOMEM);
- pcl->pclusterpages = nrpages;
+ pcl->pclustersize = size;
return pcl;
}
return ERR_PTR(-EINVAL);
@@ -559,6 +555,7 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
{
struct address_space *mc = MNGD_MAPPING(EROFS_I_SB(fe->inode));
struct z_erofs_pcluster *pcl = fe->pcl;
+ unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
bool shouldalloc = z_erofs_should_alloc_cache(fe);
bool standalone = true;
/*
@@ -569,22 +566,22 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN;
unsigned int i;
- if (fe->mode < Z_EROFS_PCLUSTER_FOLLOWED)
+ if (i_blocksize(fe->inode) != PAGE_SIZE ||
+ fe->mode < Z_EROFS_PCLUSTER_FOLLOWED)
return;
- for (i = 0; i < pcl->pclusterpages; ++i) {
- struct page *page;
+ for (i = 0; i < pclusterpages; ++i) {
+ struct page *page, *newpage;
void *t; /* mark pages just found for debugging */
- struct page *newpage = NULL;
- /* the compressed page was loaded before */
+ /* Inaccurate check w/o locking to avoid unneeded lookups */
if (READ_ONCE(pcl->compressed_bvecs[i].page))
continue;
page = find_get_page(mc, pcl->obj.index + i);
-
if (page) {
t = (void *)((unsigned long)page | 1);
+ newpage = NULL;
} else {
/* I/O is needed, no possible to decompress directly */
standalone = false;
@@ -592,9 +589,8 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
continue;
/*
- * try to use cached I/O if page allocation
- * succeeds or fallback to in-place I/O instead
- * to avoid any direct reclaim.
+ * Try cached I/O if allocation succeeds or fallback to
+ * in-place I/O instead to avoid any direct reclaim.
*/
newpage = erofs_allocpage(&fe->pagepool, gfp);
if (!newpage)
@@ -602,9 +598,13 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE);
t = (void *)((unsigned long)newpage | 1);
}
-
- if (!cmpxchg_relaxed(&pcl->compressed_bvecs[i].page, NULL, t))
+ spin_lock(&pcl->obj.lockref.lock);
+ if (!pcl->compressed_bvecs[i].page) {
+ pcl->compressed_bvecs[i].page = t;
+ spin_unlock(&pcl->obj.lockref.lock);
continue;
+ }
+ spin_unlock(&pcl->obj.lockref.lock);
if (page)
put_page(page);
@@ -626,6 +626,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
{
struct z_erofs_pcluster *const pcl =
container_of(grp, struct z_erofs_pcluster, obj);
+ unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
int i;
DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
@@ -633,7 +634,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
* refcount of workgroup is now freezed as 0,
* therefore no need to worry about available decompression users.
*/
- for (i = 0; i < pcl->pclusterpages; ++i) {
+ for (i = 0; i < pclusterpages; ++i) {
struct page *page = pcl->compressed_bvecs[i].page;
if (!page)
@@ -657,6 +658,7 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi,
static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
{
struct z_erofs_pcluster *pcl = folio_get_private(folio);
+ unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
bool ret;
int i;
@@ -669,7 +671,7 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
goto out;
DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
- for (i = 0; i < pcl->pclusterpages; ++i) {
+ for (i = 0; i < pclusterpages; ++i) {
if (pcl->compressed_bvecs[i].page == &folio->page) {
WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
ret = true;
@@ -697,7 +699,7 @@ static void z_erofs_cache_invalidate_folio(struct folio *folio,
DBG_BUGON(stop > folio_size(folio) || stop < length);
if (offset == 0 && stop == folio_size(folio))
- while (!z_erofs_cache_release_folio(folio, GFP_NOFS))
+ while (!z_erofs_cache_release_folio(folio, 0))
cond_resched();
}
@@ -716,36 +718,30 @@ int erofs_init_managed_cache(struct super_block *sb)
set_nlink(inode, 1);
inode->i_size = OFFSET_MAX;
inode->i_mapping->a_ops = &z_erofs_cache_aops;
- mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+ mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
EROFS_SB(sb)->managed_cache = inode;
return 0;
}
-static bool z_erofs_try_inplace_io(struct z_erofs_decompress_frontend *fe,
- struct z_erofs_bvec *bvec)
-{
- struct z_erofs_pcluster *const pcl = fe->pcl;
-
- while (fe->icur > 0) {
- if (!cmpxchg(&pcl->compressed_bvecs[--fe->icur].page,
- NULL, bvec->page)) {
- pcl->compressed_bvecs[fe->icur] = *bvec;
- return true;
- }
- }
- return false;
-}
-
/* callers must be with pcluster lock held */
static int z_erofs_attach_page(struct z_erofs_decompress_frontend *fe,
struct z_erofs_bvec *bvec, bool exclusive)
{
+ struct z_erofs_pcluster *pcl = fe->pcl;
int ret;
if (exclusive) {
/* give priority for inplaceio to use file pages first */
- if (z_erofs_try_inplace_io(fe, bvec))
+ spin_lock(&pcl->obj.lockref.lock);
+ while (fe->icur > 0) {
+ if (pcl->compressed_bvecs[--fe->icur].page)
+ continue;
+ pcl->compressed_bvecs[fe->icur] = *bvec;
+ spin_unlock(&pcl->obj.lockref.lock);
return 0;
+ }
+ spin_unlock(&pcl->obj.lockref.lock);
+
/* otherwise, check if it can be used as a bvpage */
if (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED &&
!fe->candidate_bvpage)
@@ -778,20 +774,20 @@ static void z_erofs_try_to_claim_pcluster(struct z_erofs_decompress_frontend *f)
static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
{
struct erofs_map_blocks *map = &fe->map;
+ struct super_block *sb = fe->inode->i_sb;
bool ztailpacking = map->m_flags & EROFS_MAP_META;
struct z_erofs_pcluster *pcl;
struct erofs_workgroup *grp;
int err;
if (!(map->m_flags & EROFS_MAP_ENCODED) ||
- (!ztailpacking && !(map->m_pa >> PAGE_SHIFT))) {
+ (!ztailpacking && !erofs_blknr(sb, map->m_pa))) {
DBG_BUGON(1);
return -EFSCORRUPTED;
}
/* no available pcluster, let's allocate one */
- pcl = z_erofs_alloc_pcluster(ztailpacking ? 1 :
- map->m_plen >> PAGE_SHIFT);
+ pcl = z_erofs_alloc_pcluster(map->m_plen);
if (IS_ERR(pcl))
return PTR_ERR(pcl);
@@ -815,9 +811,8 @@ static int z_erofs_register_pcluster(struct z_erofs_decompress_frontend *fe)
if (ztailpacking) {
pcl->obj.index = 0; /* which indicates ztailpacking */
- pcl->tailpacking_size = map->m_plen;
} else {
- pcl->obj.index = map->m_pa >> PAGE_SHIFT;
+ pcl->obj.index = erofs_blknr(sb, map->m_pa);
grp = erofs_insert_workgroup(fe->inode->i_sb, &pcl->obj);
if (IS_ERR(grp)) {
@@ -968,17 +963,17 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
}
static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
- struct page *page)
+ struct page *page, bool ra)
{
struct inode *const inode = fe->inode;
struct erofs_map_blocks *const map = &fe->map;
const loff_t offset = page_offset(page);
+ const unsigned int bs = i_blocksize(inode);
bool tight = true, exclusive;
unsigned int cur, end, len, split;
int err = 0;
z_erofs_onlinepage_init(page);
-
split = 0;
end = PAGE_SIZE;
repeat:
@@ -1018,6 +1013,7 @@ repeat:
err = z_erofs_pcluster_begin(fe);
if (err)
goto out;
+ fe->pcl->besteffort |= !ra;
}
/*
@@ -1027,7 +1023,7 @@ repeat:
* for inplace I/O or bvpage (should be processed in a strict order.)
*/
tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
- exclusive = (!cur && ((split <= 1) || tight));
+ exclusive = (!cur && ((split <= 1) || (tight && bs == PAGE_SIZE)));
if (cur)
tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
@@ -1206,34 +1202,27 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be,
struct z_erofs_bvec *bvec = &pcl->compressed_bvecs[i];
struct page *page = bvec->page;
- /* compressed pages ought to be present before decompressing */
+ /* compressed data ought to be valid before decompressing */
if (!page) {
- DBG_BUGON(1);
+ err = -EIO;
continue;
}
be->compressed_pages[i] = page;
- if (z_erofs_is_inline_pcluster(pcl)) {
+ if (z_erofs_is_inline_pcluster(pcl) ||
+ erofs_page_is_managed(EROFS_SB(be->sb), page)) {
if (!PageUptodate(page))
err = -EIO;
continue;
}
DBG_BUGON(z_erofs_page_is_invalidated(page));
- if (!z_erofs_is_shortlived_page(page)) {
- if (erofs_page_is_managed(EROFS_SB(be->sb), page)) {
- if (!PageUptodate(page))
- err = -EIO;
- continue;
- }
- z_erofs_do_decompressed_bvec(be, bvec);
- *overlapped = true;
- }
+ if (z_erofs_is_shortlived_page(page))
+ continue;
+ z_erofs_do_decompressed_bvec(be, bvec);
+ *overlapped = true;
}
-
- if (err)
- return err;
- return 0;
+ return err;
}
static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
@@ -1242,10 +1231,9 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
struct erofs_sb_info *const sbi = EROFS_SB(be->sb);
struct z_erofs_pcluster *pcl = be->pcl;
unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
- const struct z_erofs_decompressor *decompressor =
+ const struct z_erofs_decompressor *decomp =
&erofs_decompressors[pcl->algorithmformat];
- unsigned int i, inputsize;
- int err2;
+ int i, err2;
struct page *page;
bool overlapped;
@@ -1279,29 +1267,24 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
err2 = z_erofs_parse_in_bvecs(be, &overlapped);
if (err2)
err = err2;
- if (err)
- goto out;
-
- if (z_erofs_is_inline_pcluster(pcl))
- inputsize = pcl->tailpacking_size;
- else
- inputsize = pclusterpages * PAGE_SIZE;
-
- err = decompressor->decompress(&(struct z_erofs_decompress_req) {
+ if (!err)
+ err = decomp->decompress(&(struct z_erofs_decompress_req) {
.sb = be->sb,
.in = be->compressed_pages,
.out = be->decompressed_pages,
.pageofs_in = pcl->pageofs_in,
.pageofs_out = pcl->pageofs_out,
- .inputsize = inputsize,
+ .inputsize = pcl->pclustersize,
.outputsize = pcl->length,
.alg = pcl->algorithmformat,
.inplace_io = overlapped,
.partial_decoding = pcl->partial,
.fillgaps = pcl->multibases,
+ .gfp = pcl->besteffort ?
+ GFP_KERNEL | __GFP_NOFAIL :
+ GFP_NOWAIT | __GFP_NORETRY
}, be->pagepool);
-out:
/* must handle all compressed pages before actual file pages */
if (z_erofs_is_inline_pcluster(pcl)) {
page = pcl->compressed_bvecs[0].page;
@@ -1312,7 +1295,7 @@ out:
/* consider shortlived pages added when decompressing */
page = be->compressed_pages[i];
- if (erofs_page_is_managed(sbi, page))
+ if (!page || erofs_page_is_managed(sbi, page))
continue;
(void)z_erofs_put_shortlivedpage(be->pagepool, page);
WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL);
@@ -1342,6 +1325,7 @@ out:
pcl->length = 0;
pcl->partial = true;
pcl->multibases = false;
+ pcl->besteffort = false;
pcl->bvset.nextpage = NULL;
pcl->vcnt = 0;
@@ -1435,86 +1419,86 @@ static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
z_erofs_decompressqueue_work(&io->u.work);
}
-static struct page *pickup_page_for_submission(struct z_erofs_pcluster *pcl,
- unsigned int nr,
- struct page **pagepool,
- struct address_space *mc)
+static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
+ struct z_erofs_decompress_frontend *f,
+ struct z_erofs_pcluster *pcl,
+ unsigned int nr,
+ struct address_space *mc)
{
- const pgoff_t index = pcl->obj.index;
gfp_t gfp = mapping_gfp_mask(mc);
bool tocache = false;
-
+ struct z_erofs_bvec zbv;
struct address_space *mapping;
- struct page *oldpage, *page;
- int justfound;
+ struct page *page;
+ int justfound, bs = i_blocksize(f->inode);
+ /* Except for inplace pages, the entire page can be used for I/Os */
+ bvec->bv_offset = 0;
+ bvec->bv_len = PAGE_SIZE;
repeat:
- page = READ_ONCE(pcl->compressed_bvecs[nr].page);
- oldpage = page;
-
- if (!page)
- goto out_allocpage;
-
+ spin_lock(&pcl->obj.lockref.lock);
+ zbv = pcl->compressed_bvecs[nr];
+ page = zbv.page;
justfound = (unsigned long)page & 1UL;
page = (struct page *)((unsigned long)page & ~1UL);
+ pcl->compressed_bvecs[nr].page = page;
+ spin_unlock(&pcl->obj.lockref.lock);
+ if (!page)
+ goto out_allocpage;
+ bvec->bv_page = page;
+ DBG_BUGON(z_erofs_is_shortlived_page(page));
/*
- * preallocated cached pages, which is used to avoid direct reclaim
- * otherwise, it will go inplace I/O path instead.
+ * Handle preallocated cached pages. We tried to allocate such pages
+ * without triggering direct reclaim. If allocation failed, inplace
+ * file-backed pages will be used instead.
*/
if (page->private == Z_EROFS_PREALLOCATED_PAGE) {
- WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
set_page_private(page, 0);
tocache = true;
goto out_tocache;
}
- mapping = READ_ONCE(page->mapping);
+ mapping = READ_ONCE(page->mapping);
/*
- * file-backed online pages in plcuster are all locked steady,
- * therefore it is impossible for `mapping' to be NULL.
+ * File-backed pages for inplace I/Os are all locked steady,
+ * therefore it is impossible for `mapping` to be NULL.
*/
- if (mapping && mapping != mc)
- /* ought to be unmanaged pages */
- goto out;
-
- /* directly return for shortlived page as well */
- if (z_erofs_is_shortlived_page(page))
- goto out;
+ if (mapping && mapping != mc) {
+ if (zbv.offset < 0)
+ bvec->bv_offset = round_up(-zbv.offset, bs);
+ bvec->bv_len = round_up(zbv.end, bs) - bvec->bv_offset;
+ return;
+ }
lock_page(page);
-
/* only true if page reclaim goes wrong, should never happen */
DBG_BUGON(justfound && PagePrivate(page));
- /* the page is still in manage cache */
+ /* the cached page is still in managed cache */
if (page->mapping == mc) {
- WRITE_ONCE(pcl->compressed_bvecs[nr].page, page);
-
+ /*
+ * The cached page is still available but without a valid
+ * `->private` pcluster hint. Let's reconnect them.
+ */
if (!PagePrivate(page)) {
- /*
- * impossible to be !PagePrivate(page) for
- * the current restriction as well if
- * the page is already in compressed_bvecs[].
- */
DBG_BUGON(!justfound);
-
- justfound = 0;
- set_page_private(page, (unsigned long)pcl);
- SetPagePrivate(page);
+ /* compressed_bvecs[] already takes a ref */
+ attach_page_private(page, pcl);
+ put_page(page);
}
- /* no need to submit io if it is already up-to-date */
+ /* no need to submit if it is already up-to-date */
if (PageUptodate(page)) {
unlock_page(page);
- page = NULL;
+ bvec->bv_page = NULL;
}
- goto out;
+ return;
}
/*
- * the managed page has been truncated, it's unsafe to
- * reuse this one, let's allocate a new cache-managed page.
+ * It has been truncated, so it's unsafe to reuse this one. Let's
+ * allocate a new page for compressed data.
*/
DBG_BUGON(page->mapping);
DBG_BUGON(!justfound);
@@ -1523,25 +1507,27 @@ repeat:
unlock_page(page);
put_page(page);
out_allocpage:
- page = erofs_allocpage(pagepool, gfp | __GFP_NOFAIL);
- if (oldpage != cmpxchg(&pcl->compressed_bvecs[nr].page,
- oldpage, page)) {
- erofs_pagepool_add(pagepool, page);
+ page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
+ spin_lock(&pcl->obj.lockref.lock);
+ if (pcl->compressed_bvecs[nr].page) {
+ erofs_pagepool_add(&f->pagepool, page);
+ spin_unlock(&pcl->obj.lockref.lock);
cond_resched();
goto repeat;
}
+ pcl->compressed_bvecs[nr].page = page;
+ spin_unlock(&pcl->obj.lockref.lock);
+ bvec->bv_page = page;
out_tocache:
- if (!tocache || add_to_page_cache_lru(page, mc, index + nr, gfp)) {
- /* turn into temporary page if fails (1 ref) */
+ if (!tocache || bs != PAGE_SIZE ||
+ add_to_page_cache_lru(page, mc, pcl->obj.index + nr, gfp)) {
+ /* turn into a temporary shortlived page (1 ref) */
set_page_private(page, Z_EROFS_SHORTLIVED_PAGE);
- goto out;
+ return;
}
attach_page_private(page, pcl);
- /* drop a refcount added by allocpage (then we have 2 refs here) */
+ /* drop a refcount added by allocpage (then 2 refs in total here) */
put_page(page);
-
-out: /* the only exit (for tracing and debugging) */
- return page;
}
static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb,
@@ -1596,7 +1582,7 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl,
qtail[JQ_BYPASS] = &pcl->next;
}
-static void z_erofs_decompressqueue_endio(struct bio *bio)
+static void z_erofs_submissionqueue_endio(struct bio *bio)
{
struct z_erofs_decompressqueue *q = bio->bi_private;
blk_status_t err = bio->bi_status;
@@ -1608,7 +1594,6 @@ static void z_erofs_decompressqueue_endio(struct bio *bio)
DBG_BUGON(PageUptodate(page));
DBG_BUGON(z_erofs_page_is_invalidated(page));
-
if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
if (!err)
SetPageUptodate(page);
@@ -1631,17 +1616,14 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
struct z_erofs_decompressqueue *q[NR_JOBQUEUES];
z_erofs_next_pcluster_t owned_head = f->owned_head;
/* bio is NULL initially, so no need to initialize last_{index,bdev} */
- pgoff_t last_index;
+ erofs_off_t last_pa;
struct block_device *last_bdev;
unsigned int nr_bios = 0;
struct bio *bio = NULL;
unsigned long pflags;
int memstall = 0;
- /*
- * if managed cache is enabled, bypass jobqueue is needed,
- * no need to read from device for all pclusters in this queue.
- */
+ /* No need to read from device for pclusters in the bypass queue. */
q[JQ_BYPASS] = jobqueue_init(sb, fgq + JQ_BYPASS, NULL);
q[JQ_SUBMIT] = jobqueue_init(sb, fgq + JQ_SUBMIT, force_fg);
@@ -1654,7 +1636,8 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
do {
struct erofs_map_dev mdev;
struct z_erofs_pcluster *pcl;
- pgoff_t cur, end;
+ erofs_off_t cur, end;
+ struct bio_vec bvec;
unsigned int i = 0;
bool bypass = true;
@@ -1673,18 +1656,14 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f,
};
(void)erofs_map_dev(sb, &mdev);
- cur = erofs_blknr(sb, mdev.m_pa);
- end = cur + pcl->pclusterpages;
-
+ cur = mdev.m_pa;
+ end = cur + pcl->pclustersize;
do {
- struct page *page;
-
- page = pickup_page_for_submission(pcl, i++,
- &f->pagepool, mc);
- if (!page)
+ z_erofs_fill_bio_vec(&bvec, f, pcl, i++, mc);
+ if (!bvec.bv_page)
continue;
- if (bio && (cur != last_index + 1 ||
+ if (bio && (cur != last_pa ||
last_bdev != mdev.m_bdev)) {
submit_bio_retry:
submit_bio(bio);
@@ -1695,7 +1674,8 @@ submit_bio_retry:
bio = NULL;
}
- if (unlikely(PageWorkingset(page)) && !memstall) {
+ if (unlikely(PageWorkingset(bvec.bv_page)) &&
+ !memstall) {
psi_memstall_enter(&pflags);
memstall = 1;
}
@@ -1703,23 +1683,25 @@ submit_bio_retry:
if (!bio) {
bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS,
REQ_OP_READ, GFP_NOIO);
- bio->bi_end_io = z_erofs_decompressqueue_endio;
-
- last_bdev = mdev.m_bdev;
- bio->bi_iter.bi_sector = (sector_t)cur <<
- (sb->s_blocksize_bits - 9);
+ bio->bi_end_io = z_erofs_submissionqueue_endio;
+ bio->bi_iter.bi_sector = cur >> 9;
bio->bi_private = q[JQ_SUBMIT];
if (readahead)
bio->bi_opf |= REQ_RAHEAD;
++nr_bios;
+ last_bdev = mdev.m_bdev;
}
- if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
+ if (cur + bvec.bv_len > end)
+ bvec.bv_len = end - cur;
+ DBG_BUGON(bvec.bv_len < sb->s_blocksize);
+ if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len,
+ bvec.bv_offset))
goto submit_bio_retry;
- last_index = cur;
+ last_pa = cur + bvec.bv_len;
bypass = false;
- } while (++cur < end);
+ } while ((cur += bvec.bv_len) < end);
if (!bypass)
qtail[JQ_SUBMIT] = &pcl->next;
@@ -1813,7 +1795,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
if (PageUptodate(page))
unlock_page(page);
else
- (void)z_erofs_do_read_page(f, page);
+ (void)z_erofs_do_read_page(f, page, !!rac);
put_page(page);
}
@@ -1834,7 +1816,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio)
f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT;
z_erofs_pcluster_readmore(&f, NULL, true);
- err = z_erofs_do_read_page(&f, &folio->page);
+ err = z_erofs_do_read_page(&f, &folio->page, false);
z_erofs_pcluster_readmore(&f, NULL, false);
z_erofs_pcluster_end(&f);
@@ -1875,7 +1857,7 @@ static void z_erofs_readahead(struct readahead_control *rac)
folio = head;
head = folio_get_private(folio);
- err = z_erofs_do_read_page(&f, &folio->page);
+ err = z_erofs_do_read_page(&f, &folio->page, true);
if (err && err != -EINTR)
erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
folio->index, EROFS_I(inode)->nid);