diff options
Diffstat (limited to 'fs/erofs/zdata.c')
-rw-r--r-- | fs/erofs/zdata.c | 287 |
1 files changed, 137 insertions, 150 deletions
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index ff0aa72b0d..3216b920d3 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -19,7 +19,10 @@ typedef void *z_erofs_next_pcluster_t; struct z_erofs_bvec { - struct page *page; + union { + struct page *page; + struct folio *folio; + }; int offset; unsigned int end; }; @@ -116,47 +119,46 @@ static inline unsigned int z_erofs_pclusterpages(struct z_erofs_pcluster *pcl) return PAGE_ALIGN(pcl->pclustersize) >> PAGE_SHIFT; } +#define MNGD_MAPPING(sbi) ((sbi)->managed_cache->i_mapping) +static bool erofs_folio_is_managed(struct erofs_sb_info *sbi, struct folio *fo) +{ + return fo->mapping == MNGD_MAPPING(sbi); +} + /* - * bit 30: I/O error occurred on this page - * bit 0 - 29: remaining parts to complete this page + * bit 30: I/O error occurred on this folio + * bit 0 - 29: remaining parts to complete this folio */ -#define Z_EROFS_PAGE_EIO (1 << 30) +#define Z_EROFS_FOLIO_EIO (1 << 30) -static inline void z_erofs_onlinepage_init(struct page *page) +static void z_erofs_onlinefolio_init(struct folio *folio) { union { atomic_t o; - unsigned long v; + void *v; } u = { .o = ATOMIC_INIT(1) }; - set_page_private(page, u.v); - smp_wmb(); - SetPagePrivate(page); + folio->private = u.v; /* valid only if file-backed folio is locked */ } -static inline void z_erofs_onlinepage_split(struct page *page) +static void z_erofs_onlinefolio_split(struct folio *folio) { - atomic_inc((atomic_t *)&page->private); + atomic_inc((atomic_t *)&folio->private); } -static void z_erofs_onlinepage_endio(struct page *page, int err) +static void z_erofs_onlinefolio_end(struct folio *folio, int err) { int orig, v; - DBG_BUGON(!PagePrivate(page)); - do { - orig = atomic_read((atomic_t *)&page->private); - v = (orig - 1) | (err ? 
Z_EROFS_PAGE_EIO : 0); - } while (atomic_cmpxchg((atomic_t *)&page->private, orig, v) != orig); - - if (!(v & ~Z_EROFS_PAGE_EIO)) { - set_page_private(page, 0); - ClearPagePrivate(page); - if (!(v & Z_EROFS_PAGE_EIO)) - SetPageUptodate(page); - unlock_page(page); - } + orig = atomic_read((atomic_t *)&folio->private); + v = (orig - 1) | (err ? Z_EROFS_FOLIO_EIO : 0); + } while (atomic_cmpxchg((atomic_t *)&folio->private, orig, v) != orig); + + if (v & ~Z_EROFS_FOLIO_EIO) + return; + folio->private = 0; + folio_end_read(folio, !(v & Z_EROFS_FOLIO_EIO)); } #define Z_EROFS_ONSTACK_PAGES 32 @@ -572,17 +574,13 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) for (i = 0; i < pclusterpages; ++i) { struct page *page, *newpage; - void *t; /* mark pages just found for debugging */ /* Inaccurate check w/o locking to avoid unneeded lookups */ if (READ_ONCE(pcl->compressed_bvecs[i].page)) continue; page = find_get_page(mc, pcl->obj.index + i); - if (page) { - t = (void *)((unsigned long)page | 1); - newpage = NULL; - } else { + if (!page) { /* I/O is needed, no possible to decompress directly */ standalone = false; if (!shouldalloc) @@ -596,11 +594,10 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) if (!newpage) continue; set_page_private(newpage, Z_EROFS_PREALLOCATED_PAGE); - t = (void *)((unsigned long)newpage | 1); } spin_lock(&pcl->obj.lockref.lock); if (!pcl->compressed_bvecs[i].page) { - pcl->compressed_bvecs[i].page = t; + pcl->compressed_bvecs[i].page = page ? 
page : newpage; spin_unlock(&pcl->obj.lockref.lock); continue; } @@ -620,9 +617,9 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe) fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE; } -/* called by erofs_shrinker to get rid of all compressed_pages */ -int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, - struct erofs_workgroup *grp) +/* called by erofs_shrinker to get rid of all cached compressed bvecs */ +int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi, + struct erofs_workgroup *grp) { struct z_erofs_pcluster *const pcl = container_of(grp, struct z_erofs_pcluster, obj); @@ -630,27 +627,22 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info *sbi, int i; DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); - /* - * refcount of workgroup is now freezed as 0, - * therefore no need to worry about available decompression users. - */ + /* There is no active user since the pcluster is now frozen */ for (i = 0; i < pclusterpages; ++i) { - struct page *page = pcl->compressed_bvecs[i].page; + struct folio *folio = pcl->compressed_bvecs[i].folio; - if (!page) + if (!folio) continue; - /* block other users from reclaiming or migrating the page */ - if (!trylock_page(page)) + /* Avoid reclaiming or migrating this folio */ + if (!folio_trylock(folio)) return -EBUSY; - if (!erofs_page_is_managed(sbi, page)) + if (!erofs_folio_is_managed(sbi, folio)) continue; - - /* barrier is implied in the following 'unlock_page' */ - WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); - detach_page_private(page); - unlock_page(page); + pcl->compressed_bvecs[i].folio = NULL; + folio_detach_private(folio); + folio_unlock(folio); } return 0; } @@ -667,20 +659,17 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp) ret = false; spin_lock(&pcl->obj.lockref.lock); - if (pcl->obj.lockref.count > 0) - goto out; - - DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); - for (i = 0; i < pclusterpages; ++i) { - if 
(pcl->compressed_bvecs[i].page == &folio->page) { - WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); - ret = true; - break; + if (pcl->obj.lockref.count <= 0) { + DBG_BUGON(z_erofs_is_inline_pcluster(pcl)); + for (i = 0; i < pclusterpages; ++i) { + if (pcl->compressed_bvecs[i].folio == folio) { + pcl->compressed_bvecs[i].folio = NULL; + folio_detach_private(folio); + ret = true; + break; + } } } - if (ret) - folio_detach_private(folio); -out: spin_unlock(&pcl->obj.lockref.lock); return ret; } @@ -962,20 +951,20 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page, return 0; } -static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe, - struct page *page, bool ra) +static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *fe, + struct folio *folio, bool ra) { struct inode *const inode = fe->inode; struct erofs_map_blocks *const map = &fe->map; - const loff_t offset = page_offset(page); - const unsigned int bs = i_blocksize(inode); + const loff_t offset = folio_pos(folio); + const unsigned int bs = i_blocksize(inode), fs = folio_size(folio); bool tight = true, exclusive; unsigned int cur, end, len, split; int err = 0; - z_erofs_onlinepage_init(page); + z_erofs_onlinefolio_init(folio); split = 0; - end = PAGE_SIZE; + end = fs; repeat: if (offset + end - 1 < map->m_la || offset + end - 1 >= map->m_la + map->m_llen) { @@ -992,7 +981,7 @@ repeat: ++split; if (!(map->m_flags & EROFS_MAP_MAPPED)) { - zero_user_segment(page, cur, end); + folio_zero_segment(folio, cur, end); tight = false; goto next_part; } @@ -1001,8 +990,8 @@ repeat: erofs_off_t fpos = offset + cur - map->m_la; len = min_t(unsigned int, map->m_llen - fpos, end - cur); - err = z_erofs_read_fragment(inode->i_sb, page, cur, cur + len, - EROFS_I(inode)->z_fragmentoff + fpos); + err = z_erofs_read_fragment(inode->i_sb, &folio->page, cur, + cur + len, EROFS_I(inode)->z_fragmentoff + fpos); if (err) goto out; tight = false; @@ -1017,25 +1006,25 @@ repeat: } /* - * 
Ensure the current partial page belongs to this submit chain rather + * Ensure the current partial folio belongs to this submit chain rather * than other concurrent submit chains or the noio(bypass) chain since - * those chains are handled asynchronously thus the page cannot be used + * those chains are handled asynchronously thus the folio cannot be used * for inplace I/O or bvpage (should be processed in a strict order.) */ tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE); - exclusive = (!cur && ((split <= 1) || (tight && bs == PAGE_SIZE))); + exclusive = (!cur && ((split <= 1) || (tight && bs == fs))); if (cur) tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED); err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) { - .page = page, + .page = &folio->page, .offset = offset - map->m_la, .end = end, }), exclusive); if (err) goto out; - z_erofs_onlinepage_split(page); + z_erofs_onlinefolio_split(folio); if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK)) fe->pcl->multibases = true; if (fe->pcl->length < offset + end - map->m_la) { @@ -1056,7 +1045,7 @@ next_part: goto repeat; out: - z_erofs_onlinepage_endio(page, err); + z_erofs_onlinefolio_end(folio, err); return err; } @@ -1159,7 +1148,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be, cur += len; } kunmap_local(dst); - z_erofs_onlinepage_endio(bvi->bvec.page, err); + z_erofs_onlinefolio_end(page_folio(bvi->bvec.page), err); list_del(p); kfree(bvi); } @@ -1210,7 +1199,7 @@ static int z_erofs_parse_in_bvecs(struct z_erofs_decompress_backend *be, be->compressed_pages[i] = page; if (z_erofs_is_inline_pcluster(pcl) || - erofs_page_is_managed(EROFS_SB(be->sb), page)) { + erofs_folio_is_managed(EROFS_SB(be->sb), page_folio(page))) { if (!PageUptodate(page)) err = -EIO; continue; @@ -1295,7 +1284,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, /* consider shortlived pages added when decompressing */ page = be->compressed_pages[i]; - if (!page || 
erofs_page_is_managed(sbi, page)) + if (!page || + erofs_folio_is_managed(sbi, page_folio(page))) continue; (void)z_erofs_put_shortlivedpage(be->pagepool, page); WRITE_ONCE(pcl->compressed_bvecs[i].page, NULL); @@ -1316,7 +1306,7 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be, /* recycle all individual short-lived pages */ if (z_erofs_put_shortlivedpage(be->pagepool, page)) continue; - z_erofs_onlinepage_endio(page, err); + z_erofs_onlinefolio_end(page_folio(page), err); } if (be->decompressed_pages != be->onstack_pages) @@ -1430,38 +1420,34 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec, struct z_erofs_bvec zbv; struct address_space *mapping; struct page *page; - int justfound, bs = i_blocksize(f->inode); + int bs = i_blocksize(f->inode); - /* Except for inplace pages, the entire page can be used for I/Os */ + /* Except for inplace folios, the entire folio can be used for I/Os */ bvec->bv_offset = 0; bvec->bv_len = PAGE_SIZE; repeat: spin_lock(&pcl->obj.lockref.lock); zbv = pcl->compressed_bvecs[nr]; - page = zbv.page; - justfound = (unsigned long)page & 1UL; - page = (struct page *)((unsigned long)page & ~1UL); - pcl->compressed_bvecs[nr].page = page; spin_unlock(&pcl->obj.lockref.lock); - if (!page) - goto out_allocpage; + if (!zbv.folio) + goto out_allocfolio; - bvec->bv_page = page; - DBG_BUGON(z_erofs_is_shortlived_page(page)); + bvec->bv_page = &zbv.folio->page; + DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page)); /* - * Handle preallocated cached pages. We tried to allocate such pages + * Handle preallocated cached folios. We tried to allocate such folios * without triggering direct reclaim. If allocation failed, inplace - * file-backed pages will be used instead. + * file-backed folios will be used instead. 
*/ - if (page->private == Z_EROFS_PREALLOCATED_PAGE) { - set_page_private(page, 0); + if (zbv.folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) { + zbv.folio->private = 0; tocache = true; goto out_tocache; } - mapping = READ_ONCE(page->mapping); + mapping = READ_ONCE(zbv.folio->mapping); /* - * File-backed pages for inplace I/Os are all locked steady, + * File-backed folios for inplace I/Os are all locked steady, * therefore it is impossible for `mapping` to be NULL. */ if (mapping && mapping != mc) { @@ -1471,26 +1457,21 @@ repeat: return; } - lock_page(page); - /* only true if page reclaim goes wrong, should never happen */ - DBG_BUGON(justfound && PagePrivate(page)); - - /* the cached page is still in managed cache */ - if (page->mapping == mc) { + folio_lock(zbv.folio); + if (zbv.folio->mapping == mc) { /* - * The cached page is still available but without a valid - * `->private` pcluster hint. Let's reconnect them. + * The cached folio is still in managed cache but without + * a valid `->private` pcluster hint. Let's reconnect them. */ - if (!PagePrivate(page)) { - DBG_BUGON(!justfound); - /* compressed_bvecs[] already takes a ref */ - attach_page_private(page, pcl); - put_page(page); + if (!folio_test_private(zbv.folio)) { + folio_attach_private(zbv.folio, pcl); + /* compressed_bvecs[] already takes a ref before */ + folio_put(zbv.folio); } /* no need to submit if it is already up-to-date */ - if (PageUptodate(page)) { - unlock_page(page); + if (folio_test_uptodate(zbv.folio)) { + folio_unlock(zbv.folio); bvec->bv_page = NULL; } return; @@ -1500,34 +1481,32 @@ repeat: * It has been truncated, so it's unsafe to reuse this one. Let's * allocate a new page for compressed data. 
*/ - DBG_BUGON(page->mapping); - DBG_BUGON(!justfound); - + DBG_BUGON(zbv.folio->mapping); tocache = true; - unlock_page(page); - put_page(page); -out_allocpage: + folio_unlock(zbv.folio); + folio_put(zbv.folio); +out_allocfolio: page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL); spin_lock(&pcl->obj.lockref.lock); - if (pcl->compressed_bvecs[nr].page) { + if (pcl->compressed_bvecs[nr].folio) { erofs_pagepool_add(&f->pagepool, page); spin_unlock(&pcl->obj.lockref.lock); cond_resched(); goto repeat; } - pcl->compressed_bvecs[nr].page = page; + pcl->compressed_bvecs[nr].folio = zbv.folio = page_folio(page); spin_unlock(&pcl->obj.lockref.lock); bvec->bv_page = page; out_tocache: if (!tocache || bs != PAGE_SIZE || - add_to_page_cache_lru(page, mc, pcl->obj.index + nr, gfp)) { - /* turn into a temporary shortlived page (1 ref) */ - set_page_private(page, Z_EROFS_SHORTLIVED_PAGE); + filemap_add_folio(mc, zbv.folio, pcl->obj.index + nr, gfp)) { + /* turn into a temporary shortlived folio (1 ref) */ + zbv.folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE; return; } - attach_page_private(page, pcl); + folio_attach_private(zbv.folio, pcl); /* drop a refcount added by allocpage (then 2 refs in total here) */ - put_page(page); + folio_put(zbv.folio); } static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb, @@ -1582,28 +1561,29 @@ static void move_to_bypass_jobqueue(struct z_erofs_pcluster *pcl, qtail[JQ_BYPASS] = &pcl->next; } -static void z_erofs_submissionqueue_endio(struct bio *bio) +static void z_erofs_endio(struct bio *bio) { struct z_erofs_decompressqueue *q = bio->bi_private; blk_status_t err = bio->bi_status; - struct bio_vec *bvec; - struct bvec_iter_all iter_all; + struct folio_iter fi; - bio_for_each_segment_all(bvec, bio, iter_all) { - struct page *page = bvec->bv_page; + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; - DBG_BUGON(PageUptodate(page)); - DBG_BUGON(z_erofs_page_is_invalidated(page)); - if 
(erofs_page_is_managed(EROFS_SB(q->sb), page)) { - if (!err) - SetPageUptodate(page); - unlock_page(page); - } + DBG_BUGON(folio_test_uptodate(folio)); + DBG_BUGON(z_erofs_page_is_invalidated(&folio->page)); + if (!erofs_folio_is_managed(EROFS_SB(q->sb), folio)) + continue; + + if (!err) + folio_mark_uptodate(folio); + folio_unlock(folio); } if (err) q->eio = true; z_erofs_decompress_kickoff(q, -1); - bio_put(bio); + if (bio->bi_bdev) + bio_put(bio); } static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, @@ -1617,7 +1597,6 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, z_erofs_next_pcluster_t owned_head = f->owned_head; /* bio is NULL initially, so no need to initialize last_{index,bdev} */ erofs_off_t last_pa; - struct block_device *last_bdev; unsigned int nr_bios = 0; struct bio *bio = NULL; unsigned long pflags; @@ -1664,9 +1643,13 @@ static void z_erofs_submit_queue(struct z_erofs_decompress_frontend *f, continue; if (bio && (cur != last_pa || - last_bdev != mdev.m_bdev)) { -submit_bio_retry: - submit_bio(bio); + bio->bi_bdev != mdev.m_bdev)) { +io_retry: + if (!erofs_is_fscache_mode(sb)) + submit_bio(bio); + else + erofs_fscache_submit_bio(bio); + if (memstall) { psi_memstall_leave(&pflags); memstall = 0; @@ -1681,15 +1664,16 @@ submit_bio_retry: } if (!bio) { - bio = bio_alloc(mdev.m_bdev, BIO_MAX_VECS, - REQ_OP_READ, GFP_NOIO); - bio->bi_end_io = z_erofs_submissionqueue_endio; + bio = erofs_is_fscache_mode(sb) ? 
+ erofs_fscache_bio_alloc(&mdev) : + bio_alloc(mdev.m_bdev, BIO_MAX_VECS, + REQ_OP_READ, GFP_NOIO); + bio->bi_end_io = z_erofs_endio; bio->bi_iter.bi_sector = cur >> 9; bio->bi_private = q[JQ_SUBMIT]; if (readahead) bio->bi_opf |= REQ_RAHEAD; ++nr_bios; - last_bdev = mdev.m_bdev; } if (cur + bvec.bv_len > end) @@ -1697,7 +1681,7 @@ submit_bio_retry: DBG_BUGON(bvec.bv_len < sb->s_blocksize); if (!bio_add_page(bio, bvec.bv_page, bvec.bv_len, bvec.bv_offset)) - goto submit_bio_retry; + goto io_retry; last_pa = cur + bvec.bv_len; bypass = false; @@ -1710,7 +1694,10 @@ submit_bio_retry: } while (owned_head != Z_EROFS_PCLUSTER_TAIL); if (bio) { - submit_bio(bio); + if (!erofs_is_fscache_mode(sb)) + submit_bio(bio); + else + erofs_fscache_submit_bio(bio); if (memstall) psi_memstall_leave(&pflags); } @@ -1795,7 +1782,7 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, if (PageUptodate(page)) unlock_page(page); else - (void)z_erofs_do_read_page(f, page, !!rac); + z_erofs_scan_folio(f, page_folio(page), !!rac); put_page(page); } @@ -1816,7 +1803,7 @@ static int z_erofs_read_folio(struct file *file, struct folio *folio) f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT; z_erofs_pcluster_readmore(&f, NULL, true); - err = z_erofs_do_read_page(&f, &folio->page, false); + err = z_erofs_scan_folio(&f, folio, false); z_erofs_pcluster_readmore(&f, NULL, false); z_erofs_pcluster_end(&f); @@ -1857,7 +1844,7 @@ static void z_erofs_readahead(struct readahead_control *rac) folio = head; head = folio_get_private(folio); - err = z_erofs_do_read_page(&f, &folio->page, true); + err = z_erofs_scan_folio(&f, folio, true); if (err && err != -EINTR) erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu", folio->index, EROFS_I(inode)->nid); |