Diffstat (limited to 'mm/swap_state.c')
-rw-r--r--  mm/swap_state.c  127
1 file changed, 70 insertions(+), 57 deletions(-)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 85d9e5806a..7255c01a1e 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -410,13 +410,12 @@ struct folio *filemap_get_incore_folio(struct address_space *mapping,
return folio;
}
-struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
- struct mempolicy *mpol, pgoff_t ilx,
- bool *new_page_allocated)
+struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
+ struct mempolicy *mpol, pgoff_t ilx, bool *new_page_allocated,
+ bool skip_if_exists)
{
struct swap_info_struct *si;
struct folio *folio;
- struct page *page;
void *shadow = NULL;
*new_page_allocated = false;
@@ -433,10 +432,8 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
*/
folio = filemap_get_folio(swap_address_space(entry),
swp_offset(entry));
- if (!IS_ERR(folio)) {
- page = folio_file_page(folio, swp_offset(entry));
- goto got_page;
- }
+ if (!IS_ERR(folio))
+ goto got_folio;
/*
* Just skip read ahead for unused swap slot.
@@ -450,7 +447,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
goto fail_put_swap;
/*
- * Get a new page to read into from swap. Allocate it now,
+ * Get a new folio to read into from swap. Allocate it now,
* before marking swap_map SWAP_HAS_CACHE, when -EEXIST will
* cause any racers to loop around until we add it to cache.
*/
@@ -471,17 +468,28 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
goto fail_put_swap;
/*
+ * Protect against a recursive call to __read_swap_cache_async()
+ * on the same entry waiting forever here because SWAP_HAS_CACHE
+ * is set but the folio is not in the swap cache yet. This can
+ * happen today if mem_cgroup_swapin_charge_folio() below
+ * triggers reclaim through zswap, which may call
+ * __read_swap_cache_async() in the writeback path.
+ */
+ if (skip_if_exists)
+ goto fail_put_swap;
+
+ /*
* We might race against __delete_from_swap_cache(), and
* stumble across a swap_map entry whose SWAP_HAS_CACHE
* has not yet been cleared. Or race against another
* __read_swap_cache_async(), which has set SWAP_HAS_CACHE
- * in swap_map, but not yet added its page to swap cache.
+ * in swap_map, but not yet added its folio to swap cache.
*/
schedule_timeout_uninterruptible(1);
}
/*
- * The swap entry is ours to swap in. Prepare the new page.
+ * The swap entry is ours to swap in. Prepare the new folio.
*/
__folio_set_locked(folio);
@@ -502,10 +510,9 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
/* Caller will initiate read into locked folio */
folio_add_lru(folio);
*new_page_allocated = true;
- page = &folio->page;
-got_page:
+got_folio:
put_swap_device(si);
- return page;
+ return folio;
fail_unlock:
put_swap_folio(folio, entry);
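A minimal sketch of how a writeback-path caller might use the new skip_if_exists argument (illustrative only; the function name and error handling below are assumptions, not taken from this patch):

/*
 * Hypothetical caller, for illustration only. Passing skip_if_exists
 * as true makes __read_swap_cache_async() return NULL instead of
 * sleeping in the SWAP_HAS_CACHE retry loop above, which is what a
 * zswap-writeback-style path needs to avoid waiting on itself.
 */
static int example_writeback_entry(swp_entry_t entry, struct mempolicy *mpol,
				   pgoff_t ilx)
{
	bool folio_was_allocated;
	struct folio *folio;

	folio = __read_swap_cache_async(entry, GFP_KERNEL, mpol, ilx,
					&folio_was_allocated, true);
	if (!folio)
		return -ENOMEM;

	if (!folio_was_allocated) {
		/* Another task owns the swap cache folio; back off. */
		folio_put(folio);
		return -EEXIST;
	}

	/* ... fill and write back the locked folio, then unlock it ... */
	return 0;
}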
@@ -523,26 +530,26 @@ fail_put_swap:
* the swap entry is no longer in use.
*
* get/put_swap_device() aren't needed to call this function, because
- * __read_swap_cache_async() call them and swap_readpage() holds the
+ * __read_swap_cache_async() calls them and swap_read_folio() holds the
* swap cache folio lock.
*/
-struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
- struct vm_area_struct *vma,
- unsigned long addr, struct swap_iocb **plug)
+struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
+ struct vm_area_struct *vma, unsigned long addr,
+ struct swap_iocb **plug)
{
bool page_allocated;
struct mempolicy *mpol;
pgoff_t ilx;
- struct page *page;
+ struct folio *folio;
mpol = get_vma_policy(vma, addr, 0, &ilx);
- page = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
- &page_allocated);
+ folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
+ &page_allocated, false);
mpol_cond_put(mpol);
if (page_allocated)
- swap_readpage(page, false, plug);
- return page;
+ swap_read_folio(folio, false, plug);
+ return folio;
}
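A usage sketch for the converted read_swap_cache_async(): the caller now receives the folio directly and can wait for the read to complete by taking the folio lock that swap_read_folio() holds during I/O. Everything below except the read_swap_cache_async() signature is an assumption:

/* Illustrative only; not a function in this patch. */
static struct folio *example_swapin_folio(swp_entry_t entry,
					  struct vm_area_struct *vma,
					  unsigned long addr)
{
	struct swap_iocb *splug = NULL;
	struct folio *folio;

	folio = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
				      vma, addr, &splug);
	swap_read_unplug(splug);
	if (!folio)
		return NULL;

	/* The read holds the folio lock, so this waits for completion. */
	folio_lock(folio);
	if (!folio_test_uptodate(folio)) {
		folio_unlock(folio);
		folio_put(folio);
		return NULL;
	}
	return folio;	/* locked, with a reference held */
}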
static unsigned int __swapin_nr_pages(unsigned long prev_offset,
@@ -613,7 +620,7 @@ static unsigned long swapin_nr_pages(unsigned long offset)
* @mpol: NUMA memory allocation policy to be applied
* @ilx: NUMA interleave index, for use only when MPOL_INTERLEAVE
*
- * Returns the struct page for entry and addr, after queueing swapin.
+ * Returns the struct folio for entry and addr, after queueing swapin.
*
* Primitive swap readahead code. We simply read an aligned block of
* (1 << page_cluster) entries in the swap area. This method is chosen
@@ -624,10 +631,10 @@ static unsigned long swapin_nr_pages(unsigned long offset)
* are used for every page of the readahead: neighbouring pages on swap
* are fairly likely to have been swapped out from the same node.
*/
-struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
+struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
struct mempolicy *mpol, pgoff_t ilx)
{
- struct page *page;
+ struct folio *folio;
unsigned long entry_offset = swp_offset(entry);
unsigned long offset = entry_offset;
unsigned long start_offset, end_offset;
@@ -652,30 +659,32 @@ struct page *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
blk_start_plug(&plug);
for (offset = start_offset; offset <= end_offset ; offset++) {
/* Ok, do the async read-ahead now */
- page = __read_swap_cache_async(
+ folio = __read_swap_cache_async(
swp_entry(swp_type(entry), offset),
- gfp_mask, mpol, ilx, &page_allocated);
- if (!page)
+ gfp_mask, mpol, ilx, &page_allocated, false);
+ if (!folio)
continue;
if (page_allocated) {
- swap_readpage(page, false, &splug);
+ swap_read_folio(folio, false, &splug);
if (offset != entry_offset) {
- SetPageReadahead(page);
+ folio_set_readahead(folio);
count_vm_event(SWAP_RA);
}
}
- put_page(page);
+ folio_put(folio);
}
blk_finish_plug(&plug);
swap_read_unplug(splug);
lru_add_drain(); /* Push any new pages onto the LRU now */
skip:
/* The page was likely read above, so no need for plugging here */
- page = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
- &page_allocated);
- if (unlikely(page_allocated))
- swap_readpage(page, false, NULL);
- return page;
+ folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
+ &page_allocated, false);
+ if (unlikely(page_allocated)) {
+ zswap_folio_swapin(folio);
+ swap_read_folio(folio, false, NULL);
+ }
+ return folio;
}
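To make the aligned-block scheme from the function comment concrete, here is a small illustrative helper (not part of the patch; the helper name and the numbers in the comment are invented, and the real window size comes from swapin_nr_pages()):

/*
 * Illustration only. With a power-of-two window of 8 entries and a
 * fault at swap offset 21, this yields the window 16..23; every offset
 * except 21 would be marked as readahead.
 */
static void example_cluster_window(unsigned long offset, unsigned long win,
				   unsigned long *start, unsigned long *end)
{
	unsigned long mask = win - 1;

	*start = offset & ~mask;	/* 21 & ~7 == 16 */
	*end = offset | mask;		/* 21 |  7 == 23 */
	if (!*start)
		(*start)++;		/* offset 0 holds the swap header */
}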
int init_swap_address_space(unsigned int type, unsigned long nr_pages)
@@ -779,7 +788,7 @@ static void swap_ra_info(struct vm_fault *vmf,
* @targ_ilx: NUMA interleave index, for use only when MPOL_INTERLEAVE
* @vmf: fault information
*
- * Returns the struct page for entry and addr, after queueing swapin.
+ * Returns the struct folio for entry and addr, after queueing swapin.
*
* Primitive swap readahead code. We simply read in a few pages whose
* virtual addresses are around the fault address in the same vma.
@@ -787,13 +796,12 @@ static void swap_ra_info(struct vm_fault *vmf,
* Caller must hold read mmap_lock if vmf->vma is not NULL.
*
*/
-static struct page *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
- struct mempolicy *mpol, pgoff_t targ_ilx,
- struct vm_fault *vmf)
+static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
+ struct mempolicy *mpol, pgoff_t targ_ilx, struct vm_fault *vmf)
{
struct blk_plug plug;
struct swap_iocb *splug = NULL;
- struct page *page;
+ struct folio *folio;
pte_t *pte = NULL, pentry;
unsigned long addr;
swp_entry_t entry;
@@ -826,18 +834,18 @@ static struct page *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
continue;
pte_unmap(pte);
pte = NULL;
- page = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
- &page_allocated);
- if (!page)
+ folio = __read_swap_cache_async(entry, gfp_mask, mpol, ilx,
+ &page_allocated, false);
+ if (!folio)
continue;
if (page_allocated) {
- swap_readpage(page, false, &splug);
+ swap_read_folio(folio, false, &splug);
if (i != ra_info.offset) {
- SetPageReadahead(page);
+ folio_set_readahead(folio);
count_vm_event(SWAP_RA);
}
}
- put_page(page);
+ folio_put(folio);
}
if (pte)
pte_unmap(pte);
@@ -845,12 +853,14 @@ static struct page *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
swap_read_unplug(splug);
lru_add_drain();
skip:
- /* The page was likely read above, so no need for plugging here */
- page = __read_swap_cache_async(targ_entry, gfp_mask, mpol, targ_ilx,
- &page_allocated);
- if (unlikely(page_allocated))
- swap_readpage(page, false, NULL);
- return page;
+ /* The folio was likely read above, so no need for plugging here */
+ folio = __read_swap_cache_async(targ_entry, gfp_mask, mpol, targ_ilx,
+ &page_allocated, false);
+ if (unlikely(page_allocated)) {
+ zswap_folio_swapin(folio);
+ swap_read_folio(folio, false, NULL);
+ }
+ return folio;
}
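The VMA-based path builds its window from virtual addresses around the fault instead of swap offsets. A much-simplified sketch of that clamping follows; the helper name and the symmetric split are assumptions, the real bounds come from swap_ra_info():

/* Illustrative only; a much-simplified stand-in for swap_ra_info(). */
static void example_vma_window(struct vm_fault *vmf, unsigned int win,
			       unsigned long *start, unsigned long *end)
{
	unsigned long half = ((unsigned long)win / 2) << PAGE_SHIFT;
	unsigned long faddr = vmf->address & PAGE_MASK;

	/* Clamp the window to the faulting VMA in both directions. */
	*start = faddr - min(half, faddr - vmf->vma->vm_start);
	*end = min(faddr + half + PAGE_SIZE, vmf->vma->vm_end);
}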
/**
@@ -870,14 +880,17 @@ struct page *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
{
struct mempolicy *mpol;
pgoff_t ilx;
- struct page *page;
+ struct folio *folio;
mpol = get_vma_policy(vmf->vma, vmf->address, 0, &ilx);
- page = swap_use_vma_readahead() ?
+ folio = swap_use_vma_readahead() ?
swap_vma_readahead(entry, gfp_mask, mpol, ilx, vmf) :
swap_cluster_readahead(entry, gfp_mask, mpol, ilx);
mpol_cond_put(mpol);
- return page;
+
+ if (!folio)
+ return NULL;
+ return folio_file_page(folio, swp_offset(entry));
}
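With this conversion, swapin_readahead() still hands a struct page back to its callers, picked out of the owning folio with folio_file_page(). A minimal sketch of a consumer (the helper name is invented; the real fault path does much more):

/* Illustration only; not part of this patch. */
static struct folio *example_fault_swapin(swp_entry_t entry,
					  struct vm_fault *vmf)
{
	struct page *page;

	page = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vmf);
	if (!page)
		return NULL;

	/*
	 * The page returned is folio_file_page(folio, swp_offset(entry)),
	 * so the owning folio can always be recovered from it.
	 */
	return page_folio(page);
}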
#ifdef CONFIG_SYSFS