summaryrefslogtreecommitdiffstats
path: root/mm/rmap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--mm/rmap.c164
1 files changed, 101 insertions, 63 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index 9f795b93cf..7a27a2b418 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -798,6 +798,7 @@ struct folio_referenced_arg {
unsigned long vm_flags;
struct mem_cgroup *memcg;
};
+
/*
* arg: folio_referenced_arg will be passed
*/
@@ -807,17 +808,33 @@ static bool folio_referenced_one(struct folio *folio,
struct folio_referenced_arg *pra = arg;
DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);
int referenced = 0;
+ unsigned long start = address, ptes = 0;
while (page_vma_mapped_walk(&pvmw)) {
address = pvmw.address;
- if ((vma->vm_flags & VM_LOCKED) &&
- (!folio_test_large(folio) || !pvmw.pte)) {
- /* Restore the mlock which got missed */
- mlock_vma_folio(folio, vma, !pvmw.pte);
- page_vma_mapped_walk_done(&pvmw);
- pra->vm_flags |= VM_LOCKED;
- return false; /* To break the loop */
+ if (vma->vm_flags & VM_LOCKED) {
+ if (!folio_test_large(folio) || !pvmw.pte) {
+ /* Restore the mlock which got missed */
+ mlock_vma_folio(folio, vma);
+ page_vma_mapped_walk_done(&pvmw);
+ pra->vm_flags |= VM_LOCKED;
+ return false; /* To break the loop */
+ }
+ /*
+ * For large folio fully mapped to VMA, will
+ * be handled after the pvmw loop.
+ *
+ * For large folio cross VMA boundaries, it's
+ * expected to be picked by page reclaim. But
+ * should skip reference of pages which are in
+ * the range of VM_LOCKED vma. As page reclaim
+ * should just count the reference of pages out
+ * the range of VM_LOCKED vma.
+ */
+ ptes++;
+ pra->mapcount--;
+ continue;
}
if (pvmw.pte) {
@@ -842,6 +859,23 @@ static bool folio_referenced_one(struct folio *folio,
pra->mapcount--;
}
+ if ((vma->vm_flags & VM_LOCKED) &&
+ folio_test_large(folio) &&
+ folio_within_vma(folio, vma)) {
+ unsigned long s_align, e_align;
+
+ s_align = ALIGN_DOWN(start, PMD_SIZE);
+ e_align = ALIGN_DOWN(start + folio_size(folio) - 1, PMD_SIZE);
+
+ /* folio doesn't cross page table boundary and fully mapped */
+ if ((s_align == e_align) && (ptes == folio_nr_pages(folio))) {
+ /* Restore the mlock which got missed */
+ mlock_vma_folio(folio, vma);
+ pra->vm_flags |= VM_LOCKED;
+ return false; /* To break the loop */
+ }
+ }
+
if (referenced)
folio_clear_idle(folio);
if (folio_test_clear_young(folio))
@@ -1094,19 +1128,17 @@ int folio_total_mapcount(struct folio *folio)
}
/**
- * page_move_anon_rmap - move a page to our anon_vma
- * @page: the page to move to our anon_vma
- * @vma: the vma the page belongs to
+ * folio_move_anon_rmap - move a folio to our anon_vma
+ * @folio: The folio to move to our anon_vma
+ * @vma: The vma the folio belongs to
*
- * When a page belongs exclusively to one process after a COW event,
- * that page can be moved into the anon_vma that belongs to just that
- * process, so the rmap code will not search the parent or sibling
- * processes.
+ * When a folio belongs exclusively to one process after a COW event,
+ * that folio can be moved into the anon_vma that belongs to just that
+ * process, so the rmap code will not search the parent or sibling processes.
*/
-void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
+void folio_move_anon_rmap(struct folio *folio, struct vm_area_struct *vma)
{
void *anon_vma = vma->anon_vma;
- struct folio *folio = page_folio(page);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
VM_BUG_ON_VMA(!anon_vma, vma);
@@ -1118,31 +1150,25 @@ void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)
* folio_test_anon()) will not see one without the other.
*/
WRITE_ONCE(folio->mapping, anon_vma);
- SetPageAnonExclusive(page);
}
/**
- * __page_set_anon_rmap - set up new anonymous rmap
- * @folio: Folio which contains page.
- * @page: Page to add to rmap.
- * @vma: VM area to add page to.
+ * __folio_set_anon - set up a new anonymous rmap for a folio
+ * @folio: The folio to set up the new anonymous rmap for.
+ * @vma: VM area to add the folio to.
* @address: User virtual address of the mapping
- * @exclusive: the page is exclusively owned by the current process
+ * @exclusive: Whether the folio is exclusive to the process.
*/
-static void __page_set_anon_rmap(struct folio *folio, struct page *page,
- struct vm_area_struct *vma, unsigned long address, int exclusive)
+static void __folio_set_anon(struct folio *folio, struct vm_area_struct *vma,
+ unsigned long address, bool exclusive)
{
struct anon_vma *anon_vma = vma->anon_vma;
BUG_ON(!anon_vma);
- if (folio_test_anon(folio))
- goto out;
-
/*
- * If the page isn't exclusively mapped into this vma,
- * we must use the _oldest_ possible anon_vma for the
- * page mapping!
+ * If the folio isn't exclusive to this vma, we must use the _oldest_
+ * possible anon_vma for the folio mapping!
*/
if (!exclusive)
anon_vma = anon_vma->root;
@@ -1156,9 +1182,6 @@ static void __page_set_anon_rmap(struct folio *folio, struct page *page,
anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
WRITE_ONCE(folio->mapping, (struct address_space *) anon_vma);
folio->index = linear_page_index(vma, address);
-out:
- if (exclusive)
- SetPageAnonExclusive(page);
}
/**
@@ -1207,7 +1230,7 @@ void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma,
atomic_t *mapped = &folio->_nr_pages_mapped;
int nr = 0, nr_pmdmapped = 0;
bool compound = flags & RMAP_COMPOUND;
- bool first = true;
+ bool first;
/* Is page being mapped by PTE? Is this its first map to be added? */
if (likely(!compound)) {
@@ -1236,24 +1259,40 @@ void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma,
}
}
- VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page);
- VM_BUG_ON_PAGE(!first && PageAnonExclusive(page), page);
-
if (nr_pmdmapped)
__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr_pmdmapped);
if (nr)
__lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr);
- if (likely(!folio_test_ksm(folio))) {
- /* address might be in next vma when migration races vma_merge */
- if (first)
- __page_set_anon_rmap(folio, page, vma, address,
- !!(flags & RMAP_EXCLUSIVE));
- else
- __page_check_anon_rmap(folio, page, vma, address);
+ if (unlikely(!folio_test_anon(folio))) {
+ VM_WARN_ON_FOLIO(!folio_test_locked(folio), folio);
+ /*
+ * For a PTE-mapped large folio, we only know that the single
+ * PTE is exclusive. Further, __folio_set_anon() might not get
+ * folio->index right when not given the address of the head
+ * page.
+ */
+ VM_WARN_ON_FOLIO(folio_test_large(folio) && !compound, folio);
+ __folio_set_anon(folio, vma, address,
+ !!(flags & RMAP_EXCLUSIVE));
+ } else if (likely(!folio_test_ksm(folio))) {
+ __page_check_anon_rmap(folio, page, vma, address);
}
+ if (flags & RMAP_EXCLUSIVE)
+ SetPageAnonExclusive(page);
+ /* While PTE-mapping a THP we have a PMD and a PTE mapping. */
+ VM_WARN_ON_FOLIO((atomic_read(&page->_mapcount) > 0 ||
+ (folio_test_large(folio) && folio_entire_mapcount(folio) > 1)) &&
+ PageAnonExclusive(page), folio);
- mlock_vma_folio(folio, vma, compound);
+ /*
+ * For large folio, only mlock it if it's fully mapped to VMA. It's
+ * not easy to check whether the large folio is fully mapped to VMA
+ * here. Only mlock normal 4K folio and leave page reclaim to handle
+ * large folio.
+ */
+ if (!folio_test_large(folio))
+ mlock_vma_folio(folio, vma);
}
/**
@@ -1290,7 +1329,8 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
}
__lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr);
- __page_set_anon_rmap(folio, &folio->page, vma, address, 1);
+ __folio_set_anon(folio, vma, address, true);
+ SetPageAnonExclusive(&folio->page);
}
/**
@@ -1352,7 +1392,9 @@ void folio_add_file_rmap_range(struct folio *folio, struct page *page,
if (nr)
__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
- mlock_vma_folio(folio, vma, compound);
+ /* See comments in page_add_anon_rmap() */
+ if (!folio_test_large(folio))
+ mlock_vma_folio(folio, vma);
}
/**
@@ -1463,7 +1505,7 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma,
* it's only reliable while mapped.
*/
- munlock_vma_folio(folio, vma, compound);
+ munlock_vma_folio(folio, vma);
}
/*
@@ -1528,7 +1570,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
if (!(flags & TTU_IGNORE_MLOCK) &&
(vma->vm_flags & VM_LOCKED)) {
/* Restore the mlock which got missed */
- mlock_vma_folio(folio, vma, false);
+ if (!folio_test_large(folio))
+ mlock_vma_folio(folio, vma);
page_vma_mapped_walk_done(&pvmw);
ret = false;
break;
@@ -2540,22 +2583,16 @@ void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc)
*
* RMAP_COMPOUND is ignored.
*/
-void hugepage_add_anon_rmap(struct page *page, struct vm_area_struct *vma,
+void hugepage_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
unsigned long address, rmap_t flags)
{
- struct folio *folio = page_folio(page);
- struct anon_vma *anon_vma = vma->anon_vma;
- int first;
+ VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
- BUG_ON(!folio_test_locked(folio));
- BUG_ON(!anon_vma);
- /* address might be in next vma when migration races vma_merge */
- first = atomic_inc_and_test(&folio->_entire_mapcount);
- VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page);
- VM_BUG_ON_PAGE(!first && PageAnonExclusive(page), page);
- if (first)
- __page_set_anon_rmap(folio, page, vma, address,
- !!(flags & RMAP_EXCLUSIVE));
+ atomic_inc(&folio->_entire_mapcount);
+ if (flags & RMAP_EXCLUSIVE)
+ SetPageAnonExclusive(&folio->page);
+ VM_WARN_ON_FOLIO(folio_entire_mapcount(folio) > 1 &&
+ PageAnonExclusive(&folio->page), folio);
}
void hugepage_add_new_anon_rmap(struct folio *folio,
@@ -2565,6 +2602,7 @@ void hugepage_add_new_anon_rmap(struct folio *folio,
/* increment count (starts at -1) */
atomic_set(&folio->_entire_mapcount, 0);
folio_clear_hugetlb_restore_reserve(folio);
- __page_set_anon_rmap(folio, &folio->page, vma, address, 1);
+ __folio_set_anon(folio, vma, address, true);
+ SetPageAnonExclusive(&folio->page);
}
#endif /* CONFIG_HUGETLB_PAGE */