diff options
Diffstat (limited to 'mm/rmap.c')
-rw-r--r-- | mm/rmap.c | 499 |
1 files changed, 307 insertions, 192 deletions
@@ -470,7 +470,7 @@ void __init anon_vma_init(void) /* * Getting a lock on a stable anon_vma from a page off the LRU is tricky! * - * Since there is no serialization what so ever against page_remove_rmap() + * Since there is no serialization what so ever against folio_remove_rmap_*() * the best this function can do is return a refcount increased anon_vma * that might have been relevant to this page. * @@ -487,9 +487,15 @@ void __init anon_vma_init(void) * [ something equivalent to page_mapped_in_vma() ]. * * Since anon_vma's slab is SLAB_TYPESAFE_BY_RCU and we know from - * page_remove_rmap() that the anon_vma pointer from page->mapping is valid + * folio_remove_rmap_*() that the anon_vma pointer from page->mapping is valid * if there is a mapcount, we can dereference the anon_vma after observing * those. + * + * NOTE: the caller should normally hold folio lock when calling this. If + * not, the caller needs to double check the anon_vma didn't change after + * taking the anon_vma lock for either read or write (UFFDIO_MOVE can modify it + * concurrently without folio lock protection). See folio_lock_anon_vma_read() + * which has already covered that, and comment above remap_pages(). */ struct anon_vma *folio_get_anon_vma(struct folio *folio) { @@ -542,6 +548,7 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, struct anon_vma *root_anon_vma; unsigned long anon_mapping; +retry: rcu_read_lock(); anon_mapping = (unsigned long)READ_ONCE(folio->mapping); if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON) @@ -553,6 +560,17 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, root_anon_vma = READ_ONCE(anon_vma->root); if (down_read_trylock(&root_anon_vma->rwsem)) { /* + * folio_move_anon_rmap() might have changed the anon_vma as we + * might not hold the folio lock here. + */ + if (unlikely((unsigned long)READ_ONCE(folio->mapping) != + anon_mapping)) { + up_read(&root_anon_vma->rwsem); + rcu_read_unlock(); + goto retry; + } + + /* * If the folio is still mapped, then this anon_vma is still * its anon_vma, and holding the mutex ensures that it will * not go away, see anon_vma_free(). @@ -586,6 +604,18 @@ struct anon_vma *folio_lock_anon_vma_read(struct folio *folio, rcu_read_unlock(); anon_vma_lock_read(anon_vma); + /* + * folio_move_anon_rmap() might have changed the anon_vma as we might + * not hold the folio lock here. + */ + if (unlikely((unsigned long)READ_ONCE(folio->mapping) != + anon_mapping)) { + anon_vma_unlock_read(anon_vma); + put_anon_vma(anon_vma); + anon_vma = NULL; + goto retry; + } + if (atomic_dec_and_test(&anon_vma->refcount)) { /* * Oops, we held the last refcount, release the lock @@ -1127,6 +1157,48 @@ int folio_total_mapcount(struct folio *folio) return mapcount; } +static __always_inline unsigned int __folio_add_rmap(struct folio *folio, + struct page *page, int nr_pages, enum rmap_level level, + int *nr_pmdmapped) +{ + atomic_t *mapped = &folio->_nr_pages_mapped; + int first, nr = 0; + + __folio_rmap_sanity_checks(folio, page, nr_pages, level); + + switch (level) { + case RMAP_LEVEL_PTE: + do { + first = atomic_inc_and_test(&page->_mapcount); + if (first && folio_test_large(folio)) { + first = atomic_inc_return_relaxed(mapped); + first = (first < ENTIRELY_MAPPED); + } + + if (first) + nr++; + } while (page++, --nr_pages > 0); + break; + case RMAP_LEVEL_PMD: + first = atomic_inc_and_test(&folio->_entire_mapcount); + if (first) { + nr = atomic_add_return_relaxed(ENTIRELY_MAPPED, mapped); + if (likely(nr < ENTIRELY_MAPPED + ENTIRELY_MAPPED)) { + *nr_pmdmapped = folio_nr_pages(folio); + nr = *nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED); + /* Raced ahead of a remove and another add? */ + if (unlikely(nr < 0)) + nr = 0; + } else { + /* Raced ahead of a remove of ENTIRELY_MAPPED */ + nr = 0; + } + } + break; + } + return nr; +} + /** * folio_move_anon_rmap - move a folio to our anon_vma * @folio: The folio to move to our anon_vma @@ -1198,12 +1270,12 @@ static void __page_check_anon_rmap(struct folio *folio, struct page *page, * The page's anon-rmap details (mapping and index) are guaranteed to * be set up correctly at this point. * - * We have exclusion against page_add_anon_rmap because the caller + * We have exclusion against folio_add_anon_rmap_*() because the caller * always holds the page locked. * - * We have exclusion against page_add_new_anon_rmap because those pages + * We have exclusion against folio_add_new_anon_rmap because those pages * are initially only visible via the pagetables, and the pte is locked - * over the call to page_add_new_anon_rmap. + * over the call to folio_add_new_anon_rmap. */ VM_BUG_ON_FOLIO(folio_anon_vma(folio)->root != vma->anon_vma->root, folio); @@ -1211,54 +1283,13 @@ static void __page_check_anon_rmap(struct folio *folio, struct page *page, page); } -/** - * page_add_anon_rmap - add pte mapping to an anonymous page - * @page: the page to add the mapping to - * @vma: the vm area in which the mapping is added - * @address: the user virtual address mapped - * @flags: the rmap flags - * - * The caller needs to hold the pte lock, and the page must be locked in - * the anon_vma case: to serialize mapping,index checking after setting, - * and to ensure that PageAnon is not being upgraded racily to PageKsm - * (but PageKsm is never downgraded to PageAnon). - */ -void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma, - unsigned long address, rmap_t flags) +static __always_inline void __folio_add_anon_rmap(struct folio *folio, + struct page *page, int nr_pages, struct vm_area_struct *vma, + unsigned long address, rmap_t flags, enum rmap_level level) { - struct folio *folio = page_folio(page); - atomic_t *mapped = &folio->_nr_pages_mapped; - int nr = 0, nr_pmdmapped = 0; - bool compound = flags & RMAP_COMPOUND; - bool first; - - /* Is page being mapped by PTE? Is this its first map to be added? */ - if (likely(!compound)) { - first = atomic_inc_and_test(&page->_mapcount); - nr = first; - if (first && folio_test_large(folio)) { - nr = atomic_inc_return_relaxed(mapped); - nr = (nr < COMPOUND_MAPPED); - } - } else if (folio_test_pmd_mappable(folio)) { - /* That test is redundant: it's for safety or to optimize out */ - - first = atomic_inc_and_test(&folio->_entire_mapcount); - if (first) { - nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped); - if (likely(nr < COMPOUND_MAPPED + COMPOUND_MAPPED)) { - nr_pmdmapped = folio_nr_pages(folio); - nr = nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED); - /* Raced ahead of a remove and another add? */ - if (unlikely(nr < 0)) - nr = 0; - } else { - /* Raced ahead of a remove of COMPOUND_MAPPED */ - nr = 0; - } - } - } + int i, nr, nr_pmdmapped = 0; + nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped); if (nr_pmdmapped) __lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr_pmdmapped); if (nr) @@ -1272,18 +1303,34 @@ void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma, * folio->index right when not given the address of the head * page. */ - VM_WARN_ON_FOLIO(folio_test_large(folio) && !compound, folio); + VM_WARN_ON_FOLIO(folio_test_large(folio) && + level != RMAP_LEVEL_PMD, folio); __folio_set_anon(folio, vma, address, !!(flags & RMAP_EXCLUSIVE)); } else if (likely(!folio_test_ksm(folio))) { __page_check_anon_rmap(folio, page, vma, address); } - if (flags & RMAP_EXCLUSIVE) - SetPageAnonExclusive(page); - /* While PTE-mapping a THP we have a PMD and a PTE mapping. */ - VM_WARN_ON_FOLIO((atomic_read(&page->_mapcount) > 0 || - (folio_test_large(folio) && folio_entire_mapcount(folio) > 1)) && - PageAnonExclusive(page), folio); + + if (flags & RMAP_EXCLUSIVE) { + switch (level) { + case RMAP_LEVEL_PTE: + for (i = 0; i < nr_pages; i++) + SetPageAnonExclusive(page + i); + break; + case RMAP_LEVEL_PMD: + SetPageAnonExclusive(page); + break; + } + } + for (i = 0; i < nr_pages; i++) { + struct page *cur_page = page + i; + + /* While PTE-mapping a THP we have a PMD and a PTE mapping. */ + VM_WARN_ON_FOLIO((atomic_read(&cur_page->_mapcount) > 0 || + (folio_test_large(folio) && + folio_entire_mapcount(folio) > 1)) && + PageAnonExclusive(cur_page), folio); + } /* * For large folio, only mlock it if it's fully mapped to VMA. It's @@ -1296,182 +1343,200 @@ void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma, } /** + * folio_add_anon_rmap_ptes - add PTE mappings to a page range of an anon folio + * @folio: The folio to add the mappings to + * @page: The first page to add + * @nr_pages: The number of pages which will be mapped + * @vma: The vm area in which the mappings are added + * @address: The user virtual address of the first page to map + * @flags: The rmap flags + * + * The page range of folio is defined by [first_page, first_page + nr_pages) + * + * The caller needs to hold the page table lock, and the page must be locked in + * the anon_vma case: to serialize mapping,index checking after setting, + * and to ensure that an anon folio is not being upgraded racily to a KSM folio + * (but KSM folios are never downgraded). + */ +void folio_add_anon_rmap_ptes(struct folio *folio, struct page *page, + int nr_pages, struct vm_area_struct *vma, unsigned long address, + rmap_t flags) +{ + __folio_add_anon_rmap(folio, page, nr_pages, vma, address, flags, + RMAP_LEVEL_PTE); +} + +/** + * folio_add_anon_rmap_pmd - add a PMD mapping to a page range of an anon folio + * @folio: The folio to add the mapping to + * @page: The first page to add + * @vma: The vm area in which the mapping is added + * @address: The user virtual address of the first page to map + * @flags: The rmap flags + * + * The page range of folio is defined by [first_page, first_page + HPAGE_PMD_NR) + * + * The caller needs to hold the page table lock, and the page must be locked in + * the anon_vma case: to serialize mapping,index checking after setting. + */ +void folio_add_anon_rmap_pmd(struct folio *folio, struct page *page, + struct vm_area_struct *vma, unsigned long address, rmap_t flags) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + __folio_add_anon_rmap(folio, page, HPAGE_PMD_NR, vma, address, flags, + RMAP_LEVEL_PMD); +#else + WARN_ON_ONCE(true); +#endif +} + +/** * folio_add_new_anon_rmap - Add mapping to a new anonymous folio. * @folio: The folio to add the mapping to. * @vma: the vm area in which the mapping is added * @address: the user virtual address mapped * - * Like page_add_anon_rmap() but must only be called on *new* folios. + * Like folio_add_anon_rmap_*() but must only be called on *new* folios. * This means the inc-and-test can be bypassed. * The folio does not have to be locked. * - * If the folio is large, it is accounted as a THP. As the folio + * If the folio is pmd-mappable, it is accounted as a THP. As the folio * is new, it's assumed to be mapped exclusively by a single process. */ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma, unsigned long address) { - int nr; + int nr = folio_nr_pages(folio); - VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma); + VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); + VM_BUG_ON_VMA(address < vma->vm_start || + address + (nr << PAGE_SHIFT) > vma->vm_end, vma); __folio_set_swapbacked(folio); + __folio_set_anon(folio, vma, address, true); - if (likely(!folio_test_pmd_mappable(folio))) { + if (likely(!folio_test_large(folio))) { /* increment count (starts at -1) */ atomic_set(&folio->_mapcount, 0); - nr = 1; + SetPageAnonExclusive(&folio->page); + } else if (!folio_test_pmd_mappable(folio)) { + int i; + + for (i = 0; i < nr; i++) { + struct page *page = folio_page(folio, i); + + /* increment count (starts at -1) */ + atomic_set(&page->_mapcount, 0); + SetPageAnonExclusive(page); + } + + atomic_set(&folio->_nr_pages_mapped, nr); } else { /* increment count (starts at -1) */ atomic_set(&folio->_entire_mapcount, 0); - atomic_set(&folio->_nr_pages_mapped, COMPOUND_MAPPED); - nr = folio_nr_pages(folio); + atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED); + SetPageAnonExclusive(&folio->page); __lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr); } __lruvec_stat_mod_folio(folio, NR_ANON_MAPPED, nr); - __folio_set_anon(folio, vma, address, true); - SetPageAnonExclusive(&folio->page); } -/** - * folio_add_file_rmap_range - add pte mapping to page range of a folio - * @folio: The folio to add the mapping to - * @page: The first page to add - * @nr_pages: The number of pages which will be mapped - * @vma: the vm area in which the mapping is added - * @compound: charge the page as compound or small page - * - * The page range of folio is defined by [first_page, first_page + nr_pages) - * - * The caller needs to hold the pte lock. - */ -void folio_add_file_rmap_range(struct folio *folio, struct page *page, - unsigned int nr_pages, struct vm_area_struct *vma, - bool compound) +static __always_inline void __folio_add_file_rmap(struct folio *folio, + struct page *page, int nr_pages, struct vm_area_struct *vma, + enum rmap_level level) { - atomic_t *mapped = &folio->_nr_pages_mapped; - unsigned int nr_pmdmapped = 0, first; - int nr = 0; - - VM_WARN_ON_FOLIO(compound && !folio_test_pmd_mappable(folio), folio); - - /* Is page being mapped by PTE? Is this its first map to be added? */ - if (likely(!compound)) { - do { - first = atomic_inc_and_test(&page->_mapcount); - if (first && folio_test_large(folio)) { - first = atomic_inc_return_relaxed(mapped); - first = (first < COMPOUND_MAPPED); - } - - if (first) - nr++; - } while (page++, --nr_pages > 0); - } else if (folio_test_pmd_mappable(folio)) { - /* That test is redundant: it's for safety or to optimize out */ + int nr, nr_pmdmapped = 0; - first = atomic_inc_and_test(&folio->_entire_mapcount); - if (first) { - nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped); - if (likely(nr < COMPOUND_MAPPED + COMPOUND_MAPPED)) { - nr_pmdmapped = folio_nr_pages(folio); - nr = nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED); - /* Raced ahead of a remove and another add? */ - if (unlikely(nr < 0)) - nr = 0; - } else { - /* Raced ahead of a remove of COMPOUND_MAPPED */ - nr = 0; - } - } - } + VM_WARN_ON_FOLIO(folio_test_anon(folio), folio); + nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped); if (nr_pmdmapped) __lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ? NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped); if (nr) __lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr); - /* See comments in page_add_anon_rmap() */ + /* See comments in folio_add_anon_rmap_*() */ if (!folio_test_large(folio)) mlock_vma_folio(folio, vma); } /** - * page_add_file_rmap - add pte mapping to a file page - * @page: the page to add the mapping to - * @vma: the vm area in which the mapping is added - * @compound: charge the page as compound or small page + * folio_add_file_rmap_ptes - add PTE mappings to a page range of a folio + * @folio: The folio to add the mappings to + * @page: The first page to add + * @nr_pages: The number of pages that will be mapped using PTEs + * @vma: The vm area in which the mappings are added + * + * The page range of the folio is defined by [page, page + nr_pages) * - * The caller needs to hold the pte lock. + * The caller needs to hold the page table lock. */ -void page_add_file_rmap(struct page *page, struct vm_area_struct *vma, - bool compound) +void folio_add_file_rmap_ptes(struct folio *folio, struct page *page, + int nr_pages, struct vm_area_struct *vma) { - struct folio *folio = page_folio(page); - unsigned int nr_pages; - - VM_WARN_ON_ONCE_PAGE(compound && !PageTransHuge(page), page); - - if (likely(!compound)) - nr_pages = 1; - else - nr_pages = folio_nr_pages(folio); - - folio_add_file_rmap_range(folio, page, nr_pages, vma, compound); + __folio_add_file_rmap(folio, page, nr_pages, vma, RMAP_LEVEL_PTE); } /** - * page_remove_rmap - take down pte mapping from a page - * @page: page to remove mapping from - * @vma: the vm area from which the mapping is removed - * @compound: uncharge the page as compound or small page + * folio_add_file_rmap_pmd - add a PMD mapping to a page range of a folio + * @folio: The folio to add the mapping to + * @page: The first page to add + * @vma: The vm area in which the mapping is added * - * The caller needs to hold the pte lock. + * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) + * + * The caller needs to hold the page table lock. */ -void page_remove_rmap(struct page *page, struct vm_area_struct *vma, - bool compound) +void folio_add_file_rmap_pmd(struct folio *folio, struct page *page, + struct vm_area_struct *vma) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + __folio_add_file_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_LEVEL_PMD); +#else + WARN_ON_ONCE(true); +#endif +} + +static __always_inline void __folio_remove_rmap(struct folio *folio, + struct page *page, int nr_pages, struct vm_area_struct *vma, + enum rmap_level level) { - struct folio *folio = page_folio(page); atomic_t *mapped = &folio->_nr_pages_mapped; - int nr = 0, nr_pmdmapped = 0; - bool last; + int last, nr = 0, nr_pmdmapped = 0; enum node_stat_item idx; - VM_BUG_ON_PAGE(compound && !PageHead(page), page); - - /* Hugetlb pages are not counted in NR_*MAPPED */ - if (unlikely(folio_test_hugetlb(folio))) { - /* hugetlb pages are always mapped with pmds */ - atomic_dec(&folio->_entire_mapcount); - return; - } + __folio_rmap_sanity_checks(folio, page, nr_pages, level); - /* Is page being unmapped by PTE? Is this its last map to be removed? */ - if (likely(!compound)) { - last = atomic_add_negative(-1, &page->_mapcount); - nr = last; - if (last && folio_test_large(folio)) { - nr = atomic_dec_return_relaxed(mapped); - nr = (nr < COMPOUND_MAPPED); - } - } else if (folio_test_pmd_mappable(folio)) { - /* That test is redundant: it's for safety or to optimize out */ + switch (level) { + case RMAP_LEVEL_PTE: + do { + last = atomic_add_negative(-1, &page->_mapcount); + if (last && folio_test_large(folio)) { + last = atomic_dec_return_relaxed(mapped); + last = (last < ENTIRELY_MAPPED); + } + if (last) + nr++; + } while (page++, --nr_pages > 0); + break; + case RMAP_LEVEL_PMD: last = atomic_add_negative(-1, &folio->_entire_mapcount); if (last) { - nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped); - if (likely(nr < COMPOUND_MAPPED)) { + nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped); + if (likely(nr < ENTIRELY_MAPPED)) { nr_pmdmapped = folio_nr_pages(folio); nr = nr_pmdmapped - (nr & FOLIO_PAGES_MAPPED); /* Raced ahead of another remove and an add? */ if (unlikely(nr < 0)) nr = 0; } else { - /* An add of COMPOUND_MAPPED raced ahead */ + /* An add of ENTIRELY_MAPPED raced ahead */ nr = 0; } } + break; } if (nr_pmdmapped) { @@ -1488,18 +1553,18 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma, __lruvec_stat_mod_folio(folio, idx, -nr); /* - * Queue anon THP for deferred split if at least one + * Queue anon large folio for deferred split if at least one * page of the folio is unmapped and at least one page * is still mapped. */ - if (folio_test_pmd_mappable(folio) && folio_test_anon(folio)) - if (!compound || nr < nr_pmdmapped) + if (folio_test_large(folio) && folio_test_anon(folio)) + if (level == RMAP_LEVEL_PTE || nr < nr_pmdmapped) deferred_split_folio(folio); } /* * It would be tidy to reset folio_test_anon mapping when fully - * unmapped, but that might overwrite a racing page_add_anon_rmap + * unmapped, but that might overwrite a racing folio_add_anon_rmap_*() * which increments mapcount after us but sets mapping before us: * so leave the reset to free_pages_prepare, and remember that * it's only reliable while mapped. @@ -1508,6 +1573,43 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma, munlock_vma_folio(folio, vma); } +/** + * folio_remove_rmap_ptes - remove PTE mappings from a page range of a folio + * @folio: The folio to remove the mappings from + * @page: The first page to remove + * @nr_pages: The number of pages that will be removed from the mapping + * @vma: The vm area from which the mappings are removed + * + * The page range of the folio is defined by [page, page + nr_pages) + * + * The caller needs to hold the page table lock. + */ +void folio_remove_rmap_ptes(struct folio *folio, struct page *page, + int nr_pages, struct vm_area_struct *vma) +{ + __folio_remove_rmap(folio, page, nr_pages, vma, RMAP_LEVEL_PTE); +} + +/** + * folio_remove_rmap_pmd - remove a PMD mapping from a page range of a folio + * @folio: The folio to remove the mapping from + * @page: The first page to remove + * @vma: The vm area from which the mapping is removed + * + * The page range of the folio is defined by [page, page + HPAGE_PMD_NR) + * + * The caller needs to hold the page table lock. + */ +void folio_remove_rmap_pmd(struct folio *folio, struct page *page, + struct vm_area_struct *vma) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + __folio_remove_rmap(folio, page, HPAGE_PMD_NR, vma, RMAP_LEVEL_PMD); +#else + WARN_ON_ONCE(true); +#endif +} + /* * @arg: enum ttu_flags will be passed to this argument */ @@ -1526,7 +1628,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, /* * When racing against e.g. zap_pte_range() on another cpu, - * in between its ptep_get_and_clear_full() and page_remove_rmap(), + * in between its ptep_get_and_clear_full() and folio_remove_rmap_*(), * try_to_unmap() may return before page_mapped() has become false, * if page table locking is skipped: use TTU_SYNC to wait for that. */ @@ -1764,9 +1866,9 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, break; } - /* See page_try_share_anon_rmap(): clear PTE first. */ + /* See folio_try_share_anon_rmap(): clear PTE first. */ if (anon_exclusive && - page_try_share_anon_rmap(subpage)) { + folio_try_share_anon_rmap_pte(folio, subpage)) { swap_free(entry); set_pte_at(mm, address, pvmw.pte, pteval); ret = false; @@ -1804,7 +1906,10 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, dec_mm_counter(mm, mm_counter_file(&folio->page)); } discard: - page_remove_rmap(subpage, vma, folio_test_hugetlb(folio)); + if (unlikely(folio_test_hugetlb(folio))) + hugetlb_remove_rmap(folio); + else + folio_remove_rmap_pte(folio, subpage, vma); if (vma->vm_flags & VM_LOCKED) mlock_drain_local(); folio_put(folio); @@ -1872,7 +1977,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, /* * When racing against e.g. zap_pte_range() on another cpu, - * in between its ptep_get_and_clear_full() and page_remove_rmap(), + * in between its ptep_get_and_clear_full() and folio_remove_rmap_*(), * try_to_migrate() may return before page_mapped() has become false, * if page table locking is skipped: use TTU_SYNC to wait for that. */ @@ -2037,7 +2142,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, pte_t swp_pte; if (anon_exclusive) - BUG_ON(page_try_share_anon_rmap(subpage)); + WARN_ON_ONCE(folio_try_share_anon_rmap_pte(folio, + subpage)); /* * Store the pfn of the page in a special migration @@ -2108,14 +2214,19 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, VM_BUG_ON_PAGE(pte_write(pteval) && folio_test_anon(folio) && !anon_exclusive, subpage); - /* See page_try_share_anon_rmap(): clear PTE first. */ - if (anon_exclusive && - page_try_share_anon_rmap(subpage)) { - if (folio_test_hugetlb(folio)) + /* See folio_try_share_anon_rmap_pte(): clear PTE first. */ + if (folio_test_hugetlb(folio)) { + if (anon_exclusive && + hugetlb_try_share_anon_rmap(folio)) { set_huge_pte_at(mm, address, pvmw.pte, pteval, hsz); - else - set_pte_at(mm, address, pvmw.pte, pteval); + ret = false; + page_vma_mapped_walk_done(&pvmw); + break; + } + } else if (anon_exclusive && + folio_try_share_anon_rmap_pte(folio, subpage)) { + set_pte_at(mm, address, pvmw.pte, pteval); ret = false; page_vma_mapped_walk_done(&pvmw); break; @@ -2157,7 +2268,10 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, */ } - page_remove_rmap(subpage, vma, folio_test_hugetlb(folio)); + if (unlikely(folio_test_hugetlb(folio))) + hugetlb_remove_rmap(folio); + else + folio_remove_rmap_pte(folio, subpage, vma); if (vma->vm_flags & VM_LOCKED) mlock_drain_local(); folio_put(folio); @@ -2296,7 +2410,7 @@ static bool page_make_device_exclusive_one(struct folio *folio, * There is a reference on the page for the swap entry which has * been removed, so shouldn't take another. */ - page_remove_rmap(subpage, vma, false); + folio_remove_rmap_pte(folio, subpage, vma); } mmu_notifier_invalidate_range_end(&range); @@ -2580,12 +2694,11 @@ void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc) * The following two functions are for anonymous (private mapped) hugepages. * Unlike common anonymous pages, anonymous hugepages have no accounting code * and no lru code, because we handle hugepages differently from common pages. - * - * RMAP_COMPOUND is ignored. */ -void hugepage_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma, - unsigned long address, rmap_t flags) +void hugetlb_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma, + unsigned long address, rmap_t flags) { + VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio); atomic_inc(&folio->_entire_mapcount); @@ -2595,9 +2708,11 @@ void hugepage_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma, PageAnonExclusive(&folio->page), folio); } -void hugepage_add_new_anon_rmap(struct folio *folio, - struct vm_area_struct *vma, unsigned long address) +void hugetlb_add_new_anon_rmap(struct folio *folio, + struct vm_area_struct *vma, unsigned long address) { + VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio); + BUG_ON(address < vma->vm_start || address >= vma->vm_end); /* increment count (starts at -1) */ atomic_set(&folio->_entire_mapcount, 0); |