From 7f3a4257159dea8e7ef66d1a539dc6df708b8ed3 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Wed, 7 Aug 2024 15:17:46 +0200
Subject: Adding upstream version 6.10.3.

Signed-off-by: Daniel Baumann
---
 mm/rmap.c | 111 +++++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 60 insertions(+), 51 deletions(-)

(limited to 'mm/rmap.c')

diff --git a/mm/rmap.c b/mm/rmap.c
index 3746a55310..e8fc5ecb59 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -23,7 +23,7 @@
  * inode->i_rwsem	(while writing or truncating, not reading or faulting)
  *   mm->mmap_lock
  *     mapping->invalidate_lock (in filemap_fault)
- *       page->flags PG_locked (lock_page)
+ *       folio_lock
  *         hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share, see hugetlbfs below)
  *           vma_start_write
  *             mapping->i_mmap_rwsem
@@ -50,7 +50,7 @@
  *   hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
  *     vma_lock (hugetlb specific lock for pmd_sharing)
  *       mapping->i_mmap_rwsem (also used for hugetlb pmd sharing)
- *         page->flags PG_locked (lock_page)
+ *         folio_lock
  */
 
 #include <linux/mm.h>
@@ -182,8 +182,6 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
  * for the new allocation. At the same time, we do not want
  * to do any locking for the common case of already having
  * an anon_vma.
- *
- * This must be called with the mmap_lock held for reading.
  */
 int __anon_vma_prepare(struct vm_area_struct *vma)
 {
@@ -191,6 +189,7 @@ int __anon_vma_prepare(struct vm_area_struct *vma)
 	struct anon_vma *anon_vma, *allocated;
 	struct anon_vma_chain *avc;
 
+	mmap_assert_locked(mm);
 	might_sleep();
 
 	avc = anon_vma_chain_alloc(GFP_KERNEL);
@@ -775,6 +774,8 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
 unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 {
 	struct folio *folio = page_folio(page);
+	pgoff_t pgoff;
+
 	if (folio_test_anon(folio)) {
 		struct anon_vma *page__anon_vma = folio_anon_vma(folio);
 		/*
@@ -790,7 +791,9 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
 		return -EFAULT;
 	}
 
-	return vma_address(page, vma);
+	/* The !page__anon_vma above handles KSM folios */
+	pgoff = folio->index + folio_page_idx(folio, page);
+	return vma_address(vma, pgoff, 1);
 }
 
 /*
@@ -961,7 +964,7 @@ static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)
 int folio_referenced(struct folio *folio, int is_locked,
 		     struct mem_cgroup *memcg, unsigned long *vm_flags)
 {
-	int we_locked = 0;
+	bool we_locked = false;
 	struct folio_referenced_arg pra = {
 		.mapcount = folio_mapcount(folio),
 		.memcg = memcg,
@@ -1128,56 +1131,38 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
 	if (invalid_mkclean_vma(vma, NULL))
 		return 0;
 
-	pvmw.address = vma_pgoff_address(pgoff, nr_pages, vma);
+	pvmw.address = vma_address(vma, pgoff, nr_pages);
 	VM_BUG_ON_VMA(pvmw.address == -EFAULT, vma);
 
 	return page_vma_mkclean_one(&pvmw);
 }
 
-int folio_total_mapcount(struct folio *folio)
-{
-	int mapcount = folio_entire_mapcount(folio);
-	int nr_pages;
-	int i;
-
-	/* In the common case, avoid the loop when no pages mapped by PTE */
-	if (folio_nr_pages_mapped(folio) == 0)
-		return mapcount;
-	/*
-	 * Add all the PTE mappings of those pages mapped by PTE.
-	 * Limit the loop to folio_nr_pages_mapped()?
-	 * Perhaps: given all the raciness, that may be a good or a bad idea.
-	 */
-	nr_pages = folio_nr_pages(folio);
-	for (i = 0; i < nr_pages; i++)
-		mapcount += atomic_read(&folio_page(folio, i)->_mapcount);
-
-	/* But each of those _mapcounts was based on -1 */
-	mapcount += nr_pages;
-	return mapcount;
-}
-
 static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
 		struct page *page, int nr_pages, enum rmap_level level,
 		int *nr_pmdmapped)
 {
 	atomic_t *mapped = &folio->_nr_pages_mapped;
+	const int orig_nr_pages = nr_pages;
 	int first, nr = 0;
 
 	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
 	switch (level) {
 	case RMAP_LEVEL_PTE:
+		if (!folio_test_large(folio)) {
+			nr = atomic_inc_and_test(&page->_mapcount);
+			break;
+		}
+
 		do {
 			first = atomic_inc_and_test(&page->_mapcount);
-			if (first && folio_test_large(folio)) {
+			if (first) {
 				first = atomic_inc_return_relaxed(mapped);
-				first = (first < ENTIRELY_MAPPED);
+				if (first < ENTIRELY_MAPPED)
+					nr++;
 			}
-
-			if (first)
-				nr++;
 		} while (page++, --nr_pages > 0);
+		atomic_add(orig_nr_pages, &folio->_large_mapcount);
 		break;
 	case RMAP_LEVEL_PMD:
 		first = atomic_inc_and_test(&folio->_entire_mapcount);
@@ -1194,6 +1179,7 @@ static __always_inline unsigned int __folio_add_rmap(struct folio *folio,
 				nr = 0;
 			}
 		}
+		atomic_inc(&folio->_large_mapcount);
 		break;
 	}
 	return nr;
@@ -1429,10 +1415,14 @@ void folio_add_new_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 			SetPageAnonExclusive(page);
 		}
 
+		/* increment count (starts at -1) */
+		atomic_set(&folio->_large_mapcount, nr - 1);
 		atomic_set(&folio->_nr_pages_mapped, nr);
 	} else {
 		/* increment count (starts at -1) */
 		atomic_set(&folio->_entire_mapcount, 0);
+		/* increment count (starts at -1) */
+		atomic_set(&folio->_large_mapcount, 0);
 		atomic_set(&folio->_nr_pages_mapped, ENTIRELY_MAPPED);
 		SetPageAnonExclusive(&folio->page);
 		__lruvec_stat_mod_folio(folio, NR_ANON_THPS, nr);
@@ -1445,13 +1435,14 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio,
 		struct page *page, int nr_pages, struct vm_area_struct *vma,
 		enum rmap_level level)
 {
+	pg_data_t *pgdat = folio_pgdat(folio);
 	int nr, nr_pmdmapped = 0;
 
 	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
 
 	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
 	if (nr_pmdmapped)
-		__lruvec_stat_mod_folio(folio, folio_test_swapbacked(folio) ?
+		__mod_node_page_state(pgdat, folio_test_swapbacked(folio) ?
 			NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
 	if (nr)
 		__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
@@ -1503,25 +1494,34 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		enum rmap_level level)
 {
 	atomic_t *mapped = &folio->_nr_pages_mapped;
+	pg_data_t *pgdat = folio_pgdat(folio);
 	int last, nr = 0, nr_pmdmapped = 0;
+	bool partially_mapped = false;
 	enum node_stat_item idx;
 
 	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
 	switch (level) {
 	case RMAP_LEVEL_PTE:
+		if (!folio_test_large(folio)) {
+			nr = atomic_add_negative(-1, &page->_mapcount);
+			break;
+		}
+
+		atomic_sub(nr_pages, &folio->_large_mapcount);
 		do {
 			last = atomic_add_negative(-1, &page->_mapcount);
-			if (last && folio_test_large(folio)) {
+			if (last) {
 				last = atomic_dec_return_relaxed(mapped);
-				last = (last < ENTIRELY_MAPPED);
+				if (last < ENTIRELY_MAPPED)
+					nr++;
 			}
-
-			if (last)
-				nr++;
 		} while (page++, --nr_pages > 0);
+
+		partially_mapped = nr && atomic_read(mapped);
 		break;
 	case RMAP_LEVEL_PMD:
+		atomic_dec(&folio->_large_mapcount);
 		last = atomic_add_negative(-1, &folio->_entire_mapcount);
 		if (last) {
 			nr = atomic_sub_return_relaxed(ENTIRELY_MAPPED, mapped);
@@ -1536,17 +1536,20 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 				nr = 0;
 			}
 		}
+
+		partially_mapped = nr < nr_pmdmapped;
 		break;
 	}
 
 	if (nr_pmdmapped) {
+		/* NR_{FILE/SHMEM}_PMDMAPPED are not maintained per-memcg */
 		if (folio_test_anon(folio))
-			idx = NR_ANON_THPS;
-		else if (folio_test_swapbacked(folio))
-			idx = NR_SHMEM_PMDMAPPED;
+			__lruvec_stat_mod_folio(folio, NR_ANON_THPS, -nr_pmdmapped);
 		else
-			idx = NR_FILE_PMDMAPPED;
-		__lruvec_stat_mod_folio(folio, idx, -nr_pmdmapped);
+			__mod_node_page_state(pgdat,
+					folio_test_swapbacked(folio) ?
+					NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED,
+					-nr_pmdmapped);
 	}
 	if (nr) {
 		idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
@@ -1556,10 +1559,12 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		 * Queue anon large folio for deferred split if at least one
 		 * page of the folio is unmapped and at least one page
 		 * is still mapped.
+		 *
+		 * Check partially_mapped first to ensure it is a large folio.
 		 */
-		if (folio_test_large(folio) && folio_test_anon(folio))
-			if (level == RMAP_LEVEL_PTE || nr < nr_pmdmapped)
-				deferred_split_folio(folio);
+		if (folio_test_anon(folio) && partially_mapped &&
+		    list_empty(&folio->_deferred_list))
+			deferred_split_folio(folio);
 }
 
 /*
@@ -2588,7 +2593,8 @@ static void rmap_walk_anon(struct folio *folio,
 	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
 			pgoff_start, pgoff_end) {
 		struct vm_area_struct *vma = avc->vma;
-		unsigned long address = vma_address(&folio->page, vma);
+		unsigned long address = vma_address(vma, pgoff_start,
+				folio_nr_pages(folio));
 
 		VM_BUG_ON_VMA(address == -EFAULT, vma);
 		cond_resched();
@@ -2649,7 +2655,8 @@ static void rmap_walk_file(struct folio *folio,
 lookup:
 	vma_interval_tree_foreach(vma, &mapping->i_mmap,
 			pgoff_start, pgoff_end) {
-		unsigned long address = vma_address(&folio->page, vma);
+		unsigned long address = vma_address(vma, pgoff_start,
+				folio_nr_pages(folio));
 
 		VM_BUG_ON_VMA(address == -EFAULT, vma);
 		cond_resched();
@@ -2702,6 +2709,7 @@ void hugetlb_add_anon_rmap(struct folio *folio, struct vm_area_struct *vma,
 	VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
 
 	atomic_inc(&folio->_entire_mapcount);
+	atomic_inc(&folio->_large_mapcount);
 	if (flags & RMAP_EXCLUSIVE)
 		SetPageAnonExclusive(&folio->page);
 	VM_WARN_ON_FOLIO(folio_entire_mapcount(folio) > 1 &&
@@ -2716,6 +2724,7 @@ void hugetlb_add_new_anon_rmap(struct folio *folio,
 	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	/* increment count (starts at -1) */
 	atomic_set(&folio->_entire_mapcount, 0);
+	atomic_set(&folio->_large_mapcount, 0);
 	folio_clear_hugetlb_restore_reserve(folio);
 	__folio_set_anon(folio, vma, address, true);
 	SetPageAnonExclusive(&folio->page);
-- 
cgit v1.2.3