summaryrefslogtreecommitdiffstats
path: root/mm/huge_memory.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--mm/huge_memory.c77
1 files changed, 43 insertions, 34 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 89f58c760..769e8a125 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2493,32 +2493,11 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
return __split_huge_zero_page_pmd(vma, haddr, pmd);
}
- /*
- * Up to this point the pmd is present and huge and userland has the
- * whole access to the hugepage during the split (which happens in
- * place). If we overwrite the pmd with the not-huge version pointing
- * to the pte here (which of course we could if all CPUs were bug
- * free), userland could trigger a small page size TLB miss on the
- * small sized TLB while the hugepage TLB entry is still established in
- * the huge TLB. Some CPU doesn't like that.
- * See http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf, Erratum
- * 383 on page 105. Intel should be safe but is also warns that it's
- * only safe if the permission and cache attributes of the two entries
- * loaded in the two TLB is identical (which should be the case here).
- * But it is generally safer to never allow small and huge TLB entries
- * for the same virtual address to be loaded simultaneously. So instead
- * of doing "pmd_populate(); flush_pmd_tlb_range();" we first mark the
- * current pmd notpresent (atomically because here the pmd_trans_huge
- * must remain set at all times on the pmd until the split is complete
- * for this pmd), then we flush the SMP TLB and finally we write the
- * non-huge version of the pmd entry with pmd_populate.
- */
- old_pmd = pmdp_invalidate(vma, haddr, pmd);
-
- pmd_migration = is_pmd_migration_entry(old_pmd);
+ pmd_migration = is_pmd_migration_entry(*pmd);
if (unlikely(pmd_migration)) {
swp_entry_t entry;
+ old_pmd = *pmd;
entry = pmd_to_swp_entry(old_pmd);
page = pfn_swap_entry_to_page(entry);
write = is_writable_migration_entry(entry);
@@ -2529,6 +2508,30 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
soft_dirty = pmd_swp_soft_dirty(old_pmd);
uffd_wp = pmd_swp_uffd_wp(old_pmd);
} else {
+ /*
+ * Up to this point the pmd is present and huge and userland has
+ * the whole access to the hugepage during the split (which
+ * happens in place). If we overwrite the pmd with the not-huge
+ * version pointing to the pte here (which of course we could if
+ * all CPUs were bug free), userland could trigger a small page
+ * size TLB miss on the small sized TLB while the hugepage TLB
+ * entry is still established in the huge TLB. Some CPU doesn't
+ * like that. See
+ * http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf, Erratum
+ * 383 on page 105. Intel should be safe but is also warns that
+ * it's only safe if the permission and cache attributes of the
+ * two entries loaded in the two TLB is identical (which should
+ * be the case here). But it is generally safer to never allow
+ * small and huge TLB entries for the same virtual address to be
+ * loaded simultaneously. So instead of doing "pmd_populate();
+ * flush_pmd_tlb_range();" we first mark the current pmd
+ * notpresent (atomically because here the pmd_trans_huge must
+ * remain set at all times on the pmd until the split is
+ * complete for this pmd), then we flush the SMP TLB and finally
+ * we write the non-huge version of the pmd entry with
+ * pmd_populate.
+ */
+ old_pmd = pmdp_invalidate(vma, haddr, pmd);
page = pmd_page(old_pmd);
folio = page_folio(page);
if (pmd_dirty(old_pmd)) {
@@ -3055,30 +3058,36 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
if (new_order >= folio_order(folio))
return -EINVAL;
- /* Cannot split anonymous THP to order-1 */
- if (new_order == 1 && folio_test_anon(folio)) {
- VM_WARN_ONCE(1, "Cannot split to order-1 folio");
- return -EINVAL;
- }
-
- if (new_order) {
- /* Only swapping a whole PMD-mapped folio is supported */
- if (folio_test_swapcache(folio))
+ if (folio_test_anon(folio)) {
+ /* order-1 is not supported for anonymous THP. */
+ if (new_order == 1) {
+ VM_WARN_ONCE(1, "Cannot split to order-1 folio");
return -EINVAL;
+ }
+ } else if (new_order) {
/* Split shmem folio to non-zero order not supported */
if (shmem_mapping(folio->mapping)) {
VM_WARN_ONCE(1,
"Cannot split shmem folio to non-0 order");
return -EINVAL;
}
- /* No split if the file system does not support large folio */
- if (!mapping_large_folio_support(folio->mapping)) {
+ /*
+ * No split if the file system does not support large folio.
+ * Note that we might still have THPs in such mappings due to
+ * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
+ * does not actually support large folios properly.
+ */
+ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+ !mapping_large_folio_support(folio->mapping)) {
VM_WARN_ONCE(1,
"Cannot split file folio to non-0 order");
return -EINVAL;
}
}
+ /* Only swapping a whole PMD-mapped folio is supported */
+ if (folio_test_swapcache(folio) && new_order)
+ return -EINVAL;
is_hzp = is_huge_zero_page(&folio->page);
if (is_hzp) {