summaryrefslogtreecommitdiffstats
path: root/arch/s390/mm
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-19 21:00:37 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-06-19 21:00:37 +0000
commit94ac2ab3fff96814d7460a27a0e9d004abbd4128 (patch)
tree9a4eb8cc234b540b0f4b93363109cdd37a20540b /arch/s390/mm
parentAdding debian version 6.8.12-1. (diff)
downloadlinux-94ac2ab3fff96814d7460a27a0e9d004abbd4128.tar.xz
linux-94ac2ab3fff96814d7460a27a0e9d004abbd4128.zip
Merging upstream version 6.9.2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/s390/mm')
-rw-r--r--arch/s390/mm/Makefile1
-rw-r--r--arch/s390/mm/dump_pagetables.c21
-rw-r--r--arch/s390/mm/extmem.c4
-rw-r--r--arch/s390/mm/fault.c4
-rw-r--r--arch/s390/mm/gmap.c215
-rw-r--r--arch/s390/mm/hugetlbpage.c2
-rw-r--r--arch/s390/mm/init.c5
-rw-r--r--arch/s390/mm/mmap.c19
-rw-r--r--arch/s390/mm/pageattr.c2
-rw-r--r--arch/s390/mm/pgalloc.c8
-rw-r--r--arch/s390/mm/pgtable.c10
-rw-r--r--arch/s390/mm/physaddr.c15
-rw-r--r--arch/s390/mm/vmem.c68
13 files changed, 158 insertions, 216 deletions
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index 352ff520fd..f6c2db7a86 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -7,6 +7,7 @@ obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o
obj-y += page-states.o pageattr.o pgtable.o pgalloc.o extable.o
obj-$(CONFIG_CMM) += cmm.o
+obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o
obj-$(CONFIG_PGSTE) += gmap.o
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index d37a8f607b..ffd07ed7b4 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -6,7 +6,6 @@
#include <linux/mm.h>
#include <linux/kfence.h>
#include <linux/kasan.h>
-#include <asm/ptdump.h>
#include <asm/kasan.h>
#include <asm/abs_lowcore.h>
#include <asm/nospec-branch.h>
@@ -122,7 +121,6 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level)
static void note_prot_wx(struct pg_state *st, unsigned long addr)
{
-#ifdef CONFIG_DEBUG_WX
if (!st->check_wx)
return;
if (st->current_prot & _PAGE_INVALID)
@@ -139,10 +137,10 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
*/
if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)))
return;
- WARN_ONCE(1, "s390/mm: Found insecure W+X mapping at address %pS\n",
+ WARN_ONCE(IS_ENABLED(CONFIG_DEBUG_WX),
+ "s390/mm: Found insecure W+X mapping at address %pS\n",
(void *)st->start_address);
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
-#endif /* CONFIG_DEBUG_WX */
}
static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val)
@@ -194,8 +192,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level,
}
}
-#ifdef CONFIG_DEBUG_WX
-void ptdump_check_wx(void)
+bool ptdump_check_wx(void)
{
struct pg_state st = {
.ptdump = {
@@ -218,16 +215,20 @@ void ptdump_check_wx(void)
};
if (!MACHINE_HAS_NX)
- return;
+ return true;
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
- if (st.wx_pages)
+ if (st.wx_pages) {
pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages);
- else
+
+ return false;
+ } else {
pr_info("Checked W+X mappings: passed, no %sW+X pages found\n",
(nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ?
"unexpected " : "");
+
+ return true;
+ }
}
-#endif /* CONFIG_DEBUG_WX */
#ifdef CONFIG_PTDUMP_DEBUGFS
static int ptdump_show(struct seq_file *m, void *v)
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index e41869f5cc..282fefe107 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -136,7 +136,7 @@ dcss_diag(int *func, void *parameter,
unsigned long rx, ry;
int rc;
- rx = (unsigned long) parameter;
+ rx = virt_to_phys(parameter);
ry = (unsigned long) *func;
diag_stat_inc(DIAG_STAT_X064);
@@ -178,7 +178,7 @@ query_segment_type (struct dcss_segment *seg)
/* initialize diag input parameters */
qin->qopcode = DCSS_FINDSEGA;
- qin->qoutptr = (unsigned long) qout;
+ qin->qoutptr = virt_to_phys(qout);
qin->qoutlen = sizeof(struct qout64);
memcpy (qin->qname, seg->dcss_name, 8);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ac4c78546d..0c66b32e0f 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -67,13 +67,15 @@ early_initcall(fault_init);
static enum fault_type get_fault_type(struct pt_regs *regs)
{
union teid teid = { .val = regs->int_parm_long };
+ struct gmap *gmap;
if (likely(teid.as == PSW_BITS_AS_PRIMARY)) {
if (user_mode(regs))
return USER_FAULT;
if (!IS_ENABLED(CONFIG_PGSTE))
return KERNEL_FAULT;
- if (test_pt_regs_flag(regs, PIF_GUEST_FAULT))
+ gmap = (struct gmap *)S390_lowcore.gmap;
+ if (gmap && gmap->asce == regs->cr1)
return GMAP_FAULT;
return KERNEL_FAULT;
}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 3b57837f3e..12d22a7fa3 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -206,9 +206,11 @@ static void gmap_free(struct gmap *gmap)
/* Free additional data for a shadow gmap */
if (gmap_is_shadow(gmap)) {
+ struct ptdesc *ptdesc, *n;
+
/* Free all page tables. */
- list_for_each_entry_safe(page, next, &gmap->pt_list, lru)
- page_table_free_pgste(page);
+ list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list)
+ page_table_free_pgste(ptdesc);
gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
/* Release reference to the parent */
gmap_put(gmap->parent);
@@ -601,7 +603,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
pmd = pmd_offset(pud, vmaddr);
VM_BUG_ON(pmd_none(*pmd));
/* Are we allowed to use huge pages? */
- if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
+ if (pmd_leaf(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
return -EFAULT;
/* Link gmap segment table entry location to page table. */
rc = radix_tree_preload(GFP_KERNEL_ACCOUNT);
@@ -613,7 +615,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
rc = radix_tree_insert(&gmap->host_to_guest,
vmaddr >> PMD_SHIFT, table);
if (!rc) {
- if (pmd_large(*pmd)) {
+ if (pmd_leaf(*pmd)) {
*table = (pmd_val(*pmd) &
_SEGMENT_ENTRY_HARDWARE_BITS_LARGE)
| _SEGMENT_ENTRY_GMAP_UC;
@@ -943,7 +945,7 @@ static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
}
/* 4k page table entries are locked via the pte (pte_alloc_map_lock). */
- if (!pmd_large(*pmdp))
+ if (!pmd_leaf(*pmdp))
spin_unlock(&gmap->guest_table_lock);
return pmdp;
}
@@ -955,7 +957,7 @@ static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr)
*/
static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp)
{
- if (pmd_large(*pmdp))
+ if (pmd_leaf(*pmdp))
spin_unlock(&gmap->guest_table_lock);
}
@@ -1066,7 +1068,7 @@ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr,
rc = -EAGAIN;
pmdp = gmap_pmd_op_walk(gmap, gaddr);
if (pmdp) {
- if (!pmd_large(*pmdp)) {
+ if (!pmd_leaf(*pmdp)) {
rc = gmap_protect_pte(gmap, gaddr, pmdp, prot,
bits);
if (!rc) {
@@ -1348,7 +1350,7 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
{
unsigned long *ste;
phys_addr_t sto, pgt;
- struct page *page;
+ struct ptdesc *ptdesc;
BUG_ON(!gmap_is_shadow(sg));
ste = gmap_table_walk(sg, raddr, 1); /* get segment pointer */
@@ -1361,9 +1363,9 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
*ste = _SEGMENT_ENTRY_EMPTY;
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
- page = phys_to_page(pgt);
- list_del(&page->lru);
- page_table_free_pgste(page);
+ ptdesc = page_ptdesc(phys_to_page(pgt));
+ list_del(&ptdesc->pt_list);
+ page_table_free_pgste(ptdesc);
}
/**
@@ -1377,7 +1379,7 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
unsigned long *sgt)
{
- struct page *page;
+ struct ptdesc *ptdesc;
phys_addr_t pgt;
int i;
@@ -1389,9 +1391,9 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
sgt[i] = _SEGMENT_ENTRY_EMPTY;
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
- page = phys_to_page(pgt);
- list_del(&page->lru);
- page_table_free_pgste(page);
+ ptdesc = page_ptdesc(phys_to_page(pgt));
+ list_del(&ptdesc->pt_list);
+ page_table_free_pgste(ptdesc);
}
}
@@ -2058,19 +2060,19 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
{
unsigned long raddr, origin;
unsigned long *table;
- struct page *page;
+ struct ptdesc *ptdesc;
phys_addr_t s_pgt;
int rc;
BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
/* Allocate a shadow page table */
- page = page_table_alloc_pgste(sg->mm);
- if (!page)
+ ptdesc = page_table_alloc_pgste(sg->mm);
+ if (!ptdesc)
return -ENOMEM;
- page->index = pgt & _SEGMENT_ENTRY_ORIGIN;
+ ptdesc->pt_index = pgt & _SEGMENT_ENTRY_ORIGIN;
if (fake)
- page->index |= GMAP_SHADOW_FAKE_TABLE;
- s_pgt = page_to_phys(page);
+ ptdesc->pt_index |= GMAP_SHADOW_FAKE_TABLE;
+ s_pgt = page_to_phys(ptdesc_page(ptdesc));
/* Install shadow page table */
spin_lock(&sg->guest_table_lock);
table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
@@ -2088,7 +2090,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
/* mark as invalid as long as the parent table is not protected */
*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
(pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
- list_add(&page->lru, &sg->pt_list);
+ list_add(&ptdesc->pt_list, &sg->pt_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_SEGMENT_ENTRY_INVALID;
@@ -2114,7 +2116,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
return rc;
out_free:
spin_unlock(&sg->guest_table_lock);
- page_table_free_pgste(page);
+ page_table_free_pgste(ptdesc);
return rc;
}
@@ -2498,7 +2500,7 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4],
if (!pmdp)
return;
- if (pmd_large(*pmdp)) {
+ if (pmd_leaf(*pmdp)) {
if (gmap_test_and_clear_dirty_pmd(gmap, pmdp, gaddr))
bitmap_fill(bitmap, _PAGE_ENTRIES);
} else {
@@ -2548,6 +2550,41 @@ static inline void thp_split_mm(struct mm_struct *mm)
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
+ * Remove all empty zero pages from the mapping for lazy refaulting
+ * - This must be called after mm->context.has_pgste is set, to avoid
+ * future creation of zero pages
+ * - This must be called after THP was disabled.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * racing with the loop below and so causing pte_offset_map_lock() to fail,
+ * it will never insert a page table containing empty zero pages once
+ * mm_forbids_zeropage(mm) i.e. mm->context.has_pgste is set.
+ */
+static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
+ unsigned long end, struct mm_walk *walk)
+{
+ unsigned long addr;
+
+ for (addr = start; addr != end; addr += PAGE_SIZE) {
+ pte_t *ptep;
+ spinlock_t *ptl;
+
+ ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (!ptep)
+ break;
+ if (is_zero_pfn(pte_pfn(*ptep)))
+ ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_unmap_unlock(ptep, ptl);
+ }
+ return 0;
+}
+
+static const struct mm_walk_ops zap_zero_walk_ops = {
+ .pmd_entry = __zap_zero_pages,
+ .walk_lock = PGWALK_WRLOCK,
+};
+
+/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
@@ -2564,142 +2601,22 @@ int s390_enable_sie(void)
mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
+ walk_page_range(mm, 0, TASK_SIZE, &zap_zero_walk_ops, NULL);
mmap_write_unlock(mm);
return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
-static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
- unsigned long end, struct mm_walk *walk)
-{
- unsigned long *found_addr = walk->private;
-
- /* Return 1 of the page is a zeropage. */
- if (is_zero_pfn(pte_pfn(*pte))) {
- /*
- * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
- * right thing and likely don't care: FAULT_FLAG_UNSHARE
- * currently only works in COW mappings, which is also where
- * mm_forbids_zeropage() is checked.
- */
- if (!is_cow_mapping(walk->vma->vm_flags))
- return -EFAULT;
-
- *found_addr = addr;
- return 1;
- }
- return 0;
-}
-
-static const struct mm_walk_ops find_zeropage_ops = {
- .pte_entry = find_zeropage_pte_entry,
- .walk_lock = PGWALK_WRLOCK,
-};
-
-/*
- * Unshare all shared zeropages, replacing them by anonymous pages. Note that
- * we cannot simply zap all shared zeropages, because this could later
- * trigger unexpected userfaultfd missing events.
- *
- * This must be called after mm->context.allow_cow_sharing was
- * set to 0, to avoid future mappings of shared zeropages.
- *
- * mm contracts with s390, that even if mm were to remove a page table,
- * and racing with walk_page_range_vma() calling pte_offset_map_lock()
- * would fail, it will never insert a page table containing empty zero
- * pages once mm_forbids_zeropage(mm) i.e.
- * mm->context.allow_cow_sharing is set to 0.
- */
-static int __s390_unshare_zeropages(struct mm_struct *mm)
-{
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, mm, 0);
- unsigned long addr;
- vm_fault_t fault;
- int rc;
-
- for_each_vma(vmi, vma) {
- /*
- * We could only look at COW mappings, but it's more future
- * proof to catch unexpected zeropages in other mappings and
- * fail.
- */
- if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
- continue;
- addr = vma->vm_start;
-
-retry:
- rc = walk_page_range_vma(vma, addr, vma->vm_end,
- &find_zeropage_ops, &addr);
- if (rc < 0)
- return rc;
- else if (!rc)
- continue;
-
- /* addr was updated by find_zeropage_pte_entry() */
- fault = handle_mm_fault(vma, addr,
- FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
- NULL);
- if (fault & VM_FAULT_OOM)
- return -ENOMEM;
- /*
- * See break_ksm(): even after handle_mm_fault() returned 0, we
- * must start the lookup from the current address, because
- * handle_mm_fault() may back out if there's any difficulty.
- *
- * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
- * maybe they could trigger in the future on concurrent
- * truncation. In that case, the shared zeropage would be gone
- * and we can simply retry and make progress.
- */
- cond_resched();
- goto retry;
- }
-
- return 0;
-}
-
-static int __s390_disable_cow_sharing(struct mm_struct *mm)
+int gmap_mark_unmergeable(void)
{
- int rc;
-
- if (!mm->context.allow_cow_sharing)
- return 0;
-
- mm->context.allow_cow_sharing = 0;
-
- /* Replace all shared zeropages by anonymous pages. */
- rc = __s390_unshare_zeropages(mm);
/*
* Make sure to disable KSM (if enabled for the whole process or
* individual VMAs). Note that nothing currently hinders user space
* from re-enabling it.
*/
- if (!rc)
- rc = ksm_disable(mm);
- if (rc)
- mm->context.allow_cow_sharing = 1;
- return rc;
-}
-
-/*
- * Disable most COW-sharing of memory pages for the whole process:
- * (1) Disable KSM and unmerge/unshare any KSM pages.
- * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
- *
- * Not that we currently don't bother with COW-shared pages that are shared
- * with parent/child processes due to fork().
- */
-int s390_disable_cow_sharing(void)
-{
- int rc;
-
- mmap_write_lock(current->mm);
- rc = __s390_disable_cow_sharing(current->mm);
- mmap_write_unlock(current->mm);
- return rc;
+ return ksm_disable(current->mm);
}
-EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);
+EXPORT_SYMBOL_GPL(gmap_mark_unmergeable);
/*
* Enable storage key handling from now on and initialize the storage
@@ -2768,7 +2685,7 @@ int s390_enable_skey(void)
goto out_up;
mm->context.uses_skeys = 1;
- rc = __s390_disable_cow_sharing(mm);
+ rc = gmap_mark_unmergeable();
if (rc) {
mm->context.uses_skeys = 0;
goto out_up;
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 763469e518..dc3db86e13 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -235,7 +235,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
int pmd_huge(pmd_t pmd)
{
- return pmd_large(pmd);
+ return pmd_leaf(pmd);
}
int pud_huge(pud_t pud)
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 43e612bc2b..f6391442c0 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -37,7 +37,6 @@
#include <asm/pgalloc.h>
#include <asm/ctlreg.h>
#include <asm/kfence.h>
-#include <asm/ptdump.h>
#include <asm/dma.h>
#include <asm/abs_lowcore.h>
#include <asm/tlb.h>
@@ -109,7 +108,6 @@ void mark_rodata_ro(void)
__set_memory_ro(__start_ro_after_init, __end_ro_after_init);
pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
- debug_checkwx();
}
int set_memory_encrypted(unsigned long vaddr, int numpages)
@@ -281,9 +279,6 @@ int arch_add_memory(int nid, u64 start, u64 size,
unsigned long size_pages = PFN_DOWN(size);
int rc;
- if (WARN_ON_ONCE(params->altmap))
- return -EINVAL;
-
if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
return -EINVAL;
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index fc9a7dc26c..b14fc08876 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -71,6 +71,15 @@ static inline unsigned long mmap_base(unsigned long rnd,
return PAGE_ALIGN(STACK_TOP - gap - rnd);
}
+static int get_align_mask(struct file *filp, unsigned long flags)
+{
+ if (!(current->flags & PF_RANDOMIZE))
+ return 0;
+ if (filp || (flags & MAP_SHARED))
+ return MMAP_ALIGN_MASK << PAGE_SHIFT;
+ return 0;
+}
+
unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
@@ -97,10 +106,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.length = len;
info.low_limit = mm->mmap_base;
info.high_limit = TASK_SIZE;
- if (filp || (flags & MAP_SHARED))
- info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
- else
- info.align_mask = 0;
+ info.align_mask = get_align_mask(filp, flags);
info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
if (offset_in_page(addr))
@@ -138,10 +144,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, unsigned long ad
info.length = len;
info.low_limit = PAGE_SIZE;
info.high_limit = mm->mmap_base;
- if (filp || (flags & MAP_SHARED))
- info.align_mask = MMAP_ALIGN_MASK << PAGE_SHIFT;
- else
- info.align_mask = 0;
+ info.align_mask = get_align_mask(filp, flags);
info.align_offset = pgoff << PAGE_SHIFT;
addr = vm_unmapped_area(&info);
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 739185fc39..01bc8fad64 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -185,7 +185,7 @@ static int walk_pmd_level(pud_t *pudp, unsigned long addr, unsigned long end,
if (pmd_none(*pmdp))
return -EINVAL;
next = pmd_addr_end(addr, end);
- if (pmd_large(*pmdp)) {
+ if (pmd_leaf(*pmdp)) {
need_split = !!(flags & SET_MEMORY_4K);
need_split |= !!(addr & ~PMD_MASK);
need_split |= !!(addr + PMD_SIZE > next);
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 008e487c94..abb629d7e1 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -135,7 +135,7 @@ err_p4d:
#ifdef CONFIG_PGSTE
-struct page *page_table_alloc_pgste(struct mm_struct *mm)
+struct ptdesc *page_table_alloc_pgste(struct mm_struct *mm)
{
struct ptdesc *ptdesc;
u64 *table;
@@ -147,12 +147,12 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
}
- return ptdesc_page(ptdesc);
+ return ptdesc;
}
-void page_table_free_pgste(struct page *page)
+void page_table_free_pgste(struct ptdesc *ptdesc)
{
- pagetable_free(page_ptdesc(page));
+ pagetable_free(ptdesc);
}
#endif /* CONFIG_PGSTE */
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 3ff07b6bcd..2c944bafb0 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -721,9 +721,9 @@ static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
if (!non_swap_entry(entry))
dec_mm_counter(mm, MM_SWAPENTS);
else if (is_migration_entry(entry)) {
- struct page *page = pfn_swap_entry_to_page(entry);
+ struct folio *folio = pfn_swap_entry_folio(entry);
- dec_mm_counter(mm, mm_counter(page));
+ dec_mm_counter(mm, mm_counter(folio));
}
free_swap_and_cache(entry);
}
@@ -827,7 +827,7 @@ again:
return key ? -EFAULT : 0;
}
- if (pmd_large(*pmdp)) {
+ if (pmd_leaf(*pmdp)) {
paddr = pmd_val(*pmdp) & HPAGE_MASK;
paddr |= addr & ~HPAGE_MASK;
/*
@@ -938,7 +938,7 @@ again:
return 0;
}
- if (pmd_large(*pmdp)) {
+ if (pmd_leaf(*pmdp)) {
paddr = pmd_val(*pmdp) & HPAGE_MASK;
paddr |= addr & ~HPAGE_MASK;
cc = page_reset_referenced(paddr);
@@ -1002,7 +1002,7 @@ again:
return 0;
}
- if (pmd_large(*pmdp)) {
+ if (pmd_leaf(*pmdp)) {
paddr = pmd_val(*pmdp) & HPAGE_MASK;
paddr |= addr & ~HPAGE_MASK;
*key = page_get_storage_key(paddr);
diff --git a/arch/s390/mm/physaddr.c b/arch/s390/mm/physaddr.c
new file mode 100644
index 0000000000..59de866c72
--- /dev/null
+++ b/arch/s390/mm/physaddr.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/mmdebug.h>
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <asm/page.h>
+
+unsigned long __phys_addr(unsigned long x, bool is_31bit)
+{
+ VIRTUAL_BUG_ON(is_vmalloc_or_module_addr((void *)(x)));
+ x = __pa_nodebug(x);
+ if (is_31bit)
+ VIRTUAL_BUG_ON(x >> 31);
+ return x;
+}
+EXPORT_SYMBOL(__phys_addr);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 84e173c02a..85cddf904c 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -33,8 +33,12 @@ static void __ref *vmem_alloc_pages(unsigned int order)
return memblock_alloc(size, size);
}
-static void vmem_free_pages(unsigned long addr, int order)
+static void vmem_free_pages(unsigned long addr, int order, struct vmem_altmap *altmap)
{
+ if (altmap) {
+ vmem_altmap_free(altmap, 1 << order);
+ return;
+ }
/* We don't expect boot memory to be removed ever. */
if (!slab_is_available() ||
WARN_ON_ONCE(PageReserved(virt_to_page((void *)addr))))
@@ -156,7 +160,8 @@ static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
- unsigned long end, bool add, bool direct)
+ unsigned long end, bool add, bool direct,
+ struct vmem_altmap *altmap)
{
unsigned long prot, pages = 0;
int ret = -ENOMEM;
@@ -172,11 +177,11 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
if (pte_none(*pte))
continue;
if (!direct)
- vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0);
+ vmem_free_pages((unsigned long)pfn_to_virt(pte_pfn(*pte)), get_order(PAGE_SIZE), altmap);
pte_clear(&init_mm, addr, pte);
} else if (pte_none(*pte)) {
if (!direct) {
- void *new_page = vmemmap_alloc_block(PAGE_SIZE, NUMA_NO_NODE);
+ void *new_page = vmemmap_alloc_block_buf(PAGE_SIZE, NUMA_NO_NODE, altmap);
if (!new_page)
goto out;
@@ -213,7 +218,8 @@ static void try_free_pte_table(pmd_t *pmd, unsigned long start)
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
- unsigned long end, bool add, bool direct)
+ unsigned long end, bool add, bool direct,
+ struct vmem_altmap *altmap)
{
unsigned long next, prot, pages = 0;
int ret = -ENOMEM;
@@ -230,15 +236,15 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
if (!add) {
if (pmd_none(*pmd))
continue;
- if (pmd_large(*pmd)) {
+ if (pmd_leaf(*pmd)) {
if (IS_ALIGNED(addr, PMD_SIZE) &&
IS_ALIGNED(next, PMD_SIZE)) {
if (!direct)
- vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+ vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE), altmap);
pmd_clear(pmd);
pages++;
} else if (!direct && vmemmap_unuse_sub_pmd(addr, next)) {
- vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE));
+ vmem_free_pages(pmd_deref(*pmd), get_order(PMD_SIZE), altmap);
pmd_clear(pmd);
}
continue;
@@ -261,7 +267,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
* page tables since vmemmap_populate gets
* called for each section separately.
*/
- new_page = vmemmap_alloc_block(PMD_SIZE, NUMA_NO_NODE);
+ new_page = vmemmap_alloc_block_buf(PMD_SIZE, NUMA_NO_NODE, altmap);
if (new_page) {
set_pmd(pmd, __pmd(__pa(new_page) | prot));
if (!IS_ALIGNED(addr, PMD_SIZE) ||
@@ -275,12 +281,12 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
if (!pte)
goto out;
pmd_populate(&init_mm, pmd, pte);
- } else if (pmd_large(*pmd)) {
+ } else if (pmd_leaf(*pmd)) {
if (!direct)
vmemmap_use_sub_pmd(addr, next);
continue;
}
- ret = modify_pte_table(pmd, addr, next, add, direct);
+ ret = modify_pte_table(pmd, addr, next, add, direct, altmap);
if (ret)
goto out;
if (!add)
@@ -302,12 +308,12 @@ static void try_free_pmd_table(pud_t *pud, unsigned long start)
for (i = 0; i < PTRS_PER_PMD; i++, pmd++)
if (!pmd_none(*pmd))
return;
- vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER);
+ vmem_free_pages(pud_deref(*pud), CRST_ALLOC_ORDER, NULL);
pud_clear(pud);
}
static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
- bool add, bool direct)
+ bool add, bool direct, struct vmem_altmap *altmap)
{
unsigned long next, prot, pages = 0;
int ret = -ENOMEM;
@@ -347,7 +353,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
} else if (pud_leaf(*pud)) {
continue;
}
- ret = modify_pmd_table(pud, addr, next, add, direct);
+ ret = modify_pmd_table(pud, addr, next, add, direct, altmap);
if (ret)
goto out;
if (!add)
@@ -370,12 +376,12 @@ static void try_free_pud_table(p4d_t *p4d, unsigned long start)
if (!pud_none(*pud))
return;
}
- vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER);
+ vmem_free_pages(p4d_deref(*p4d), CRST_ALLOC_ORDER, NULL);
p4d_clear(p4d);
}
static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
- bool add, bool direct)
+ bool add, bool direct, struct vmem_altmap *altmap)
{
unsigned long next;
int ret = -ENOMEM;
@@ -394,7 +400,7 @@ static int modify_p4d_table(pgd_t *pgd, unsigned long addr, unsigned long end,
goto out;
p4d_populate(&init_mm, p4d, pud);
}
- ret = modify_pud_table(p4d, addr, next, add, direct);
+ ret = modify_pud_table(p4d, addr, next, add, direct, altmap);
if (ret)
goto out;
if (!add)
@@ -415,12 +421,12 @@ static void try_free_p4d_table(pgd_t *pgd, unsigned long start)
if (!p4d_none(*p4d))
return;
}
- vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER);
+ vmem_free_pages(pgd_deref(*pgd), CRST_ALLOC_ORDER, NULL);
pgd_clear(pgd);
}
static int modify_pagetable(unsigned long start, unsigned long end, bool add,
- bool direct)
+ bool direct, struct vmem_altmap *altmap)
{
unsigned long addr, next;
int ret = -ENOMEM;
@@ -445,7 +451,7 @@ static int modify_pagetable(unsigned long start, unsigned long end, bool add,
goto out;
pgd_populate(&init_mm, pgd, p4d);
}
- ret = modify_p4d_table(pgd, addr, next, add, direct);
+ ret = modify_p4d_table(pgd, addr, next, add, direct, altmap);
if (ret)
goto out;
if (!add)
@@ -458,14 +464,16 @@ out:
return ret;
}
-static int add_pagetable(unsigned long start, unsigned long end, bool direct)
+static int add_pagetable(unsigned long start, unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
{
- return modify_pagetable(start, end, true, direct);
+ return modify_pagetable(start, end, true, direct, altmap);
}
-static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
+static int remove_pagetable(unsigned long start, unsigned long end, bool direct,
+ struct vmem_altmap *altmap)
{
- return modify_pagetable(start, end, false, direct);
+ return modify_pagetable(start, end, false, direct, altmap);
}
/*
@@ -474,7 +482,7 @@ static int remove_pagetable(unsigned long start, unsigned long end, bool direct)
static int vmem_add_range(unsigned long start, unsigned long size)
{
start = (unsigned long)__va(start);
- return add_pagetable(start, start + size, true);
+ return add_pagetable(start, start + size, true, NULL);
}
/*
@@ -483,7 +491,7 @@ static int vmem_add_range(unsigned long start, unsigned long size)
static void vmem_remove_range(unsigned long start, unsigned long size)
{
start = (unsigned long)__va(start);
- remove_pagetable(start, start + size, true);
+ remove_pagetable(start, start + size, true, NULL);
}
/*
@@ -496,9 +504,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
mutex_lock(&vmem_mutex);
/* We don't care about the node, just use NUMA_NO_NODE on allocations */
- ret = add_pagetable(start, end, false);
+ ret = add_pagetable(start, end, false, altmap);
if (ret)
- remove_pagetable(start, end, false);
+ remove_pagetable(start, end, false, altmap);
mutex_unlock(&vmem_mutex);
return ret;
}
@@ -509,7 +517,7 @@ void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap)
{
mutex_lock(&vmem_mutex);
- remove_pagetable(start, end, false);
+ remove_pagetable(start, end, false, altmap);
mutex_unlock(&vmem_mutex);
}
@@ -602,7 +610,7 @@ pte_t *vmem_get_alloc_pte(unsigned long addr, bool alloc)
if (!pte)
goto out;
pmd_populate(&init_mm, pmd, pte);
- } else if (WARN_ON_ONCE(pmd_large(*pmd))) {
+ } else if (WARN_ON_ONCE(pmd_leaf(*pmd))) {
goto out;
}
ptep = pte_offset_kernel(pmd, addr);