summaryrefslogtreecommitdiffstats
path: root/arch/riscv/mm
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 10:05:51 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 10:05:51 +0000
commit5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch)
treea94efe259b9009378be6d90eb30d2b019d95c194 /arch/riscv/mm
parentInitial commit. (diff)
downloadlinux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.tar.xz
linux-5d1646d90e1f2cceb9f0828f4b28318cd0ec7744.zip
Adding upstream version 5.10.209.upstream/5.10.209upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/riscv/mm')
-rw-r--r--arch/riscv/mm/Makefile32
-rw-r--r--arch/riscv/mm/cacheflush.c93
-rw-r--r--arch/riscv/mm/context.c67
-rw-r--r--arch/riscv/mm/extable.c24
-rw-r--r--arch/riscv/mm/fault.c305
-rw-r--r--arch/riscv/mm/hugetlbpage.c34
-rw-r--r--arch/riscv/mm/init.c687
-rw-r--r--arch/riscv/mm/kasan_init.c115
-rw-r--r--arch/riscv/mm/pageattr.c198
-rw-r--r--arch/riscv/mm/physaddr.c37
-rw-r--r--arch/riscv/mm/ptdump.c353
-rw-r--r--arch/riscv/mm/tlbflush.c56
12 files changed, 2001 insertions, 0 deletions
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
new file mode 100644
index 000000000..ac7a25298
--- /dev/null
+++ b/arch/riscv/mm/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+# setup_vm() in init.c is called from head.S with the MMU off, so init.o is
+# built with -mcmodel=medany (PC-relative addressing) and without the ftrace
+# compiler instrumentation.
+CFLAGS_init.o := -mcmodel=medany
+ifdef CONFIG_FTRACE
+CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
+endif
+
+# Exclude init.o from KCOV coverage instrumentation.
+KCOV_INSTRUMENT_init.o := n
+
+obj-y += init.o
+obj-y += extable.o
+obj-$(CONFIG_MMU) += fault.o pageattr.o
+obj-y += cacheflush.o
+obj-y += context.o
+
+# tlbflush.o is only built when both CONFIG_MMU and CONFIG_SMP are enabled.
+ifeq ($(CONFIG_MMU),y)
+obj-$(CONFIG_SMP) += tlbflush.o
+endif
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
+obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
+obj-$(CONFIG_KASAN) += kasan_init.o
+
+# Do not KASAN-instrument kasan_init.o, init.o or (with CONFIG_DEBUG_VIRTUAL)
+# physaddr.o.
+ifdef CONFIG_KASAN
+KASAN_SANITIZE_kasan_init.o := n
+KASAN_SANITIZE_init.o := n
+ifdef CONFIG_DEBUG_VIRTUAL
+KASAN_SANITIZE_physaddr.o := n
+endif
+endif
+
+obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
new file mode 100644
index 000000000..2ae1201cf
--- /dev/null
+++ b/arch/riscv/mm/cacheflush.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2017 SiFive
+ */
+
+#include <asm/cacheflush.h>
+
+#ifdef CONFIG_SMP
+
+#include <asm/sbi.h>
+
+/*
+ * Cross-call handler used with on_each_cpu() and on_each_cpu_mask(): calls
+ * local_flush_icache_all() on the hart it runs on.
+ *
+ * @info: unused; present only to match the callback signature.
+ */
+static void ipi_remote_fence_i(void *info)
+{
+	/* A void function must not return an expression, so just call it. */
+	local_flush_icache_all();
+}
+
+/*
+ * Flush the local instruction cache, then request the same on the other
+ * harts: through sbi_remote_fence_i() when CONFIG_RISCV_SBI is enabled,
+ * otherwise by running ipi_remote_fence_i() on each CPU and waiting for it.
+ */
+void flush_icache_all(void)
+{
+	local_flush_icache_all();
+
+	if (!IS_ENABLED(CONFIG_RISCV_SBI)) {
+		on_each_cpu(ipi_remote_fence_i, NULL, 1);
+		return;
+	}
+
+	sbi_remote_fence_i(NULL);
+}
+EXPORT_SYMBOL(flush_icache_all);
+
+/*
+ * Performs an icache flush for the given MM context. RISC-V has no direct
+ * mechanism for instruction cache shoot downs, so instead we send an IPI that
+ * informs the remote harts they need to flush their local instruction caches.
+ * To avoid pathologically slow behavior in a common case (a bunch of
+ * single-hart processes on a many-hart machine, ie 'make -j') we avoid the
+ * IPIs for harts that are not currently executing a MM context and instead
+ * schedule a deferred local instruction cache flush to be performed before
+ * execution resumes on each hart.
+ *
+ * @mm: MM context whose instruction caches must be brought up to date.
+ * @local: when true and @mm is current->active_mm, no remote flush is sent;
+ * the other harts stay marked in mm->context.icache_stale_mask and
+ * flush when flush_icache_deferred() runs for them.
+ */
+void flush_icache_mm(struct mm_struct *mm, bool local)
+{
+ unsigned int cpu;
+ cpumask_t others, *mask;
+
+ /* Preemption stays off while this hart's id is used to build the masks. */
+ preempt_disable();
+
+ /* Mark every hart's icache as needing a flush for this MM. */
+ mask = &mm->context.icache_stale_mask;
+ cpumask_setall(mask);
+ /* Flush this hart's I$ now, and mark it as flushed. */
+ cpu = smp_processor_id();
+ cpumask_clear_cpu(cpu, mask);
+ local_flush_icache_all();
+
+ /*
+ * Flush the I$ of other harts concurrently executing, and mark them as
+ * flushed.
+ */
+ cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
+ /* With no other hart in mm_cpumask(mm) the flush is treated as local. */
+ local |= cpumask_empty(&others);
+ if (mm == current->active_mm && local) {
+ /*
+ * It's assumed that at least one strongly ordered operation is
+ * performed on this hart between setting a hart's cpumask bit
+ * and scheduling this MM context on that hart. Sending an SBI
+ * remote message will do this, but in the case where no
+ * messages are sent we still need to order this hart's writes
+ * with flush_icache_deferred().
+ */
+ smp_mb();
+ } else if (IS_ENABLED(CONFIG_RISCV_SBI)) {
+ cpumask_t hartid_mask;
+
+ /* The SBI call takes hart ids, so translate the CPU mask first. */
+ riscv_cpuid_to_hartid_mask(&others, &hartid_mask);
+ sbi_remote_fence_i(cpumask_bits(&hartid_mask));
+ } else {
+ on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1);
+ }
+
+ preempt_enable();
+}
+
+#endif /* CONFIG_SMP */
+
+#ifdef CONFIG_MMU
+/*
+ * Flush the instruction caches for the page referenced by @pte unless that
+ * page already carries PG_dcache_clean. The flag is set only after
+ * flush_icache_all() has been called.
+ */
+void flush_icache_pte(pte_t pte)
+{
+	struct page *pg = pte_page(pte);
+
+	if (test_bit(PG_dcache_clean, &pg->flags))
+		return;
+
+	flush_icache_all();
+	set_bit(PG_dcache_clean, &pg->flags);
+}
+#endif /* CONFIG_MMU */
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
new file mode 100644
index 000000000..613ec81a8
--- /dev/null
+++ b/arch/riscv/mm/context.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ */
+
+#include <linux/mm.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/mmu_context.h>
+
+/*
+ * Carry out, on the local hart, an instruction cache flush that was deferred
+ * for @mm. RISC-V has no direct mechanism for instruction cache shoot downs,
+ * and sending IPIs to harts that are not running the MM would be
+ * pathologically slow in a common case (many single-hart processes on a
+ * many-hart machine, ie 'make -j'). Such harts are instead marked in
+ * mm->context.icache_stale_mask. If this hart's bit is set, it is cleared and
+ * the local instruction cache is flushed before execution resumes here.
+ * Without CONFIG_SMP this function does nothing.
+ */
+static inline void flush_icache_deferred(struct mm_struct *mm)
+{
+#ifdef CONFIG_SMP
+	cpumask_t *stale = &mm->context.icache_stale_mask;
+	unsigned int this_cpu = smp_processor_id();
+
+	if (!cpumask_test_cpu(this_cpu, stale))
+		return;
+
+	cpumask_clear_cpu(this_cpu, stale);
+	/*
+	 * Ensure the remote hart's writes are visible to this hart.
+	 * This pairs with a barrier in flush_icache_mm.
+	 */
+	smp_mb();
+	local_flush_icache_all();
+#endif
+}
+
+/*
+ * Switch the local hart from address space @prev to @next.
+ *
+ * Updates mm_cpumask() of both MMs, installs @next's page table root in SATP
+ * and flushes the local TLB (CONFIG_MMU only), then performs any instruction
+ * cache flush that was deferred for @next. Returns immediately when @prev and
+ * @next are the same MM. @task is not used here.
+ */
+void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *task)
+{
+ unsigned int cpu;
+
+ if (unlikely(prev == next))
+ return;
+
+ /*
+ * Mark the current MM context as inactive, and the next as
+ * active. This is at least used by the icache flushing
+ * routines in order to determine who should be flushed.
+ */
+ cpu = smp_processor_id();
+
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+#ifdef CONFIG_MMU
+ /* Point SATP at the new top-level page table, then flush the local TLB. */
+ csr_write(CSR_SATP, virt_to_pfn(next->pgd) | SATP_MODE);
+ local_flush_tlb_all();
+#endif
+
+ flush_icache_deferred(next);
+}
diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
new file mode 100644
index 000000000..2fc729422
--- /dev/null
+++ b/arch/riscv/mm/extable.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
+ * Lennox Wu <lennox.wu@sunplusct.com>
+ * Chen Liqin <liqin.chen@sunplusct.com>
+ * Copyright (C) 2013 Regents of the University of California
+ */
+
+
+#include <linux/extable.h>
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+/*
+ * Look up regs->epc in the exception tables. When an entry exists, redirect
+ * regs->epc to its fixup address and return 1; otherwise return 0 and leave
+ * @regs untouched.
+ */
+int fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *entry =
+		search_exception_tables(regs->epc);
+
+	if (!entry)
+		return 0;
+
+	regs->epc = entry->fixup;
+	return 1;
+}
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
new file mode 100644
index 000000000..54b12943c
--- /dev/null
+++ b/arch/riscv/mm/fault.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
+ * Lennox Wu <lennox.wu@sunplusct.com>
+ * Chen Liqin <liqin.chen@sunplusct.com>
+ * Copyright (C) 2012 Regents of the University of California
+ */
+
+
+#include <linux/mm.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/perf_event.h>
+#include <linux/signal.h>
+#include <linux/uaccess.h>
+
+#include <asm/ptrace.h>
+#include <asm/tlbflush.h>
+
+#include "../kernel/head.h"
+
+/*
+ * Handle a fault that cannot be resolved through the normal MM path: apply an
+ * exception-table fixup if one exists for the faulting instruction, otherwise
+ * report an Oops for @addr and kill the current task.
+ */
+static inline void no_context(struct pt_regs *regs, unsigned long addr)
+{
+	const char *what;
+
+	/* A registered fixup means the kernel was prepared for this fault. */
+	if (fixup_exception(regs))
+		return;
+
+	/* Addresses inside the first page are reported as NULL dereferences. */
+	what = (addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request";
+
+	/* The kernel touched a bad page: terminate with extreme prejudice. */
+	bust_spinlocks(1);
+	pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
+		 what, addr);
+	die(regs, "Oops");
+	make_task_dead(SIGKILL);
+}
+
+/*
+ * Act on an error result from handle_mm_fault().
+ *
+ * @regs:  register state at the time of the fault
+ * @addr:  faulting virtual address
+ * @fault: result of handle_mm_fault(); the caller only gets here when a
+ *         VM_FAULT_ERROR bit is set
+ *
+ * Kernel-mode faults go to no_context() (exception fixup or Oops). For user
+ * mode, OOM invokes the OOM killer, SIGBUS and hardware-poison results raise
+ * SIGBUS, and VM_FAULT_SIGSEGV raises SIGSEGV. Previously the poison and
+ * SIGSEGV results fell through to BUG(), although they are legitimate
+ * handle_mm_fault() outcomes.
+ */
+static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
+{
+	/* Kernel mode? Handle exceptions or die */
+	if (!user_mode(regs)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	if (fault & VM_FAULT_OOM) {
+		/*
+		 * We ran out of memory, call the OOM killer, and return to
+		 * userspace (which will retry the fault, or kill us if we got
+		 * oom-killed).
+		 */
+		pagefault_out_of_memory();
+		return;
+	}
+
+	if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) {
+		do_trap(regs, SIGBUS, BUS_ADRERR, addr);
+		return;
+	}
+
+	if (fault & VM_FAULT_SIGSEGV) {
+		do_trap(regs, SIGSEGV, SEGV_MAPERR, addr);
+		return;
+	}
+
+	/* Any other error bit is unexpected here. */
+	BUG();
+}
+
+/*
+ * The faulting address is not covered by a usable mapping. Drop the mmap read
+ * lock taken by the caller, then send SIGSEGV with si_code @code for a
+ * user-mode fault or hand a kernel-mode fault to no_context().
+ */
+static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
+{
+	mmap_read_unlock(mm);
+
+	/* Kernel-mode access: fix up or Oops. */
+	if (!user_mode(regs)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	/* User mode accesses just cause a SIGSEGV */
+	do_trap(regs, SIGSEGV, code, addr);
+}
+
+/*
+ * Handle a fault on an address in the vmalloc range by copying the missing
+ * entries from the reference page table (init_mm.pgd) into the page table
+ * currently installed in SATP.
+ *
+ * @regs: register state at the time of the fault
+ * @code: si_code used when the fault came from user mode
+ * @addr: faulting virtual address
+ *
+ * User-mode accesses get SIGSEGV. If the reference page table has no mapping
+ * for @addr at some level, the fault is passed to no_context().
+ */
+static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
+{
+	pgd_t *pgd, *pgd_k;
+	pud_t *pud, *pud_k;
+	p4d_t *p4d, *p4d_k;
+	pmd_t *pmd, *pmd_k;
+	pte_t *pte_k;
+	int index;
+	unsigned long pfn;
+
+	/*
+	 * User mode accesses just cause a SIGSEGV. do_trap() returns void, so
+	 * it is called as a statement rather than used in a return expression.
+	 */
+	if (user_mode(regs)) {
+		do_trap(regs, SIGSEGV, code, addr);
+		return;
+	}
+
+	/*
+	 * Synchronize this task's top level page-table
+	 * with the 'reference' page table.
+	 *
+	 * Do _not_ use "tsk->active_mm->pgd" here.
+	 * We might be inside an interrupt in the middle
+	 * of a task switch.
+	 */
+	index = pgd_index(addr);
+	pfn = csr_read(CSR_SATP) & SATP_PPN;
+	pgd = (pgd_t *)pfn_to_virt(pfn) + index;
+	pgd_k = init_mm.pgd + index;
+
+	if (!pgd_present(*pgd_k)) {
+		no_context(regs, addr);
+		return;
+	}
+	set_pgd(pgd, *pgd_k);
+
+	p4d = p4d_offset(pgd, addr);
+	p4d_k = p4d_offset(pgd_k, addr);
+	if (!p4d_present(*p4d_k)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	pud = pud_offset(p4d, addr);
+	pud_k = pud_offset(p4d_k, addr);
+	if (!pud_present(*pud_k)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	/*
+	 * Since the vmalloc area is global, it is unnecessary
+	 * to copy individual PTEs
+	 */
+	pmd = pmd_offset(pud, addr);
+	pmd_k = pmd_offset(pud_k, addr);
+	if (!pmd_present(*pmd_k)) {
+		no_context(regs, addr);
+		return;
+	}
+	set_pmd(pmd, *pmd_k);
+
+	/*
+	 * Make sure the actual PTE exists as well to
+	 * catch kernel vmalloc-area accesses to non-mapped
+	 * addresses. If we don't do this, this will just
+	 * silently loop forever.
+	 */
+	pte_k = pte_offset_kernel(pmd_k, addr);
+	if (!pte_present(*pte_k)) {
+		no_context(regs, addr);
+		return;
+	}
+
+	/*
+	 * The kernel assumes that TLBs don't cache invalid
+	 * entries, but in RISC-V, SFENCE.VMA specifies an
+	 * ordering constraint, not a cache flush; it is
+	 * necessary even after writing invalid entries.
+	 */
+	local_flush_tlb_page(addr);
+}
+
+/*
+ * Check the fault cause against the permissions of @vma. Returns true when
+ * the VMA does not allow the attempted access (execute, load or store) and
+ * false when it does. Any other cause value panics.
+ */
+static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
+{
+	switch (cause) {
+	case EXC_INST_PAGE_FAULT:
+		return !(vma->vm_flags & VM_EXEC);
+	case EXC_LOAD_PAGE_FAULT:
+		/* Write implies read */
+		return !(vma->vm_flags & (VM_READ | VM_WRITE));
+	case EXC_STORE_PAGE_FAULT:
+		return !(vma->vm_flags & VM_WRITE);
+	default:
+		panic("%s: unhandled cause %lu", __func__, cause);
+	}
+
+	return false;
+}
+
+/*
+ * This routine handles page faults. It determines the address and the
+ * problem, and then passes it off to one of the appropriate routines.
+ *
+ * @regs: register state saved at the trap; regs->cause holds the exception
+ * cause and regs->badaddr the faulting address.
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs)
+{
+ struct task_struct *tsk;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+ unsigned long addr, cause;
+ unsigned int flags = FAULT_FLAG_DEFAULT;
+ /* si_code for SIGSEGV: "not mapped" until a covering VMA is found. */
+ int code = SEGV_MAPERR;
+ vm_fault_t fault;
+
+ cause = regs->cause;
+ addr = regs->badaddr;
+
+ tsk = current;
+ mm = tsk->mm;
+
+ /*
+ * Fault-in kernel-space virtual memory on-demand.
+ * The 'reference' page table is init_mm.pgd.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may
+ * be in an interrupt or a critical region, and should
+ * only copy the information from the master page table,
+ * nothing more.
+ */
+ if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
+ vmalloc_fault(regs, code, addr);
+ return;
+ }
+
+ /* Enable interrupts if they were enabled in the parent context. */
+ if (likely(regs->status & SR_PIE))
+ local_irq_enable();
+
+ /*
+ * If we're in an interrupt, have no user context, or are running
+ * in an atomic region, then we must not take the fault.
+ */
+ if (unlikely(faulthandler_disabled() || !mm)) {
+ no_context(regs, addr);
+ return;
+ }
+
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
+ /* Translate the exception cause into the matching fault flag. */
+ if (cause == EXC_STORE_PAGE_FAULT)
+ flags |= FAULT_FLAG_WRITE;
+ else if (cause == EXC_INST_PAGE_FAULT)
+ flags |= FAULT_FLAG_INSTRUCTION;
+retry:
+ mmap_read_lock(mm);
+ vma = find_vma(mm, addr);
+ if (unlikely(!vma)) {
+ bad_area(regs, mm, code, addr);
+ return;
+ }
+ if (likely(vma->vm_start <= addr))
+ goto good_area;
+ /* addr lies below the VMA: only acceptable if the VMA can grow down. */
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+ bad_area(regs, mm, code, addr);
+ return;
+ }
+ if (unlikely(expand_stack(vma, addr))) {
+ bad_area(regs, mm, code, addr);
+ return;
+ }
+
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it.
+ */
+good_area:
+ /* From here on a failure is a permission problem, not a missing mapping. */
+ code = SEGV_ACCERR;
+
+ if (unlikely(access_error(cause, vma))) {
+ bad_area(regs, mm, code, addr);
+ return;
+ }
+
+ /*
+ * If for any reason at all we could not handle the fault,
+ * make sure we exit gracefully rather than endlessly redo
+ * the fault.
+ */
+ fault = handle_mm_fault(vma, addr, flags, regs);
+
+ /*
+ * If we need to retry but a fatal signal is pending, handle the
+ * signal first. We do not need to release the mmap_lock because it
+ * would already be released in __lock_page_or_retry in mm/filemap.c.
+ */
+ if (fault_signal_pending(fault, regs))
+ return;
+
+ if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
+ flags |= FAULT_FLAG_TRIED;
+
+ /*
+ * No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+ goto retry;
+ }
+
+ mmap_read_unlock(mm);
+
+ if (unlikely(fault & VM_FAULT_ERROR)) {
+ mm_fault_error(regs, addr, fault);
+ return;
+ }
+ return;
+}
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
new file mode 100644
index 000000000..932dadfdc
--- /dev/null
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/hugetlb.h>
+#include <linux/err.h>
+
+/* Report whether @pud is a leaf entry, as decided by pud_leaf(). */
+int pud_huge(pud_t pud)
+{
+ return pud_leaf(pud);
+}
+
+/* Report whether @pmd is a leaf entry, as decided by pmd_leaf(). */
+int pmd_huge(pmd_t pmd)
+{
+ return pmd_leaf(pmd);
+}
+
+/*
+ * Tell whether @size is a supported huge page size: HPAGE_SIZE always, and
+ * PUD_SIZE additionally when CONFIG_64BIT is enabled.
+ */
+bool __init arch_hugetlb_valid_size(unsigned long size)
+{
+	if (size == HPAGE_SIZE)
+		return true;
+
+	return IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE;
+}
+
+#ifdef CONFIG_CONTIG_ALLOC
+/*
+ * Initcall that registers the PUD-sized hstate on 64-bit kernels.
+ * Always returns 0.
+ */
+static __init int gigantic_pages_init(void)
+{
+	/* With CONTIG_ALLOC, we can allocate gigantic pages at runtime */
+	if (!IS_ENABLED(CONFIG_64BIT))
+		return 0;
+
+	hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
+	return 0;
+}
+arch_initcall(gigantic_pages_init);
+#endif
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
new file mode 100644
index 000000000..6c2f38aac
--- /dev/null
+++ b/arch/riscv/mm/init.c
@@ -0,0 +1,687 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/memblock.h>
+#include <linux/initrd.h>
+#include <linux/swap.h>
+#include <linux/sizes.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <linux/set_memory.h>
+#include <linux/dma-map-ops.h>
+
+#include <asm/fixmap.h>
+#include <asm/tlbflush.h>
+#include <asm/sections.h>
+#include <asm/soc.h>
+#include <asm/io.h>
+#include <asm/ptdump.h>
+
+#include "../kernel/head.h"
+
+unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
+ __page_aligned_bss;
+EXPORT_SYMBOL(empty_zero_page);
+
+extern char _start[];
+#define DTB_EARLY_BASE_VA PGDIR_SIZE
+void *dtb_early_va __initdata;
+uintptr_t dtb_early_pa __initdata;
+
+/*
+ * Page-table allocation hooks. setup_vm() and setup_vm_final() switch these
+ * between the "early", "fixmap" and "late" implementations as boot proceeds.
+ */
+struct pt_alloc_ops {
+ /* Return a usable pointer to the PTE table located at physical @pa. */
+ pte_t *(*get_pte_virt)(phys_addr_t pa);
+ /* Allocate a PTE table for mapping @va; returns its physical address. */
+ phys_addr_t (*alloc_pte)(uintptr_t va);
+#ifndef __PAGETABLE_PMD_FOLDED
+ /* Return a usable pointer to the PMD table located at physical @pa. */
+ pmd_t *(*get_pmd_virt)(phys_addr_t pa);
+ /* Allocate a PMD table for mapping @va; returns its physical address. */
+ phys_addr_t (*alloc_pmd)(uintptr_t va);
+#endif
+};
+
+static phys_addr_t dma32_phys_limit __ro_after_init;
+
+/*
+ * Fill in the upper PFN limit of each memory zone and pass the table to
+ * free_area_init(). ZONE_DMA32 (if configured) ends at dma32_phys_limit and
+ * ZONE_NORMAL at max_low_pfn.
+ */
+static void __init zone_sizes_init(void)
+{
+ unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0, };
+
+#ifdef CONFIG_ZONE_DMA32
+ max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
+#endif
+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+
+ free_area_init(max_zone_pfns);
+}
+
+/* Explicitly clear the PAGE_SIZE bytes of empty_zero_page. */
+static void setup_zero_page(void)
+{
+ memset((void *)empty_zero_page, 0, PAGE_SIZE);
+}
+
+#if defined(CONFIG_MMU) && defined(CONFIG_DEBUG_VM)
+/*
+ * Log one line of the virtual memory layout with the region size in kB.
+ *
+ * @name: region label (read only)
+ * @b:    first address of the region
+ * @t:    end address of the region
+ */
+static inline void print_mlk(const char *name, unsigned long b, unsigned long t)
+{
+	/* The size is an unsigned long, so it is printed with %lu, not %ld. */
+	pr_notice("%12s : 0x%08lx - 0x%08lx (%4lu kB)\n", name, b, t,
+		  (((t) - (b)) >> 10));
+}
+
+/*
+ * Log one line of the virtual memory layout with the region size in MB.
+ *
+ * @name: region label (read only)
+ * @b:    first address of the region
+ * @t:    end address of the region
+ */
+static inline void print_mlm(const char *name, unsigned long b, unsigned long t)
+{
+	/* The size is an unsigned long, so it is printed with %lu, not %ld. */
+	pr_notice("%12s : 0x%08lx - 0x%08lx (%4lu MB)\n", name, b, t,
+		  (((t) - (b)) >> 20));
+}
+
+/*
+ * Print the kernel's virtual memory layout: fixmap (in kB) and the pci io,
+ * vmemmap, vmalloc and lowmem regions (in MB).
+ */
+static void print_vm_layout(void)
+{
+ pr_notice("Virtual kernel memory layout:\n");
+ print_mlk("fixmap", (unsigned long)FIXADDR_START,
+ (unsigned long)FIXADDR_TOP);
+ print_mlm("pci io", (unsigned long)PCI_IO_START,
+ (unsigned long)PCI_IO_END);
+ print_mlm("vmemmap", (unsigned long)VMEMMAP_START,
+ (unsigned long)VMEMMAP_END);
+ print_mlm("vmalloc", (unsigned long)VMALLOC_START,
+ (unsigned long)VMALLOC_END);
+ print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
+ (unsigned long)high_memory);
+}
+#else
+static void print_vm_layout(void) { }
+#endif /* CONFIG_DEBUG_VM */
+
+/*
+ * Set high_memory from max_low_pfn, call memblock_free_all(), and print the
+ * memory summary and the virtual memory layout.
+ */
+void __init mem_init(void)
+{
+#ifdef CONFIG_FLATMEM
+ BUG_ON(!mem_map);
+#endif /* CONFIG_FLATMEM */
+
+ /* high_memory is the virtual address just past the last low-memory page. */
+ high_memory = (void *)(__va(PFN_PHYS(max_low_pfn)));
+ memblock_free_all();
+
+ mem_init_print_info(NULL);
+ print_vm_layout();
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+/*
+ * Validate and reserve the initial ramdisk described by phys_initrd_start
+ * and phys_initrd_size, then publish its virtual range in initrd_start and
+ * initrd_end. If the page-rounded region is not memory or overlaps a
+ * reserved region, the initrd is disabled (initrd_start = initrd_end = 0).
+ */
+static void __init setup_initrd(void)
+{
+	phys_addr_t start;
+	unsigned long size;
+
+	/* Ignore the virtual address computed during device tree parsing */
+	initrd_start = initrd_end = 0;
+
+	if (!phys_initrd_size)
+		return;
+	/*
+	 * Round the memory region to page boundaries as per free_initrd_mem()
+	 * This allows us to detect whether the pages overlapping the initrd
+	 * are in use, but more importantly, reserves the entire set of pages
+	 * as we don't want these pages allocated for other purposes.
+	 */
+	start = round_down(phys_initrd_start, PAGE_SIZE);
+	size = phys_initrd_size + (phys_initrd_start - start);
+	size = round_up(size, PAGE_SIZE);
+
+	/*
+	 * Neither error message ends in a newline: the "disable" path below
+	 * completes the line with pr_cont().
+	 */
+	if (!memblock_is_region_memory(start, size)) {
+		pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region",
+		       (u64)start, size);
+		goto disable;
+	}
+
+	if (memblock_is_region_reserved(start, size)) {
+		pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region",
+		       (u64)start, size);
+		goto disable;
+	}
+
+	memblock_reserve(start, size);
+	/* Now convert initrd to virtual addresses */
+	initrd_start = (unsigned long)__va(phys_initrd_start);
+	initrd_end = initrd_start + phys_initrd_size;
+	initrd_below_start_ok = 1;
+
+	pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n",
+		(void *)(initrd_start), size);
+	return;
+disable:
+	pr_cont(" - disabling initrd\n");
+	initrd_start = 0;
+	initrd_end = 0;
+}
+#endif /* CONFIG_BLK_DEV_INITRD */
+
+/*
+ * Early memblock setup: limit usable memory to what the kernel can map,
+ * reserve the kernel image, the initrd (if configured) and the device tree
+ * blob, derive max_pfn/max_low_pfn and the DMA32 limit from the end of DRAM,
+ * and reserve the contiguous DMA area.
+ */
+void __init setup_bootmem(void)
+{
+ phys_addr_t mem_start = 0;
+ phys_addr_t start, dram_end, end = 0;
+ phys_addr_t vmlinux_end = __pa_symbol(&_end);
+ phys_addr_t vmlinux_start = __pa_symbol(&_start);
+ phys_addr_t max_mapped_addr = __pa(~(ulong)0);
+ u64 i;
+
+ /*
+ * Find the memory region containing the kernel.
+ * mem_start records the first non-zero region start but is not read
+ * again in this function.
+ */
+ for_each_mem_range(i, &start, &end) {
+ phys_addr_t size = end - start;
+ if (!mem_start)
+ mem_start = start;
+ if (start <= vmlinux_start && vmlinux_end <= end)
+ BUG_ON(size == 0);
+ }
+
+ /*
+ * The maximal physical memory size is -PAGE_OFFSET.
+ * Make sure that any memory beyond mem_start + (-PAGE_OFFSET) is removed
+ * as it is unusable by kernel.
+ */
+ memblock_enforce_memory_limit(-PAGE_OFFSET);
+
+ /* Reserve from the start of the kernel to the end of the kernel */
+ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
+
+ dram_end = memblock_end_of_DRAM();
+
+ /*
+ * memblock allocator is not aware of the fact that last 4K bytes of
+ * the addressable memory can not be mapped because of IS_ERR_VALUE
+ * macro. Make sure that last 4k bytes are not usable by memblock
+ * if end of dram is equal to maximum addressable memory.
+ */
+ if (max_mapped_addr == (dram_end - 1))
+ memblock_set_current_limit(max_mapped_addr - 4096);
+
+ max_pfn = PFN_DOWN(dram_end);
+ max_low_pfn = max_pfn;
+ /*
+ * NOTE(review): with a 32-bit unsigned long, 4UL * SZ_1G presumably
+ * wraps to 0 - confirm this limit is only relied upon on 64-bit.
+ */
+ dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
+ set_max_mapnr(max_low_pfn);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+ setup_initrd();
+#endif /* CONFIG_BLK_DEV_INITRD */
+
+ /*
+ * Avoid using early_init_fdt_reserve_self() since __pa() does
+ * not work for DTB pointers that are fixmap addresses
+ */
+ memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
+
+ dma_contiguous_reserve(dma32_phys_limit);
+ memblock_allow_resize();
+ memblock_dump_all();
+}
+
+#ifdef CONFIG_MMU
+static struct pt_alloc_ops pt_ops;
+
+unsigned long va_pa_offset;
+EXPORT_SYMBOL(va_pa_offset);
+unsigned long pfn_base;
+EXPORT_SYMBOL(pfn_base);
+
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
+pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
+pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
+
+pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+
+/*
+ * Install or remove the fixmap mapping for slot @idx.
+ *
+ * @idx: fixmap slot; must lie strictly between FIX_HOLE and
+ * __end_of_fixed_addresses
+ * @phys: physical address to map
+ * @prot: page protection; a zero value clears the slot instead of mapping
+ *
+ * The local TLB entry for the slot's virtual address is flushed afterwards.
+ */
+void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
+{
+ unsigned long addr = __fix_to_virt(idx);
+ pte_t *ptep;
+
+ BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses);
+
+ ptep = &fixmap_pte[pte_index(addr)];
+
+ if (pgprot_val(prot))
+ set_pte(ptep, pfn_pte(phys >> PAGE_SHIFT, prot));
+ else
+ pte_clear(&init_mm, addr, ptep);
+ local_flush_tlb_page(addr);
+}
+
+/* Early variant: the physical address @pa is used directly as the pointer. */
+static inline pte_t *__init get_pte_virt_early(phys_addr_t pa)
+{
+ return (pte_t *)((uintptr_t)pa);
+}
+
+/*
+ * Fixmap variant: remap the FIX_PTE slot onto @pa and return the resulting
+ * pointer. The returned pointer is only valid until the next call, which
+ * reuses the same slot.
+ */
+static inline pte_t *__init get_pte_virt_fixmap(phys_addr_t pa)
+{
+ clear_fixmap(FIX_PTE);
+ return (pte_t *)set_fixmap_offset(FIX_PTE, pa);
+}
+
+/* Late variant: translate @pa with __va(). */
+static inline pte_t *get_pte_virt_late(phys_addr_t pa)
+{
+ return (pte_t *) __va(pa);
+}
+
+/* Early variant: PTE tables are never allocated this early, so this BUG()s. */
+static inline phys_addr_t __init alloc_pte_early(uintptr_t va)
+{
+ /*
+ * We only create PMD or PGD early mappings so we
+ * should never reach here with MMU disabled.
+ */
+ BUG();
+}
+
+/* Fixmap variant: take one page-aligned page from memblock; @va is unused. */
+static inline phys_addr_t __init alloc_pte_fixmap(uintptr_t va)
+{
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+/*
+ * Late variant: allocate a page with __get_free_page() and run the PTE page
+ * constructor on it. BUG()s if either step fails. Returns the physical
+ * address of the page; @va is unused.
+ */
+static phys_addr_t alloc_pte_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ if (!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)))
+ BUG();
+ return __pa(vaddr);
+}
+
+/*
+ * Map the single page at physical @pa to virtual @va in the PTE table @ptep
+ * with protection @prot. @sz must be PAGE_SIZE. An entry that is already
+ * populated is left untouched.
+ */
+static void __init create_pte_mapping(pte_t *ptep,
+				      uintptr_t va, phys_addr_t pa,
+				      phys_addr_t sz, pgprot_t prot)
+{
+	pte_t *slot = &ptep[pte_index(va)];
+
+	BUG_ON(sz != PAGE_SIZE);
+
+	if (!pte_none(*slot))
+		return;
+
+	*slot = pfn_pte(PFN_DOWN(pa), prot);
+}
+
+#ifndef __PAGETABLE_PMD_FOLDED
+
+pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
+pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
+pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
+pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
+
+/* Early variant: the physical address @pa is used directly as the pointer. */
+static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
+{
+ /* Before MMU is enabled */
+ return (pmd_t *)((uintptr_t)pa);
+}
+
+/*
+ * Fixmap variant: remap the FIX_PMD slot onto @pa and return the resulting
+ * pointer. The returned pointer is only valid until the next call, which
+ * reuses the same slot.
+ */
+static pmd_t *__init get_pmd_virt_fixmap(phys_addr_t pa)
+{
+ clear_fixmap(FIX_PMD);
+ return (pmd_t *)set_fixmap_offset(FIX_PMD, pa);
+}
+
+/* Late variant: translate @pa with __va(). */
+static pmd_t *get_pmd_virt_late(phys_addr_t pa)
+{
+ return (pmd_t *) __va(pa);
+}
+
+/*
+ * Early variant: always hands out the single static early_pmd table, so @va
+ * must fall within the first PGDIR-sized region starting at PAGE_OFFSET.
+ */
+static phys_addr_t __init alloc_pmd_early(uintptr_t va)
+{
+ BUG_ON((va - PAGE_OFFSET) >> PGDIR_SHIFT);
+
+ return (uintptr_t)early_pmd;
+}
+
+/* Fixmap variant: take one page-aligned page from memblock; @va is unused. */
+static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
+{
+ return memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+/*
+ * Late variant: allocate a page with __get_free_page(), BUG() on failure,
+ * and return its physical address. @va is unused.
+ */
+static phys_addr_t alloc_pmd_late(uintptr_t va)
+{
+ unsigned long vaddr;
+
+ vaddr = __get_free_page(GFP_KERNEL);
+ BUG_ON(!vaddr);
+ return __pa(vaddr);
+}
+
+/*
+ * Map @pa at @va below the PMD table @pmdp.
+ *
+ * With @sz == PMD_SIZE a leaf PMD entry is written (unless the slot is
+ * already populated). Otherwise the mapping is delegated to
+ * create_pte_mapping() on the PTE table behind the PMD entry, which is
+ * allocated through pt_ops and zeroed first if the slot was empty.
+ */
+static void __init create_pmd_mapping(pmd_t *pmdp,
+ uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot)
+{
+ pte_t *ptep;
+ phys_addr_t pte_phys;
+ uintptr_t pmd_idx = pmd_index(va);
+
+ if (sz == PMD_SIZE) {
+ if (pmd_none(pmdp[pmd_idx]))
+ pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pa), prot);
+ return;
+ }
+
+ if (pmd_none(pmdp[pmd_idx])) {
+ /* No PTE table yet: allocate one, link it, then clear it. */
+ pte_phys = pt_ops.alloc_pte(va);
+ pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE);
+ ptep = pt_ops.get_pte_virt(pte_phys);
+ memset(ptep, 0, PAGE_SIZE);
+ } else {
+ /* Reuse the PTE table the PMD entry already points to. */
+ pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx]));
+ ptep = pt_ops.get_pte_virt(pte_phys);
+ }
+
+ create_pte_mapping(ptep, va, pa, sz, prot);
+}
+
+#define pgd_next_t pmd_t
+#define alloc_pgd_next(__va) pt_ops.alloc_pmd(__va)
+#define get_pgd_next_virt(__pa) pt_ops.get_pmd_virt(__pa)
+#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
+ create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
+#define fixmap_pgd_next fixmap_pmd
+#else
+#define pgd_next_t pte_t
+#define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
+#define get_pgd_next_virt(__pa) pt_ops.get_pte_virt(__pa)
+#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
+ create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
+#define fixmap_pgd_next fixmap_pte
+#endif
+
+/*
+ * Map @pa at @va in the top-level page table @pgdp.
+ *
+ * With @sz == PGDIR_SIZE a PGD entry is written directly (unless the slot is
+ * already non-zero). Otherwise the mapping is delegated to the next level
+ * (PMD, or PTE when the PMD level is folded); the next-level table is
+ * allocated and zeroed first if the PGD slot was empty.
+ */
+void __init create_pgd_mapping(pgd_t *pgdp,
+ uintptr_t va, phys_addr_t pa,
+ phys_addr_t sz, pgprot_t prot)
+{
+ pgd_next_t *nextp;
+ phys_addr_t next_phys;
+ uintptr_t pgd_idx = pgd_index(va);
+
+ if (sz == PGDIR_SIZE) {
+ if (pgd_val(pgdp[pgd_idx]) == 0)
+ pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(pa), prot);
+ return;
+ }
+
+ if (pgd_val(pgdp[pgd_idx]) == 0) {
+ /* No next-level table yet: allocate one, link it, then clear it. */
+ next_phys = alloc_pgd_next(va);
+ pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE);
+ nextp = get_pgd_next_virt(next_phys);
+ memset(nextp, 0, PAGE_SIZE);
+ } else {
+ /* Reuse the table the PGD entry already points to. */
+ next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_idx]));
+ nextp = get_pgd_next_virt(next_phys);
+ }
+
+ create_pgd_next_mapping(nextp, va, pa, sz, prot);
+}
+
+/*
+ * Choose the mapping granule for the region [@base, @base + @size):
+ * PMD_SIZE when both the base and the size are PMD_SIZE aligned, PAGE_SIZE
+ * otherwise.
+ */
+static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
+{
+	/* A low bit set in either value rules out PMD-sized mappings. */
+	bool pmd_aligned = !((base | size) & (PMD_SIZE - 1));
+
+	return pmd_aligned ? PMD_SIZE : PAGE_SIZE;
+}
+
+/*
+ * setup_vm() is called from head.S with MMU-off.
+ *
+ * Following requirements should be honoured for setup_vm() to work
+ * correctly:
+ * 1) It should use PC-relative addressing for accessing kernel symbols.
+ * To achieve this we always use GCC cmodel=medany.
+ * 2) The compiler instrumentation for FTRACE will not work for setup_vm()
+ * so disable compiler instrumentation when FTRACE is enabled.
+ *
+ * Currently, the above requirements are honoured by using custom CFLAGS
+ * for init.o in mm/Makefile.
+ */
+
+#ifndef __riscv_cmodel_medany
+#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
+#endif
+
+/*
+ * Build the early page tables while the MMU is still off.
+ *
+ * @dtb_pa: physical address of the device tree blob
+ *
+ * Sets va_pa_offset and pfn_base from the load address, installs the "early"
+ * page-table hooks, and populates early_pg_dir (fixmap, kernel image, DTB)
+ * and trampoline_pg_dir (first PMD_SIZE/PGDIR_SIZE of the kernel). Also
+ * records dtb_early_va and dtb_early_pa.
+ */
+asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+{
+ uintptr_t va, pa, end_va;
+ uintptr_t load_pa = (uintptr_t)(&_start);
+ uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
+ uintptr_t map_size;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pmd_t fix_bmap_spmd, fix_bmap_epmd;
+#endif
+
+ va_pa_offset = PAGE_OFFSET - load_pa;
+ pfn_base = PFN_DOWN(load_pa);
+
+ /*
+ * Enforce boot alignment requirements of RV32 and
+ * RV64 by only allowing PMD or PGD mappings.
+ */
+ map_size = PMD_SIZE;
+
+ /* Sanity check alignment and size */
+ BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
+ BUG_ON((load_pa % map_size) != 0);
+
+ /* Use the MMU-off ("early") page-table hooks from here on. */
+ pt_ops.alloc_pte = alloc_pte_early;
+ pt_ops.get_pte_virt = get_pte_virt_early;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_early;
+ pt_ops.get_pmd_virt = get_pmd_virt_early;
+#endif
+ /* Setup early PGD for fixmap */
+ create_pgd_mapping(early_pg_dir, FIXADDR_START,
+ (uintptr_t)fixmap_pgd_next, PGDIR_SIZE, PAGE_TABLE);
+
+#ifndef __PAGETABLE_PMD_FOLDED
+ /* Setup fixmap PMD */
+ create_pmd_mapping(fixmap_pmd, FIXADDR_START,
+ (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
+ /* Setup trampoline PGD and PMD */
+ create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
+ (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
+ create_pmd_mapping(trampoline_pmd, PAGE_OFFSET,
+ load_pa, PMD_SIZE, PAGE_KERNEL_EXEC);
+#else
+ /* Setup trampoline PGD */
+ create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
+ load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC);
+#endif
+
+ /*
+ * Setup early PGD covering entire kernel which will allow
+ * us to reach paging_init(). We map all memory banks later
+ * in setup_vm_final() below.
+ */
+ end_va = PAGE_OFFSET + load_sz;
+ for (va = PAGE_OFFSET; va < end_va; va += map_size)
+ create_pgd_mapping(early_pg_dir, va,
+ load_pa + (va - PAGE_OFFSET),
+ map_size, PAGE_KERNEL_EXEC);
+
+#ifndef __PAGETABLE_PMD_FOLDED
+ /* Setup early PMD for DTB */
+ create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
+ (uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE);
+ /* Create two consecutive PMD mappings for FDT early scan */
+ pa = dtb_pa & ~(PMD_SIZE - 1);
+ create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
+ pa, PMD_SIZE, PAGE_KERNEL);
+ create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
+ pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
+ /* Keep the DTB's offset within its PMD-aligned window. */
+ dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
+#else
+ /* Create two consecutive PGD mappings for FDT early scan */
+ pa = dtb_pa & ~(PGDIR_SIZE - 1);
+ create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
+ pa, PGDIR_SIZE, PAGE_KERNEL);
+ create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE,
+ pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);
+ /* Keep the DTB's offset within its PGDIR-aligned window. */
+ dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1));
+#endif
+ dtb_early_pa = dtb_pa;
+
+ /*
+ * Boot-time fixmap can only handle PMD_SIZE mappings. Thus, the
+ * boot-ioremap range can not span multiple pmds.
+ */
+ BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+ != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+
+#ifndef __PAGETABLE_PMD_FOLDED
+ /*
+ * Early ioremap fixmap is already created as it lies within first 2MB
+ * of fixmap region. We always map PMD_SIZE. Thus, both FIX_BTMAP_END
+ * FIX_BTMAP_BEGIN should lie in the same pmd. Verify that and warn
+ * the user if not.
+ */
+ fix_bmap_spmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_BEGIN))];
+ fix_bmap_epmd = fixmap_pmd[pmd_index(__fix_to_virt(FIX_BTMAP_END))];
+ if (pmd_val(fix_bmap_spmd) != pmd_val(fix_bmap_epmd)) {
+ WARN_ON(1);
+ pr_warn("fixmap btmap start [%08lx] != end [%08lx]\n",
+ pmd_val(fix_bmap_spmd), pmd_val(fix_bmap_epmd));
+ pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
+ fix_to_virt(FIX_BTMAP_BEGIN));
+ pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
+ fix_to_virt(FIX_BTMAP_END));
+
+ pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
+ pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
+ }
+#endif
+}
+
+static void __init setup_vm_final(void)
+{
+ uintptr_t va, map_size;
+ phys_addr_t pa, start, end;
+ u64 i;
+
+ /*
+ * MMU is enabled at this point, but page table setup is not complete yet.
+ * The fixmap page table alloc functions should be used at this point.
+ */
+ pt_ops.alloc_pte = alloc_pte_fixmap;
+ pt_ops.get_pte_virt = get_pte_virt_fixmap;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_fixmap;
+ pt_ops.get_pmd_virt = get_pmd_virt_fixmap;
+#endif
+ /* Setup swapper PGD for fixmap */
+ create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
+ __pa_symbol(fixmap_pgd_next),
+ PGDIR_SIZE, PAGE_TABLE);
+
+ /* Map all memory banks */
+ for_each_mem_range(i, &start, &end) {
+ if (start >= end)
+ break;
+ if (start <= __pa(PAGE_OFFSET) &&
+ __pa(PAGE_OFFSET) < end)
+ start = __pa(PAGE_OFFSET);
+
+ map_size = best_map_size(start, end - start);
+ for (pa = start; pa < end; pa += map_size) {
+ va = (uintptr_t)__va(pa);
+ create_pgd_mapping(swapper_pg_dir, va, pa,
+ map_size, PAGE_KERNEL_EXEC);
+ }
+ }
+
+ /* Clear fixmap PTE and PMD mappings */
+ clear_fixmap(FIX_PTE);
+ clear_fixmap(FIX_PMD);
+
+ /* Move to swapper page table */
+ csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
+ local_flush_tlb_all();
+
+ /* generic page allocation functions must be used to setup page table */
+ pt_ops.alloc_pte = alloc_pte_late;
+ pt_ops.get_pte_virt = get_pte_virt_late;
+#ifndef __PAGETABLE_PMD_FOLDED
+ pt_ops.alloc_pmd = alloc_pmd_late;
+ pt_ops.get_pmd_virt = get_pmd_virt_late;
+#endif
+}
+#else
+asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+{
+#ifdef CONFIG_BUILTIN_DTB
+ dtb_early_va = soc_lookup_builtin_dtb();
+ if (!dtb_early_va) {
+ /* Fallback to first available DTS */
+ dtb_early_va = (void *) __dtb_start;
+ }
+#else
+ dtb_early_va = (void *)dtb_pa;
+#endif
+ dtb_early_pa = dtb_pa;
+}
+
+static inline void setup_vm_final(void)
+{
+}
+#endif /* CONFIG_MMU */
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+void mark_rodata_ro(void)
+{
+ unsigned long text_start = (unsigned long)_text;
+ unsigned long text_end = (unsigned long)_etext;
+ unsigned long rodata_start = (unsigned long)__start_rodata;
+ unsigned long data_start = (unsigned long)_data;
+ unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn)));
+
+ set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT);
+ set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
+ set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
+ set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT);
+
+ debug_checkwx();
+}
+#endif
+
+static void __init resource_init(void)
+{
+ struct memblock_region *region;
+
+ for_each_mem_region(region) {
+ struct resource *res;
+
+ res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
+ if (!res)
+ panic("%s: Failed to allocate %zu bytes\n", __func__,
+ sizeof(struct resource));
+
+ if (memblock_is_nomap(region)) {
+ res->name = "reserved";
+ res->flags = IORESOURCE_MEM;
+ } else {
+ res->name = "System RAM";
+ res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ }
+ res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
+ res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+
+ request_resource(&iomem_resource, res);
+ }
+}
+
+void __init paging_init(void)
+{
+ setup_vm_final();
+ setup_zero_page();
+}
+
+void __init misc_mem_init(void)
+{
+ sparse_init();
+ zone_sizes_init();
+ resource_init();
+}
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
+ struct vmem_altmap *altmap)
+{
+ return vmemmap_populate_basepages(start, end, node, NULL);
+}
+#endif
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
new file mode 100644
index 000000000..2db442701
--- /dev/null
+++ b/arch/riscv/mm/kasan_init.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Andes Technology Corporation
+
+#include <linux/pfn.h>
+#include <linux/init_task.h>
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/fixmap.h>
+
+extern pgd_t early_pg_dir[PTRS_PER_PGD];
+asmlinkage void __init kasan_early_init(void)
+{
+ uintptr_t i;
+ pgd_t *pgd = early_pg_dir + pgd_index(KASAN_SHADOW_START);
+
+ BUILD_BUG_ON(KASAN_SHADOW_OFFSET !=
+ KASAN_SHADOW_END - (1UL << (64 - KASAN_SHADOW_SCALE_SHIFT)));
+
+ for (i = 0; i < PTRS_PER_PTE; ++i)
+ set_pte(kasan_early_shadow_pte + i,
+ pfn_pte(virt_to_pfn(kasan_early_shadow_page), PAGE_KERNEL));
+
+ for (i = 0; i < PTRS_PER_PMD; ++i)
+ set_pmd(kasan_early_shadow_pmd + i,
+ pfn_pmd(PFN_DOWN
+ (__pa((uintptr_t) kasan_early_shadow_pte)),
+ __pgprot(_PAGE_TABLE)));
+
+ for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
+ i += PGDIR_SIZE, ++pgd)
+ set_pgd(pgd,
+ pfn_pgd(PFN_DOWN
+ (__pa(((uintptr_t) kasan_early_shadow_pmd))),
+ __pgprot(_PAGE_TABLE)));
+
+ /* init for swapper_pg_dir */
+ pgd = pgd_offset_k(KASAN_SHADOW_START);
+
+ for (i = KASAN_SHADOW_START; i < KASAN_SHADOW_END;
+ i += PGDIR_SIZE, ++pgd)
+ set_pgd(pgd,
+ pfn_pgd(PFN_DOWN
+ (__pa(((uintptr_t) kasan_early_shadow_pmd))),
+ __pgprot(_PAGE_TABLE)));
+
+ local_flush_tlb_all();
+}
+
+/*
+ * Map freshly allocated pages over the shadow range [start, end).
+ *
+ * PTE and PMD tables for the page-aligned range are taken from memblock,
+ * each PTE is pointed at a newly allocated page, and the tables are linked
+ * in starting at the kernel PGD entry for the range. Once the TLB has been
+ * flushed the range is zeroed.
+ *
+ * Every allocation is checked: a failure here would otherwise be a NULL
+ * dereference (tables) or a PTE built from physical address 0 (pages), so
+ * panic with the failing size instead.
+ */
+static void __init populate(void *start, void *end)
+{
+	unsigned long i, offset;
+	unsigned long vaddr = (unsigned long)start & PAGE_MASK;
+	unsigned long vend = PAGE_ALIGN((unsigned long)end);
+	unsigned long n_pages = (vend - vaddr) / PAGE_SIZE;
+	/* Both counts round up and keep one spare table on exact multiples. */
+	unsigned long n_ptes =
+		((n_pages + PTRS_PER_PTE) & -PTRS_PER_PTE) / PTRS_PER_PTE;
+	unsigned long n_pmds =
+		((n_ptes + PTRS_PER_PMD) & -PTRS_PER_PMD) / PTRS_PER_PMD;
+	unsigned long pte_bytes = n_ptes * PTRS_PER_PTE * sizeof(pte_t);
+	unsigned long pmd_bytes = n_pmds * PTRS_PER_PMD * sizeof(pmd_t);
+	pte_t *pte;
+	pmd_t *pmd;
+	pgd_t *pgd;
+
+	pte = memblock_alloc(pte_bytes, PAGE_SIZE);
+	if (!pte)
+		panic("%s: Failed to allocate %lu bytes\n", __func__,
+		      pte_bytes);
+
+	pmd = memblock_alloc(pmd_bytes, PAGE_SIZE);
+	if (!pmd)
+		panic("%s: Failed to allocate %lu bytes\n", __func__,
+		      pmd_bytes);
+
+	pgd = pgd_offset_k(vaddr);
+
+	/* One newly allocated page per shadow PTE. */
+	for (i = 0; i < n_pages; i++) {
+		phys_addr_t phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+
+		if (!phys)
+			panic("%s: Failed to allocate %lu bytes\n", __func__,
+			      PAGE_SIZE);
+
+		set_pte(&pte[i], pfn_pte(PHYS_PFN(phys), PAGE_KERNEL));
+	}
+
+	/* Each PMD entry points at one PTRS_PER_PTE-sized slice of pte[]. */
+	for (i = 0, offset = 0; i < n_ptes; i++, offset += PTRS_PER_PTE)
+		set_pmd(&pmd[i],
+			pfn_pmd(PFN_DOWN(__pa(&pte[offset])),
+				__pgprot(_PAGE_TABLE)));
+
+	/* Each PGD entry points at one PTRS_PER_PMD-sized slice of pmd[]. */
+	for (i = 0, offset = 0; i < n_pmds; i++, offset += PTRS_PER_PMD)
+		set_pgd(&pgd[i],
+			pfn_pgd(PFN_DOWN(__pa(&pmd[offset])),
+				__pgprot(_PAGE_TABLE)));
+
+	local_flush_tlb_all();
+	memset(start, 0, end - start);
+}
+
+/*
+ * Set up the KASAN shadow once the memory ranges are known.
+ *
+ * The region from KASAN_SHADOW_START up to the shadow of VMALLOC_END is
+ * handed to kasan_populate_early_shadow(), and the shadow of every memory
+ * range is mapped through populate(). The early shadow PTEs are then
+ * rewritten with present/read/accessed permissions only (no write), the
+ * early shadow page is zeroed, and init_task.kasan_depth is reset to 0.
+ */
+void __init kasan_init(void)
+{
+	phys_addr_t _start, _end;
+	u64 i;
+
+	kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
+				    (void *)kasan_mem_to_shadow((void *)
+								VMALLOC_END));
+
+	for_each_mem_range(i, &_start, &_end) {
+		void *start = (void *)__va(_start);
+		void *end = (void *)__va(_end);
+
+		if (start >= end)
+			break;
+
+		populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end));
+	}
+
+	/* Re-point every early shadow PTE at the shared page, read-only. */
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		set_pte(&kasan_early_shadow_pte[i],
+			mk_pte(virt_to_page(kasan_early_shadow_page),
+			       __pgprot(_PAGE_PRESENT | _PAGE_READ |
+					_PAGE_ACCESSED)));
+
+	memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+	init_task.kasan_depth = 0;
+}
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
new file mode 100644
index 000000000..09f6be19b
--- /dev/null
+++ b/arch/riscv/mm/pageattr.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019 SiFive
+ */
+
+#include <linux/pagewalk.h>
+#include <linux/pgtable.h>
+#include <asm/tlbflush.h>
+#include <asm/bitops.h>
+#include <asm/set_memory.h>
+
+struct pageattr_masks {
+ pgprot_t set_mask;
+ pgprot_t clear_mask;
+};
+
+static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk)
+{
+ struct pageattr_masks *masks = walk->private;
+ unsigned long new_val = val;
+
+ new_val &= ~(pgprot_val(masks->clear_mask));
+ new_val |= (pgprot_val(masks->set_mask));
+
+ return new_val;
+}
+
+static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pgd_t val = READ_ONCE(*pgd);
+
+ if (pgd_leaf(val)) {
+ val = __pgd(set_pageattr_masks(pgd_val(val), walk));
+ set_pgd(pgd, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ p4d_t val = READ_ONCE(*p4d);
+
+ if (p4d_leaf(val)) {
+ val = __p4d(set_pageattr_masks(p4d_val(val), walk));
+ set_p4d(p4d, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_pud_entry(pud_t *pud, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pud_t val = READ_ONCE(*pud);
+
+ if (pud_leaf(val)) {
+ val = __pud(set_pageattr_masks(pud_val(val), walk));
+ set_pud(pud, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pmd_t val = READ_ONCE(*pmd);
+
+ if (pmd_leaf(val)) {
+ val = __pmd(set_pageattr_masks(pmd_val(val), walk));
+ set_pmd(pmd, val);
+ }
+
+ return 0;
+}
+
+static int pageattr_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long next, struct mm_walk *walk)
+{
+ pte_t val = READ_ONCE(*pte);
+
+ val = __pte(set_pageattr_masks(pte_val(val), walk));
+ set_pte(pte, val);
+
+ return 0;
+}
+
+static int pageattr_pte_hole(unsigned long addr, unsigned long next,
+ int depth, struct mm_walk *walk)
+{
+ /* Nothing to do here */
+ return 0;
+}
+
+static const struct mm_walk_ops pageattr_ops = {
+ .pgd_entry = pageattr_pgd_entry,
+ .p4d_entry = pageattr_p4d_entry,
+ .pud_entry = pageattr_pud_entry,
+ .pmd_entry = pageattr_pmd_entry,
+ .pte_entry = pageattr_pte_entry,
+ .pte_hole = pageattr_pte_hole,
+};
+
+static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
+ pgprot_t clear_mask)
+{
+ int ret;
+ unsigned long start = addr;
+ unsigned long end = start + PAGE_SIZE * numpages;
+ struct pageattr_masks masks = {
+ .set_mask = set_mask,
+ .clear_mask = clear_mask
+ };
+
+ if (!numpages)
+ return 0;
+
+ mmap_write_lock(&init_mm);
+ ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
+ &masks);
+ mmap_write_unlock(&init_mm);
+
+ flush_tlb_kernel_range(start, end);
+
+ return ret;
+}
+
+int set_memory_ro(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, __pgprot(_PAGE_READ),
+ __pgprot(_PAGE_WRITE));
+}
+
+int set_memory_rw(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, __pgprot(_PAGE_READ | _PAGE_WRITE),
+ __pgprot(0));
+}
+
+int set_memory_x(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, __pgprot(_PAGE_EXEC), __pgprot(0));
+}
+
+int set_memory_nx(unsigned long addr, int numpages)
+{
+ return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_EXEC));
+}
+
+int set_direct_map_invalid_noflush(struct page *page)
+{
+ int ret;
+ unsigned long start = (unsigned long)page_address(page);
+ unsigned long end = start + PAGE_SIZE;
+ struct pageattr_masks masks = {
+ .set_mask = __pgprot(0),
+ .clear_mask = __pgprot(_PAGE_PRESENT)
+ };
+
+ mmap_read_lock(&init_mm);
+ ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
+ mmap_read_unlock(&init_mm);
+
+ return ret;
+}
+
+int set_direct_map_default_noflush(struct page *page)
+{
+ int ret;
+ unsigned long start = (unsigned long)page_address(page);
+ unsigned long end = start + PAGE_SIZE;
+ struct pageattr_masks masks = {
+ .set_mask = PAGE_KERNEL,
+ .clear_mask = __pgprot(0)
+ };
+
+ mmap_read_lock(&init_mm);
+ ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
+ mmap_read_unlock(&init_mm);
+
+ return ret;
+}
+
+void __kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ if (!debug_pagealloc_enabled())
+ return;
+
+ if (enable)
+ __set_memory((unsigned long)page_address(page), numpages,
+ __pgprot(_PAGE_PRESENT), __pgprot(0));
+ else
+ __set_memory((unsigned long)page_address(page), numpages,
+ __pgprot(0), __pgprot(_PAGE_PRESENT));
+}
diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c
new file mode 100644
index 000000000..e8e4dcd39
--- /dev/null
+++ b/arch/riscv/mm/physaddr.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/types.h>
+#include <linux/mmdebug.h>
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <asm/sections.h>
+
+phys_addr_t __virt_to_phys(unsigned long x)
+{
+ phys_addr_t y = x - PAGE_OFFSET;
+
+ /*
+ * Boundary checking against the kernel linear mapping space.
+ */
+ WARN(y >= KERN_VIRT_SIZE,
+ "virt_to_phys used for non-linear address: %pK (%pS)\n",
+ (void *)x, (void *)x);
+
+ return __va_to_pa_nodebug(x);
+}
+EXPORT_SYMBOL(__virt_to_phys);
+
+phys_addr_t __phys_addr_symbol(unsigned long x)
+{
+ unsigned long kernel_start = (unsigned long)PAGE_OFFSET;
+ unsigned long kernel_end = (unsigned long)_end;
+
+ /*
+ * Boundary checking against the kernel image mapping.
+ * __pa_symbol should only be used on kernel symbol addresses.
+ */
+ VIRTUAL_BUG_ON(x < kernel_start || x > kernel_end);
+
+ return __va_to_pa_nodebug(x);
+}
+EXPORT_SYMBOL(__phys_addr_symbol);
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
new file mode 100644
index 000000000..ace74dec7
--- /dev/null
+++ b/arch/riscv/mm/ptdump.c
@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2019 SiFive
+ */
+
+#include <linux/efi.h>
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/ptdump.h>
+
+#include <asm/ptdump.h>
+#include <linux/pgtable.h>
+#include <asm/kasan.h>
+
+#define pt_dump_seq_printf(m, fmt, args...) \
+({ \
+ if (m) \
+ seq_printf(m, fmt, ##args); \
+})
+
+/*
+ * Write a plain string to the seq_file, if one was supplied. seq_puts() is
+ * used rather than seq_printf() so that the string is emitted verbatim and
+ * a '%' in it is never interpreted as a conversion specifier.
+ */
+#define pt_dump_seq_puts(m, fmt)	\
+({					\
+	if (m)				\
+		seq_puts(m, fmt);	\
+})
+
+/*
+ * The page dumper groups page table entries of the same type into a single
+ * description. It uses pg_state to track the range information while
+ * iterating over the pte entries. When the continuity is broken it then
+ * dumps out a description of the range.
+ */
+struct pg_state {
+ struct ptdump_state ptdump;
+ struct seq_file *seq;
+ const struct addr_marker *marker;
+ unsigned long start_address;
+ unsigned long start_pa;
+ unsigned long last_pa;
+ int level;
+ u64 current_prot;
+ bool check_wx;
+ unsigned long wx_pages;
+};
+
+/* Address marker */
+struct addr_marker {
+ unsigned long start_address;
+ const char *name;
+};
+
+/* Private information for debugfs */
+struct ptd_mm_info {
+ struct mm_struct *mm;
+ const struct addr_marker *markers;
+ unsigned long base_addr;
+ unsigned long end;
+};
+
+static struct addr_marker address_markers[] = {
+#ifdef CONFIG_KASAN
+ {KASAN_SHADOW_START, "Kasan shadow start"},
+ {KASAN_SHADOW_END, "Kasan shadow end"},
+#endif
+ {FIXADDR_START, "Fixmap start"},
+ {FIXADDR_TOP, "Fixmap end"},
+ {PCI_IO_START, "PCI I/O start"},
+ {PCI_IO_END, "PCI I/O end"},
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ {VMEMMAP_START, "vmemmap start"},
+ {VMEMMAP_END, "vmemmap end"},
+#endif
+ {VMALLOC_START, "vmalloc() area"},
+ {VMALLOC_END, "vmalloc() end"},
+ {PAGE_OFFSET, "Linear mapping"},
+ {-1, NULL},
+};
+
+static struct ptd_mm_info kernel_ptd_info = {
+ .mm = &init_mm,
+ .markers = address_markers,
+ .base_addr = KERN_VIRT_START,
+ .end = ULONG_MAX,
+};
+
+#ifdef CONFIG_EFI
+static struct addr_marker efi_addr_markers[] = {
+ { 0, "UEFI runtime start" },
+ { SZ_1G, "UEFI runtime end" },
+ { -1, NULL }
+};
+
+static struct ptd_mm_info efi_ptd_info = {
+ .mm = &efi_mm,
+ .markers = efi_addr_markers,
+ .base_addr = 0,
+ .end = SZ_2G,
+};
+#endif
+
+/* Page Table Entry */
+struct prot_bits {
+ u64 mask;
+ u64 val;
+ const char *set;
+ const char *clear;
+};
+
+static const struct prot_bits pte_bits[] = {
+ {
+ .mask = _PAGE_SOFT,
+ .val = _PAGE_SOFT,
+ .set = "RSW",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_DIRTY,
+ .val = _PAGE_DIRTY,
+ .set = "D",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_ACCESSED,
+ .val = _PAGE_ACCESSED,
+ .set = "A",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_GLOBAL,
+ .val = _PAGE_GLOBAL,
+ .set = "G",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_USER,
+ .val = _PAGE_USER,
+ .set = "U",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_EXEC,
+ .val = _PAGE_EXEC,
+ .set = "X",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_WRITE,
+ .val = _PAGE_WRITE,
+ .set = "W",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_READ,
+ .val = _PAGE_READ,
+ .set = "R",
+ .clear = ".",
+ }, {
+ .mask = _PAGE_PRESENT,
+ .val = _PAGE_PRESENT,
+ .set = "V",
+ .clear = ".",
+ }
+};
+
+/* Page Level */
+struct pg_level {
+ const char *name;
+ u64 mask;
+};
+
+static struct pg_level pg_level[] = {
+ { /* pgd */
+ .name = "PGD",
+ }, { /* p4d */
+ .name = (CONFIG_PGTABLE_LEVELS > 4) ? "P4D" : "PGD",
+ }, { /* pud */
+ .name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD",
+ }, { /* pmd */
+ .name = (CONFIG_PGTABLE_LEVELS > 2) ? "PMD" : "PGD",
+ }, { /* pte */
+ .name = "PTE",
+ },
+};
+
+static void dump_prot(struct pg_state *st)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(pte_bits); i++) {
+ const char *s;
+
+ if ((st->current_prot & pte_bits[i].mask) == pte_bits[i].val)
+ s = pte_bits[i].set;
+ else
+ s = pte_bits[i].clear;
+
+ if (s)
+ pt_dump_seq_printf(st->seq, " %s", s);
+ }
+}
+
+#ifdef CONFIG_64BIT
+#define ADDR_FORMAT "0x%016lx"
+#else
+#define ADDR_FORMAT "0x%08lx"
+#endif
+static void dump_addr(struct pg_state *st, unsigned long addr)
+{
+ static const char units[] = "KMGTPE";
+ const char *unit = units;
+ unsigned long delta;
+
+ pt_dump_seq_printf(st->seq, ADDR_FORMAT "-" ADDR_FORMAT " ",
+ st->start_address, addr);
+
+ pt_dump_seq_printf(st->seq, " " ADDR_FORMAT " ", st->start_pa);
+ delta = (addr - st->start_address) >> 10;
+
+ while (!(delta & 1023) && unit[1]) {
+ delta >>= 10;
+ unit++;
+ }
+
+ pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
+ pg_level[st->level].name);
+}
+
+static void note_prot_wx(struct pg_state *st, unsigned long addr)
+{
+ if (!st->check_wx)
+ return;
+
+ if ((st->current_prot & (_PAGE_WRITE | _PAGE_EXEC)) !=
+ (_PAGE_WRITE | _PAGE_EXEC))
+ return;
+
+ WARN_ONCE(1, "riscv/mm: Found insecure W+X mapping at address %p/%pS\n",
+ (void *)st->start_address, (void *)st->start_address);
+
+ st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
+}
+
+static void note_page(struct ptdump_state *pt_st, unsigned long addr,
+ int level, u64 val)
+{
+ struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
+ u64 pa = PFN_PHYS(pte_pfn(__pte(val)));
+ u64 prot = 0;
+
+ if (level >= 0)
+ prot = val & pg_level[level].mask;
+
+ if (st->level == -1) {
+ st->level = level;
+ st->current_prot = prot;
+ st->start_address = addr;
+ st->start_pa = pa;
+ st->last_pa = pa;
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ } else if (prot != st->current_prot ||
+ level != st->level || addr >= st->marker[1].start_address) {
+ if (st->current_prot) {
+ note_prot_wx(st, addr);
+ dump_addr(st, addr);
+ dump_prot(st);
+ pt_dump_seq_puts(st->seq, "\n");
+ }
+
+ while (addr >= st->marker[1].start_address) {
+ st->marker++;
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n",
+ st->marker->name);
+ }
+
+ st->start_address = addr;
+ st->start_pa = pa;
+ st->last_pa = pa;
+ st->current_prot = prot;
+ st->level = level;
+ } else {
+ st->last_pa = pa;
+ }
+}
+
+static void ptdump_walk(struct seq_file *s, struct ptd_mm_info *pinfo)
+{
+ struct pg_state st = {
+ .seq = s,
+ .marker = pinfo->markers,
+ .level = -1,
+ .ptdump = {
+ .note_page = note_page,
+ .range = (struct ptdump_range[]) {
+ {pinfo->base_addr, pinfo->end},
+ {0, 0}
+ }
+ }
+ };
+
+ ptdump_walk_pgd(&st.ptdump, pinfo->mm, NULL);
+}
+
+void ptdump_check_wx(void)
+{
+ struct pg_state st = {
+ .seq = NULL,
+ .marker = (struct addr_marker[]) {
+ {0, NULL},
+ {-1, NULL},
+ },
+ .level = -1,
+ .check_wx = true,
+ .ptdump = {
+ .note_page = note_page,
+ .range = (struct ptdump_range[]) {
+ {KERN_VIRT_START, ULONG_MAX},
+ {0, 0}
+ }
+ }
+ };
+
+ ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
+
+ if (st.wx_pages)
+ pr_warn("Checked W+X mappings: failed, %lu W+X pages found\n",
+ st.wx_pages);
+ else
+ pr_info("Checked W+X mappings: passed, no W+X pages found\n");
+}
+
+static int ptdump_show(struct seq_file *m, void *v)
+{
+ ptdump_walk(m, m->private);
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(ptdump);
+
+static int ptdump_init(void)
+{
+ unsigned int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(pg_level); i++)
+ for (j = 0; j < ARRAY_SIZE(pte_bits); j++)
+ pg_level[i].mask |= pte_bits[j].mask;
+
+ debugfs_create_file("kernel_page_tables", 0400, NULL, &kernel_ptd_info,
+ &ptdump_fops);
+#ifdef CONFIG_EFI
+ if (efi_enabled(EFI_RUNTIME_SERVICES))
+ debugfs_create_file("efi_page_tables", 0400, NULL, &efi_ptd_info,
+ &ptdump_fops);
+#endif
+
+ return 0;
+}
+
+device_initcall(ptdump_init);
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
new file mode 100644
index 000000000..720b443c4
--- /dev/null
+++ b/arch/riscv/mm/tlbflush.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+#include <asm/sbi.h>
+
+void flush_tlb_all(void)
+{
+ sbi_remote_sfence_vma(NULL, 0, -1);
+}
+
+/*
+ * This function must not be called with cmask being null.
+ * Kernel may panic if cmask is NULL.
+ */
+static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start,
+ unsigned long size)
+{
+ struct cpumask hmask;
+ unsigned int cpuid;
+
+ if (cpumask_empty(cmask))
+ return;
+
+ cpuid = get_cpu();
+
+ if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) {
+ /* local cpu is the only cpu present in cpumask */
+ if (size <= PAGE_SIZE)
+ local_flush_tlb_page(start);
+ else
+ local_flush_tlb_all();
+ } else {
+ riscv_cpuid_to_hartid_mask(cmask, &hmask);
+ sbi_remote_sfence_vma(cpumask_bits(&hmask), start, size);
+ }
+
+ put_cpu();
+}
+
+void flush_tlb_mm(struct mm_struct *mm)
+{
+ __sbi_tlb_flush_range(mm_cpumask(mm), 0, -1);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+{
+ __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE);
+}
+
+void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+{
+ __sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), start, end - start);
+}