Diffstat:
-rw-r--r--  arch/riscv/mm/Makefile          |   5
-rw-r--r--  arch/riscv/mm/cacheflush.c      | 120
-rw-r--r--  arch/riscv/mm/context.c         |  42
-rw-r--r--  arch/riscv/mm/dma-noncoherent.c |   3
-rw-r--r--  arch/riscv/mm/fault.c           |   5
-rw-r--r--  arch/riscv/mm/hugetlbpage.c     |  10
-rw-r--r--  arch/riscv/mm/init.c            |  55
-rw-r--r--  arch/riscv/mm/tlbflush.c        |  52
8 files changed, 217 insertions(+), 75 deletions(-)
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 2c869f8026..cbe4d775ef 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -13,14 +13,11 @@ endif
 KCOV_INSTRUMENT_init.o := n
 
 obj-y += init.o
-obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o pgtable.o
+obj-$(CONFIG_MMU) += extable.o fault.o pageattr.o pgtable.o tlbflush.o
 obj-y += cacheflush.o
 obj-y += context.o
 obj-y += pmem.o
 
-ifeq ($(CONFIG_MMU),y)
-obj-$(CONFIG_SMP) += tlbflush.o
-endif
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
 obj-$(CONFIG_KASAN) += kasan_init.o
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index bc61ee5975..a03c994eed 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -5,6 +5,7 @@
 
 #include <linux/acpi.h>
 #include <linux/of.h>
+#include <linux/prctl.h>
 #include <asm/acpi.h>
 #include <asm/cacheflush.h>
 
@@ -21,7 +22,9 @@ void flush_icache_all(void)
 {
 	local_flush_icache_all();
 
-	if (IS_ENABLED(CONFIG_RISCV_SBI) && !riscv_use_ipi_for_rfence())
+	if (num_online_cpus() < 2)
+		return;
+	else if (riscv_use_sbi_for_rfence())
 		sbi_remote_fence_i(NULL);
 	else
 		on_each_cpu(ipi_remote_fence_i, NULL, 1);
@@ -69,8 +72,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
 		 * with flush_icache_deferred().
 		 */
 		smp_mb();
-	} else if (IS_ENABLED(CONFIG_RISCV_SBI) &&
-		   !riscv_use_ipi_for_rfence()) {
+	} else if (riscv_use_sbi_for_rfence()) {
 		sbi_remote_fence_i(&others);
 	} else {
 		on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1);
@@ -152,3 +154,115 @@ void __init riscv_init_cbo_blocksizes(void)
 	if (cboz_block_size)
 		riscv_cboz_block_size = cboz_block_size;
 }
+
+#ifdef CONFIG_SMP
+static void set_icache_stale_mask(void)
+{
+	cpumask_t *mask;
+	bool stale_cpu;
+
+	/*
+	 * Mark every other hart's icache as needing a flush for
+	 * this MM. Maintain the previous value of the current
+	 * cpu to handle the case when this function is called
+	 * concurrently on different harts.
+	 */
+	mask = &current->mm->context.icache_stale_mask;
+	stale_cpu = cpumask_test_cpu(smp_processor_id(), mask);
+
+	cpumask_setall(mask);
+	cpumask_assign_cpu(smp_processor_id(), mask, stale_cpu);
+}
+#endif
+
+/**
+ * riscv_set_icache_flush_ctx() - Enable/disable icache flushing instructions in
+ * userspace.
+ * @ctx: Set the type of icache flushing instructions permitted/prohibited in
+ *	 userspace. Supported values described below.
+ *
+ * Supported values for ctx:
+ *
+ * * %PR_RISCV_CTX_SW_FENCEI_ON: Allow fence.i in user space.
+ *
+ * * %PR_RISCV_CTX_SW_FENCEI_OFF: Disallow fence.i in user space. All threads in
+ *   a process will be affected when ``scope == PR_RISCV_SCOPE_PER_PROCESS``.
+ *   Therefore, caution must be taken; use this flag only when you can guarantee
+ *   that no thread in the process will emit fence.i from this point onward.
+ *
+ * @scope: Set scope of where icache flushing instructions are allowed to be
+ *	   emitted. Supported values described below.
+ *
+ * Supported values for scope:
+ *
+ * * %PR_RISCV_SCOPE_PER_PROCESS: Ensure the icache of any thread in this process
+ *                               is coherent with instruction storage upon
+ *                               migration.
+ *
+ * * %PR_RISCV_SCOPE_PER_THREAD: Ensure the icache of the current thread is
+ *                              coherent with instruction storage upon
+ *                              migration.
+ *
+ * When ``scope == PR_RISCV_SCOPE_PER_PROCESS``, all threads in the process are
+ * permitted to emit icache flushing instructions. Whenever any thread in the
+ * process is migrated, the corresponding hart's icache will be guaranteed to be
+ * consistent with instruction storage. This does not enforce any guarantees
+ * outside of migration. If a thread modifies an instruction that another thread
+ * may attempt to execute, the other thread must still emit an icache flushing
+ * instruction before attempting to execute the potentially modified
+ * instruction. This must be performed by the user-space program.
+ *
+ * In per-thread context (eg. ``scope == PR_RISCV_SCOPE_PER_THREAD``) only the
+ * thread calling this function is permitted to emit icache flushing
+ * instructions. When the thread is migrated, the corresponding hart's icache
+ * will be guaranteed to be consistent with instruction storage.
+ *
+ * On kernels configured without SMP, this function is a nop as migrations
+ * across harts will not occur.
+ */
+int riscv_set_icache_flush_ctx(unsigned long ctx, unsigned long scope)
+{
+#ifdef CONFIG_SMP
+	switch (ctx) {
+	case PR_RISCV_CTX_SW_FENCEI_ON:
+		switch (scope) {
+		case PR_RISCV_SCOPE_PER_PROCESS:
+			current->mm->context.force_icache_flush = true;
+			break;
+		case PR_RISCV_SCOPE_PER_THREAD:
+			current->thread.force_icache_flush = true;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case PR_RISCV_CTX_SW_FENCEI_OFF:
+		switch (scope) {
+		case PR_RISCV_SCOPE_PER_PROCESS:
+			current->mm->context.force_icache_flush = false;
+
+			set_icache_stale_mask();
+			break;
+		case PR_RISCV_SCOPE_PER_THREAD:
+			current->thread.force_icache_flush = false;
+
+			set_icache_stale_mask();
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+#else
+	switch (ctx) {
+	case PR_RISCV_CTX_SW_FENCEI_ON:
+	case PR_RISCV_CTX_SW_FENCEI_OFF:
+		return 0;
+	default:
+		return -EINVAL;
+	}
+#endif
+}
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index ba8eb39446..4abe3de232 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -15,14 +15,13 @@
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
+#include <asm/switch_to.h>
 
 #ifdef CONFIG_MMU
 
 DEFINE_STATIC_KEY_FALSE(use_asid_allocator);
 
-static unsigned long asid_bits;
 static unsigned long num_asids;
-unsigned long asid_mask;
 
 static atomic_long_t current_version;
 
@@ -81,7 +80,7 @@ static void __flush_context(void)
 		if (cntx == 0)
 			cntx = per_cpu(reserved_context, i);
 
-		__set_bit(cntx & asid_mask, context_asid_map);
+		__set_bit(cntx2asid(cntx), context_asid_map);
 		per_cpu(reserved_context, i) = cntx;
 	}
 
@@ -102,7 +101,7 @@ static unsigned long __new_context(struct mm_struct *mm)
 	lockdep_assert_held(&context_lock);
 
 	if (cntx != 0) {
-		unsigned long newcntx = ver | (cntx & asid_mask);
+		unsigned long newcntx = ver | cntx2asid(cntx);
 
 		/*
 		 * If our current CONTEXT was active during a rollover, we
@@ -115,7 +114,7 @@ static unsigned long __new_context(struct mm_struct *mm)
 		 * We had a valid CONTEXT in a previous life, so try to
 		 * re-use it if possible.
 		 */
-		if (!__test_and_set_bit(cntx & asid_mask, context_asid_map))
+		if (!__test_and_set_bit(cntx2asid(cntx), context_asid_map))
 			return newcntx;
 	}
 
@@ -128,7 +127,7 @@ static unsigned long __new_context(struct mm_struct *mm)
 		goto set_asid;
 
 	/* We're out of ASIDs, so increment current_version */
-	ver = atomic_long_add_return_relaxed(num_asids, &current_version);
+	ver = atomic_long_add_return_relaxed(BIT(SATP_ASID_BITS), &current_version);
 
 	/* Flush everything */
 	__flush_context();
@@ -168,7 +167,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
 	 */
 	old_active_cntx = atomic_long_read(&per_cpu(active_context, cpu));
 	if (old_active_cntx &&
-	    ((cntx & ~asid_mask) == atomic_long_read(&current_version)) &&
+	    (cntx2version(cntx) == atomic_long_read(&current_version)) &&
 	    atomic_long_cmpxchg_relaxed(&per_cpu(active_context, cpu),
 					old_active_cntx, cntx))
 		goto switch_mm_fast;
@@ -177,7 +176,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
 
 	/* Check that our ASID belongs to the current_version. */
 	cntx = atomic_long_read(&mm->context.id);
-	if ((cntx & ~asid_mask) != atomic_long_read(&current_version)) {
+	if (cntx2version(cntx) != atomic_long_read(&current_version)) {
 		cntx = __new_context(mm);
 		atomic_long_set(&mm->context.id, cntx);
 	}
@@ -191,7 +190,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
 
 switch_mm_fast:
 	csr_write(CSR_SATP, virt_to_pfn(mm->pgd) |
-		  ((cntx & asid_mask) << SATP_ASID_SHIFT) |
+		  (cntx2asid(cntx) << SATP_ASID_SHIFT) |
 		  satp_mode);
 
 	if (need_flush_tlb)
@@ -202,7 +201,7 @@ static void set_mm_noasid(struct mm_struct *mm)
 {
 	/* Switch the page table and blindly nuke entire local TLB */
 	csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode);
-	local_flush_tlb_all();
+	local_flush_tlb_all_asid(0);
 }
 
 static inline void set_mm(struct mm_struct *prev,
@@ -227,7 +226,7 @@ static inline void set_mm(struct mm_struct *prev,
 
 static int __init asids_init(void)
 {
-	unsigned long old;
+	unsigned long asid_bits, old;
 
 	/* Figure-out number of ASID bits in HW */
 	old = csr_read(CSR_SATP);
@@ -247,7 +246,6 @@ static int __init asids_init(void)
 	/* Pre-compute ASID details */
 	if (asid_bits) {
 		num_asids = 1 << asid_bits;
-		asid_mask = num_asids - 1;
 	}
 
 	/*
@@ -255,7 +253,7 @@ static int __init asids_init(void)
 	 * at-least twice more than CPUs
 	 */
 	if (num_asids > (2 * num_possible_cpus())) {
-		atomic_long_set(&current_version, num_asids);
+		atomic_long_set(&current_version, BIT(SATP_ASID_BITS));
 
 		context_asid_map = bitmap_zalloc(num_asids, GFP_KERNEL);
 		if (!context_asid_map)
@@ -297,21 +295,23 @@ static inline void set_mm(struct mm_struct *prev,
  *
  * The "cpu" argument must be the current local CPU number.
  */
-static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu)
+static inline void flush_icache_deferred(struct mm_struct *mm, unsigned int cpu,
+					 struct task_struct *task)
 {
 #ifdef CONFIG_SMP
-	cpumask_t *mask = &mm->context.icache_stale_mask;
-
-	if (cpumask_test_cpu(cpu, mask)) {
-		cpumask_clear_cpu(cpu, mask);
+	if (cpumask_test_and_clear_cpu(cpu, &mm->context.icache_stale_mask)) {
 		/*
 		 * Ensure the remote hart's writes are visible to this hart.
 		 * This pairs with a barrier in flush_icache_mm.
 		 */
		smp_mb();
-		local_flush_icache_all();
-	}
 
+		/*
+		 * If cache will be flushed in switch_to, no need to flush here.
+		 */
+		if (!(task && switch_to_should_flush_icache(task)))
+			local_flush_icache_all();
+	}
 #endif
 }
 
@@ -334,5 +334,5 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 
 	set_mm(prev, next, cpu);
 
-	flush_icache_deferred(next, cpu);
+	flush_icache_deferred(next, cpu, task);
 }
diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c
index 843107f834..cb89d7e0ba 100644
--- a/arch/riscv/mm/dma-noncoherent.c
+++ b/arch/riscv/mm/dma-noncoherent.c
@@ -128,8 +128,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size)
 	ALT_CMO_OP(FLUSH, flush_addr, size, riscv_cbom_block_size);
 }
 
-void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
-			bool coherent)
+void arch_setup_dma_ops(struct device *dev, bool coherent)
 {
 	WARN_TAINT(!coherent && riscv_cbom_block_size > ARCH_DMA_MINALIGN,
 		   TAINT_CPU_OUT_OF_SPEC,
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 3ba1d4dde5..5224f37338 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -292,7 +292,10 @@ void handle_page_fault(struct pt_regs *regs)
 
 	if (unlikely(access_error(cause, vma))) {
 		vma_end_read(vma);
-		goto lock_mmap;
+		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+		tsk->thread.bad_cause = cause;
+		bad_area_nosemaphore(regs, SEGV_ACCERR, addr);
+		return;
 	}
 
 	fault = handle_mm_fault(vma, addr, flags | FAULT_FLAG_VMA_LOCK, regs);
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index 5ef2a68911..0ebd968b33 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -399,16 +399,6 @@ static bool is_napot_size(unsigned long size)
 
 #endif /*CONFIG_RISCV_ISA_SVNAPOT*/
 
-int pud_huge(pud_t pud)
-{
-	return pud_leaf(pud);
-}
-
-int pmd_huge(pmd_t pmd)
-{
-	return pmd_leaf(pmd);
-}
-
 static bool __hugetlb_valid_size(unsigned long size)
 {
 	if (size == HPAGE_SIZE)
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 46b4ad418f..e3405e4b99 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -24,6 +24,7 @@
 #include <linux/elf.h>
 #endif
 #include <linux/kfence.h>
+#include <linux/execmem.h>
 
 #include <asm/fixmap.h>
 #include <asm/io.h>
@@ -49,8 +50,8 @@ u64 satp_mode __ro_after_init = SATP_MODE_32;
 EXPORT_SYMBOL(satp_mode);
 
 #ifdef CONFIG_64BIT
-bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
-bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
+bool pgtable_l4_enabled __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL);
+bool pgtable_l5_enabled __ro_after_init = !IS_ENABLED(CONFIG_XIP_KERNEL);
 EXPORT_SYMBOL(pgtable_l4_enabled);
 EXPORT_SYMBOL(pgtable_l5_enabled);
 #endif
@@ -161,11 +162,25 @@ static void print_vm_layout(void) { }
 
 void __init mem_init(void)
 {
+	bool swiotlb = max_pfn > PFN_DOWN(dma32_phys_limit);
 #ifdef CONFIG_FLATMEM
 	BUG_ON(!mem_map);
 #endif /* CONFIG_FLATMEM */
 
-	swiotlb_init(max_pfn > PFN_DOWN(dma32_phys_limit), SWIOTLB_VERBOSE);
+	if (IS_ENABLED(CONFIG_DMA_BOUNCE_UNALIGNED_KMALLOC) && !swiotlb &&
+	    dma_cache_alignment != 1) {
+		/*
+		 * If no bouncing needed for ZONE_DMA, allocate 1MB swiotlb
+		 * buffer per 1GB of RAM for kmalloc() bouncing on
+		 * non-coherent platforms.
+		 */
+		unsigned long size =
+			DIV_ROUND_UP(memblock_phys_mem_size(), 1024);
+		swiotlb_adjust_size(min(swiotlb_size_or_default(), size));
+		swiotlb = true;
+	}
+
+	swiotlb_init(swiotlb, SWIOTLB_VERBOSE);
 	memblock_free_all();
 
 	print_vm_layout();
@@ -1485,3 +1500,37 @@ void __init pgtable_cache_init(void)
 		preallocate_pgd_pages_range(MODULES_VADDR, MODULES_END, "bpf/modules");
 }
 #endif
+
+#ifdef CONFIG_EXECMEM
+#ifdef CONFIG_MMU
+static struct execmem_info execmem_info __ro_after_init;
+
+struct execmem_info __init *execmem_arch_setup(void)
+{
+	execmem_info = (struct execmem_info){
+		.ranges = {
+			[EXECMEM_DEFAULT] = {
+				.start = MODULES_VADDR,
+				.end = MODULES_END,
+				.pgprot = PAGE_KERNEL,
+				.alignment = 1,
+			},
+			[EXECMEM_KPROBES] = {
+				.start = VMALLOC_START,
+				.end = VMALLOC_END,
+				.pgprot = PAGE_KERNEL_READ_EXEC,
+				.alignment = 1,
+			},
+			[EXECMEM_BPF] = {
+				.start = BPF_JIT_REGION_START,
+				.end = BPF_JIT_REGION_END,
+				.pgprot = PAGE_KERNEL,
+				.alignment = PAGE_SIZE,
+			},
+		},
+	};
+
+	return &execmem_info;
+}
+#endif /* CONFIG_MMU */
+#endif /* CONFIG_EXECMEM */
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index a6f7887748..9b6e86ce38 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -11,7 +11,7 @@
  * Flush entire TLB if number of entries to be flushed is greater
  * than the threshold below.
  */
-static unsigned long tlb_flush_all_threshold __read_mostly = 64;
+unsigned long tlb_flush_all_threshold __read_mostly = 64;
 
 static void local_flush_tlb_range_threshold_asid(unsigned long start,
 						 unsigned long size,
@@ -56,10 +56,12 @@ static void __ipi_flush_tlb_all(void *info)
 
 void flush_tlb_all(void)
 {
-	if (riscv_use_ipi_for_rfence())
-		on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
-	else
+	if (num_online_cpus() < 2)
+		local_flush_tlb_all();
+	else if (riscv_use_sbi_for_rfence())
 		sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID);
+	else
+		on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
 }
 
 struct flush_tlb_range_data {
@@ -80,46 +82,34 @@ static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid,
 			      unsigned long start, unsigned long size,
 			      unsigned long stride)
 {
-	struct flush_tlb_range_data ftd;
-	bool broadcast;
+	unsigned int cpu;
 
 	if (cpumask_empty(cmask))
 		return;
 
-	if (cmask != cpu_online_mask) {
-		unsigned int cpuid;
+	cpu = get_cpu();
 
-		cpuid = get_cpu();
-		/* check if the tlbflush needs to be sent to other CPUs */
-		broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
+	/* Check if the TLB flush needs to be sent to other CPUs. */
+	if (cpumask_any_but(cmask, cpu) >= nr_cpu_ids) {
+		local_flush_tlb_range_asid(start, size, stride, asid);
+	} else if (riscv_use_sbi_for_rfence()) {
+		sbi_remote_sfence_vma_asid(cmask, start, size, asid);
 	} else {
-		broadcast = true;
-	}
+		struct flush_tlb_range_data ftd;
 
-	if (broadcast) {
-		if (riscv_use_ipi_for_rfence()) {
-			ftd.asid = asid;
-			ftd.start = start;
-			ftd.size = size;
-			ftd.stride = stride;
-			on_each_cpu_mask(cmask,
-					 __ipi_flush_tlb_range_asid,
-					 &ftd, 1);
-		} else
-			sbi_remote_sfence_vma_asid(cmask,
						   start, size, asid);
-	} else {
-		local_flush_tlb_range_asid(start, size, stride, asid);
+		ftd.asid = asid;
+		ftd.start = start;
+		ftd.size = size;
+		ftd.stride = stride;
+		on_each_cpu_mask(cmask, __ipi_flush_tlb_range_asid, &ftd, 1);
 	}
 
-	if (cmask != cpu_online_mask)
-		put_cpu();
+	put_cpu();
 }
 
 static inline unsigned long get_mm_asid(struct mm_struct *mm)
 {
-	return static_branch_unlikely(&use_asid_allocator) ?
-			atomic_long_read(&mm->context.id) & asid_mask : FLUSH_TLB_NO_ASID;
+	return cntx2asid(atomic_long_read(&mm->context.id));
}
 
 void flush_tlb_mm(struct mm_struct *mm)
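
The kernel-doc for riscv_set_icache_flush_ctx() in the cacheflush.c hunk is easiest to read from the userspace side. The sketch below is not part of this diff; it assumes the PR_RISCV_SET_ICACHE_FLUSH_CTX prctl operation and the PR_RISCV_CTX_SW_FENCEI_* / PR_RISCV_SCOPE_* constants that the same series exposes through <linux/prctl.h>.

/*
 * Hedged sketch only: userspace view of riscv_set_icache_flush_ctx().
 * Assumes <linux/prctl.h> already defines PR_RISCV_SET_ICACHE_FLUSH_CTX
 * and the ctx/scope constants added by this series.
 */
#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	/* This process will issue its own fence.i after writing code. */
	if (prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_ON,
		  PR_RISCV_SCOPE_PER_PROCESS))
		perror("PR_RISCV_CTX_SW_FENCEI_ON");

	/*
	 * ... JIT code here: write instructions, execute fence.i on the
	 * writing hart, then run the generated code.  The kernel only
	 * guarantees icache consistency across migration; synchronising
	 * other threads is still userspace's job.
	 */

	/*
	 * Stop issuing fence.i ourselves.  The kernel marks every other
	 * hart's icache stale once (set_icache_stale_mask()), so earlier
	 * modifications are not lost.  Only do this when no thread will
	 * emit fence.i afterwards.
	 */
	if (prctl(PR_RISCV_SET_ICACHE_FLUSH_CTX, PR_RISCV_CTX_SW_FENCEI_OFF,
		  PR_RISCV_SCOPE_PER_PROCESS))
		perror("PR_RISCV_CTX_SW_FENCEI_OFF");

	return 0;
}

On !SMP kernels the call still succeeds for valid ctx values but is a no-op, as the kernel-doc above notes.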
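The context.c and tlbflush.c hunks lean on cntx2asid()/cntx2version() helpers defined in arch headers elsewhere in this series, not in this diff. As a rough, self-contained illustration of the layout they appear to assume (RV64 value of SATP_ASID_BITS used here), the context ID keeps the hardware ASID in the low bits and a rollover generation in the bits above it, which is why a rollover adds BIT(SATP_ASID_BITS) to current_version:

/*
 * Illustration only -- not the kernel's definitions.  SATP_ASID_BITS == 16
 * is the RV64 value; the real helpers come from the arch headers.
 */
#include <stdio.h>

#define SATP_ASID_BITS	16
#define SATP_ASID_MASK	((1UL << SATP_ASID_BITS) - 1)

/* Low bits: hardware ASID programmed into satp. */
static unsigned long cntx2asid(unsigned long cntx)
{
	return cntx & SATP_ASID_MASK;
}

/* High bits: allocator generation, bumped by BIT(SATP_ASID_BITS) on rollover. */
static unsigned long cntx2version(unsigned long cntx)
{
	return cntx & ~SATP_ASID_MASK;
}

int main(void)
{
	unsigned long cntx = (3UL << SATP_ASID_BITS) | 42;	/* generation 3, ASID 42 */

	printf("asid=%lu generation=%lu\n",
	       cntx2asid(cntx), cntx2version(cntx) >> SATP_ASID_BITS);
	return 0;
}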