diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /arch/riscv/kernel | |
parent | Initial commit. (diff) | |
download | linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip |
Adding upstream version 6.1.76.upstream/6.1.76
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/riscv/kernel')
88 files changed, 12131 insertions, 0 deletions
diff --git a/arch/riscv/kernel/.gitignore b/arch/riscv/kernel/.gitignore new file mode 100644 index 000000000..e052ed331 --- /dev/null +++ b/arch/riscv/kernel/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +/vmlinux.lds diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile new file mode 100644 index 000000000..ab333cb79 --- /dev/null +++ b/arch/riscv/kernel/Makefile @@ -0,0 +1,91 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the RISC-V Linux kernel +# + +ifdef CONFIG_FTRACE +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) +endif +CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) +CFLAGS_compat_syscall_table.o += $(call cc-option,-Wno-override-init,) + +ifdef CONFIG_KEXEC +AFLAGS_kexec_relocate.o := -mcmodel=medany $(call cc-option,-mno-relax) +endif + +# cmodel=medany and notrace when patching early +ifdef CONFIG_RISCV_ALTERNATIVE_EARLY +CFLAGS_alternative.o := -mcmodel=medany +CFLAGS_cpufeature.o := -mcmodel=medany +ifdef CONFIG_FTRACE +CFLAGS_REMOVE_alternative.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_cpufeature.o = $(CC_FLAGS_FTRACE) +endif +ifdef CONFIG_KASAN +KASAN_SANITIZE_alternative.o := n +KASAN_SANITIZE_cpufeature.o := n +endif +endif + +extra-y += vmlinux.lds + +obj-y += head.o +obj-y += soc.o +obj-$(CONFIG_RISCV_ALTERNATIVE) += alternative.o +obj-y += cpu.o +obj-y += cpufeature.o +obj-y += entry.o +obj-y += irq.o +obj-y += process.o +obj-y += ptrace.o +obj-y += reset.o +obj-y += setup.o +obj-y += signal.o +obj-y += syscall_table.o +obj-y += sys_riscv.o +obj-y += time.o +obj-y += traps.o +obj-y += riscv_ksyms.o +obj-y += stacktrace.o +obj-y += cacheinfo.o +obj-y += patch.o +obj-y += probes/ +obj-$(CONFIG_MMU) += vdso.o vdso/ + +obj-$(CONFIG_RISCV_M_MODE) += traps_misaligned.o +obj-$(CONFIG_FPU) += fpu.o +obj-$(CONFIG_SMP) += smpboot.o +obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SMP) += cpu_ops.o + +obj-$(CONFIG_RISCV_BOOT_SPINWAIT) += cpu_ops_spinwait.o +obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o + +obj-$(CONFIG_CPU_PM) += suspend_entry.o suspend.o + +obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o +obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o + +obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o + +obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o +obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o +obj-$(CONFIG_RISCV_SBI) += sbi.o +ifeq ($(CONFIG_RISCV_SBI), y) +obj-$(CONFIG_SMP) += cpu_ops_sbi.o +endif +obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o +obj-$(CONFIG_KGDB) += kgdb.o +obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o +obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o + +obj-$(CONFIG_JUMP_LABEL) += jump_label.o + +obj-$(CONFIG_EFI) += efi.o +obj-$(CONFIG_COMPAT) += compat_syscall_table.o +obj-$(CONFIG_COMPAT) += compat_signal.o +obj-$(CONFIG_COMPAT) += compat_vdso/ diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c new file mode 100644 index 000000000..a7d26a00b --- /dev/null +++ b/arch/riscv/kernel/alternative.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * alternative runtime patching + * inspired by the ARM64 and x86 version + * + * Copyright (C) 2021 Sifive. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/cpu.h> +#include <linux/uaccess.h> +#include <asm/alternative.h> +#include <asm/sections.h> +#include <asm/vendorid_list.h> +#include <asm/sbi.h> +#include <asm/csr.h> + +struct cpu_manufacturer_info_t { + unsigned long vendor_id; + unsigned long arch_id; + unsigned long imp_id; + void (*patch_func)(struct alt_entry *begin, struct alt_entry *end, + unsigned long archid, unsigned long impid, + unsigned int stage); +}; + +static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info) +{ +#ifdef CONFIG_RISCV_M_MODE + cpu_mfr_info->vendor_id = csr_read(CSR_MVENDORID); + cpu_mfr_info->arch_id = csr_read(CSR_MARCHID); + cpu_mfr_info->imp_id = csr_read(CSR_MIMPID); +#else + cpu_mfr_info->vendor_id = sbi_get_mvendorid(); + cpu_mfr_info->arch_id = sbi_get_marchid(); + cpu_mfr_info->imp_id = sbi_get_mimpid(); +#endif + + switch (cpu_mfr_info->vendor_id) { +#ifdef CONFIG_ERRATA_SIFIVE + case SIFIVE_VENDOR_ID: + cpu_mfr_info->patch_func = sifive_errata_patch_func; + break; +#endif +#ifdef CONFIG_ERRATA_THEAD + case THEAD_VENDOR_ID: + cpu_mfr_info->patch_func = thead_errata_patch_func; + break; +#endif + default: + cpu_mfr_info->patch_func = NULL; + } +} + +/* + * This is called very early in the boot process (directly after we run + * a feature detect on the boot CPU). No need to worry about other CPUs + * here. + */ +static void __init_or_module _apply_alternatives(struct alt_entry *begin, + struct alt_entry *end, + unsigned int stage) +{ + struct cpu_manufacturer_info_t cpu_mfr_info; + + riscv_fill_cpu_mfr_info(&cpu_mfr_info); + + riscv_cpufeature_patch_func(begin, end, stage); + + if (!cpu_mfr_info.patch_func) + return; + + cpu_mfr_info.patch_func(begin, end, + cpu_mfr_info.arch_id, + cpu_mfr_info.imp_id, + stage); +} + +void __init apply_boot_alternatives(void) +{ + /* If called on non-boot cpu things could go wrong */ + WARN_ON(smp_processor_id() != 0); + + _apply_alternatives((struct alt_entry *)__alt_start, + (struct alt_entry *)__alt_end, + RISCV_ALTERNATIVES_BOOT); +} + +/* + * apply_early_boot_alternatives() is called from setup_vm() with MMU-off. + * + * Following requirements should be honoured for it to work correctly: + * 1) It should use PC-relative addressing for accessing kernel symbols. + * To achieve this we always use GCC cmodel=medany. + * 2) The compiler instrumentation for FTRACE will not work for setup_vm() + * so disable compiler instrumentation when FTRACE is enabled. + * + * Currently, the above requirements are honoured by using custom CFLAGS + * for alternative.o in kernel/Makefile. + */ +void __init apply_early_boot_alternatives(void) +{ +#ifdef CONFIG_RISCV_ALTERNATIVE_EARLY + _apply_alternatives((struct alt_entry *)__alt_start, + (struct alt_entry *)__alt_end, + RISCV_ALTERNATIVES_EARLY_BOOT); +#endif +} + +#ifdef CONFIG_MODULES +void apply_module_alternatives(void *start, size_t length) +{ + _apply_alternatives((struct alt_entry *)start, + (struct alt_entry *)(start + length), + RISCV_ALTERNATIVES_MODULE); +} +#endif diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c new file mode 100644 index 000000000..df9444397 --- /dev/null +++ b/arch/riscv/kernel/asm-offsets.c @@ -0,0 +1,477 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive + */ + +#define GENERATING_ASM_OFFSETS + +#include <linux/kbuild.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <asm/kvm_host.h> +#include <asm/thread_info.h> +#include <asm/ptrace.h> +#include <asm/cpu_ops_sbi.h> +#include <asm/suspend.h> + +void asm_offsets(void); + +void asm_offsets(void) +{ + OFFSET(TASK_THREAD_RA, task_struct, thread.ra); + OFFSET(TASK_THREAD_SP, task_struct, thread.sp); + OFFSET(TASK_THREAD_S0, task_struct, thread.s[0]); + OFFSET(TASK_THREAD_S1, task_struct, thread.s[1]); + OFFSET(TASK_THREAD_S2, task_struct, thread.s[2]); + OFFSET(TASK_THREAD_S3, task_struct, thread.s[3]); + OFFSET(TASK_THREAD_S4, task_struct, thread.s[4]); + OFFSET(TASK_THREAD_S5, task_struct, thread.s[5]); + OFFSET(TASK_THREAD_S6, task_struct, thread.s[6]); + OFFSET(TASK_THREAD_S7, task_struct, thread.s[7]); + OFFSET(TASK_THREAD_S8, task_struct, thread.s[8]); + OFFSET(TASK_THREAD_S9, task_struct, thread.s[9]); + OFFSET(TASK_THREAD_S10, task_struct, thread.s[10]); + OFFSET(TASK_THREAD_S11, task_struct, thread.s[11]); + OFFSET(TASK_TI_FLAGS, task_struct, thread_info.flags); + OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count); + OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp); + OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp); + + OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]); + OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]); + OFFSET(TASK_THREAD_F2, task_struct, thread.fstate.f[2]); + OFFSET(TASK_THREAD_F3, task_struct, thread.fstate.f[3]); + OFFSET(TASK_THREAD_F4, task_struct, thread.fstate.f[4]); + OFFSET(TASK_THREAD_F5, task_struct, thread.fstate.f[5]); + OFFSET(TASK_THREAD_F6, task_struct, thread.fstate.f[6]); + OFFSET(TASK_THREAD_F7, task_struct, thread.fstate.f[7]); + OFFSET(TASK_THREAD_F8, task_struct, thread.fstate.f[8]); + OFFSET(TASK_THREAD_F9, task_struct, thread.fstate.f[9]); + OFFSET(TASK_THREAD_F10, task_struct, thread.fstate.f[10]); + OFFSET(TASK_THREAD_F11, task_struct, thread.fstate.f[11]); + OFFSET(TASK_THREAD_F12, task_struct, thread.fstate.f[12]); + OFFSET(TASK_THREAD_F13, task_struct, thread.fstate.f[13]); + OFFSET(TASK_THREAD_F14, task_struct, thread.fstate.f[14]); + OFFSET(TASK_THREAD_F15, task_struct, thread.fstate.f[15]); + OFFSET(TASK_THREAD_F16, task_struct, thread.fstate.f[16]); + OFFSET(TASK_THREAD_F17, task_struct, thread.fstate.f[17]); + OFFSET(TASK_THREAD_F18, task_struct, thread.fstate.f[18]); + OFFSET(TASK_THREAD_F19, task_struct, thread.fstate.f[19]); + OFFSET(TASK_THREAD_F20, task_struct, thread.fstate.f[20]); + OFFSET(TASK_THREAD_F21, task_struct, thread.fstate.f[21]); + OFFSET(TASK_THREAD_F22, task_struct, thread.fstate.f[22]); + OFFSET(TASK_THREAD_F23, task_struct, thread.fstate.f[23]); + OFFSET(TASK_THREAD_F24, task_struct, thread.fstate.f[24]); + OFFSET(TASK_THREAD_F25, task_struct, thread.fstate.f[25]); + OFFSET(TASK_THREAD_F26, task_struct, thread.fstate.f[26]); + OFFSET(TASK_THREAD_F27, task_struct, thread.fstate.f[27]); + OFFSET(TASK_THREAD_F28, task_struct, thread.fstate.f[28]); + OFFSET(TASK_THREAD_F29, task_struct, thread.fstate.f[29]); + OFFSET(TASK_THREAD_F30, task_struct, thread.fstate.f[30]); + OFFSET(TASK_THREAD_F31, task_struct, thread.fstate.f[31]); + OFFSET(TASK_THREAD_FCSR, task_struct, thread.fstate.fcsr); +#ifdef CONFIG_STACKPROTECTOR + OFFSET(TSK_STACK_CANARY, task_struct, stack_canary); +#endif + + DEFINE(PT_SIZE, sizeof(struct pt_regs)); + OFFSET(PT_EPC, pt_regs, epc); + OFFSET(PT_RA, pt_regs, ra); + OFFSET(PT_FP, pt_regs, s0); + OFFSET(PT_S0, pt_regs, s0); + OFFSET(PT_S1, pt_regs, s1); + OFFSET(PT_S2, pt_regs, s2); + OFFSET(PT_S3, pt_regs, s3); + OFFSET(PT_S4, pt_regs, s4); + OFFSET(PT_S5, pt_regs, s5); + OFFSET(PT_S6, pt_regs, s6); + OFFSET(PT_S7, pt_regs, s7); + OFFSET(PT_S8, pt_regs, s8); + OFFSET(PT_S9, pt_regs, s9); + OFFSET(PT_S10, pt_regs, s10); + OFFSET(PT_S11, pt_regs, s11); + OFFSET(PT_SP, pt_regs, sp); + OFFSET(PT_TP, pt_regs, tp); + OFFSET(PT_A0, pt_regs, a0); + OFFSET(PT_A1, pt_regs, a1); + OFFSET(PT_A2, pt_regs, a2); + OFFSET(PT_A3, pt_regs, a3); + OFFSET(PT_A4, pt_regs, a4); + OFFSET(PT_A5, pt_regs, a5); + OFFSET(PT_A6, pt_regs, a6); + OFFSET(PT_A7, pt_regs, a7); + OFFSET(PT_T0, pt_regs, t0); + OFFSET(PT_T1, pt_regs, t1); + OFFSET(PT_T2, pt_regs, t2); + OFFSET(PT_T3, pt_regs, t3); + OFFSET(PT_T4, pt_regs, t4); + OFFSET(PT_T5, pt_regs, t5); + OFFSET(PT_T6, pt_regs, t6); + OFFSET(PT_GP, pt_regs, gp); + OFFSET(PT_ORIG_A0, pt_regs, orig_a0); + OFFSET(PT_STATUS, pt_regs, status); + OFFSET(PT_BADADDR, pt_regs, badaddr); + OFFSET(PT_CAUSE, pt_regs, cause); + + OFFSET(SUSPEND_CONTEXT_REGS, suspend_context, regs); + + OFFSET(KVM_ARCH_GUEST_ZERO, kvm_vcpu_arch, guest_context.zero); + OFFSET(KVM_ARCH_GUEST_RA, kvm_vcpu_arch, guest_context.ra); + OFFSET(KVM_ARCH_GUEST_SP, kvm_vcpu_arch, guest_context.sp); + OFFSET(KVM_ARCH_GUEST_GP, kvm_vcpu_arch, guest_context.gp); + OFFSET(KVM_ARCH_GUEST_TP, kvm_vcpu_arch, guest_context.tp); + OFFSET(KVM_ARCH_GUEST_T0, kvm_vcpu_arch, guest_context.t0); + OFFSET(KVM_ARCH_GUEST_T1, kvm_vcpu_arch, guest_context.t1); + OFFSET(KVM_ARCH_GUEST_T2, kvm_vcpu_arch, guest_context.t2); + OFFSET(KVM_ARCH_GUEST_S0, kvm_vcpu_arch, guest_context.s0); + OFFSET(KVM_ARCH_GUEST_S1, kvm_vcpu_arch, guest_context.s1); + OFFSET(KVM_ARCH_GUEST_A0, kvm_vcpu_arch, guest_context.a0); + OFFSET(KVM_ARCH_GUEST_A1, kvm_vcpu_arch, guest_context.a1); + OFFSET(KVM_ARCH_GUEST_A2, kvm_vcpu_arch, guest_context.a2); + OFFSET(KVM_ARCH_GUEST_A3, kvm_vcpu_arch, guest_context.a3); + OFFSET(KVM_ARCH_GUEST_A4, kvm_vcpu_arch, guest_context.a4); + OFFSET(KVM_ARCH_GUEST_A5, kvm_vcpu_arch, guest_context.a5); + OFFSET(KVM_ARCH_GUEST_A6, kvm_vcpu_arch, guest_context.a6); + OFFSET(KVM_ARCH_GUEST_A7, kvm_vcpu_arch, guest_context.a7); + OFFSET(KVM_ARCH_GUEST_S2, kvm_vcpu_arch, guest_context.s2); + OFFSET(KVM_ARCH_GUEST_S3, kvm_vcpu_arch, guest_context.s3); + OFFSET(KVM_ARCH_GUEST_S4, kvm_vcpu_arch, guest_context.s4); + OFFSET(KVM_ARCH_GUEST_S5, kvm_vcpu_arch, guest_context.s5); + OFFSET(KVM_ARCH_GUEST_S6, kvm_vcpu_arch, guest_context.s6); + OFFSET(KVM_ARCH_GUEST_S7, kvm_vcpu_arch, guest_context.s7); + OFFSET(KVM_ARCH_GUEST_S8, kvm_vcpu_arch, guest_context.s8); + OFFSET(KVM_ARCH_GUEST_S9, kvm_vcpu_arch, guest_context.s9); + OFFSET(KVM_ARCH_GUEST_S10, kvm_vcpu_arch, guest_context.s10); + OFFSET(KVM_ARCH_GUEST_S11, kvm_vcpu_arch, guest_context.s11); + OFFSET(KVM_ARCH_GUEST_T3, kvm_vcpu_arch, guest_context.t3); + OFFSET(KVM_ARCH_GUEST_T4, kvm_vcpu_arch, guest_context.t4); + OFFSET(KVM_ARCH_GUEST_T5, kvm_vcpu_arch, guest_context.t5); + OFFSET(KVM_ARCH_GUEST_T6, kvm_vcpu_arch, guest_context.t6); + OFFSET(KVM_ARCH_GUEST_SEPC, kvm_vcpu_arch, guest_context.sepc); + OFFSET(KVM_ARCH_GUEST_SSTATUS, kvm_vcpu_arch, guest_context.sstatus); + OFFSET(KVM_ARCH_GUEST_HSTATUS, kvm_vcpu_arch, guest_context.hstatus); + OFFSET(KVM_ARCH_GUEST_SCOUNTEREN, kvm_vcpu_arch, guest_csr.scounteren); + + OFFSET(KVM_ARCH_HOST_ZERO, kvm_vcpu_arch, host_context.zero); + OFFSET(KVM_ARCH_HOST_RA, kvm_vcpu_arch, host_context.ra); + OFFSET(KVM_ARCH_HOST_SP, kvm_vcpu_arch, host_context.sp); + OFFSET(KVM_ARCH_HOST_GP, kvm_vcpu_arch, host_context.gp); + OFFSET(KVM_ARCH_HOST_TP, kvm_vcpu_arch, host_context.tp); + OFFSET(KVM_ARCH_HOST_T0, kvm_vcpu_arch, host_context.t0); + OFFSET(KVM_ARCH_HOST_T1, kvm_vcpu_arch, host_context.t1); + OFFSET(KVM_ARCH_HOST_T2, kvm_vcpu_arch, host_context.t2); + OFFSET(KVM_ARCH_HOST_S0, kvm_vcpu_arch, host_context.s0); + OFFSET(KVM_ARCH_HOST_S1, kvm_vcpu_arch, host_context.s1); + OFFSET(KVM_ARCH_HOST_A0, kvm_vcpu_arch, host_context.a0); + OFFSET(KVM_ARCH_HOST_A1, kvm_vcpu_arch, host_context.a1); + OFFSET(KVM_ARCH_HOST_A2, kvm_vcpu_arch, host_context.a2); + OFFSET(KVM_ARCH_HOST_A3, kvm_vcpu_arch, host_context.a3); + OFFSET(KVM_ARCH_HOST_A4, kvm_vcpu_arch, host_context.a4); + OFFSET(KVM_ARCH_HOST_A5, kvm_vcpu_arch, host_context.a5); + OFFSET(KVM_ARCH_HOST_A6, kvm_vcpu_arch, host_context.a6); + OFFSET(KVM_ARCH_HOST_A7, kvm_vcpu_arch, host_context.a7); + OFFSET(KVM_ARCH_HOST_S2, kvm_vcpu_arch, host_context.s2); + OFFSET(KVM_ARCH_HOST_S3, kvm_vcpu_arch, host_context.s3); + OFFSET(KVM_ARCH_HOST_S4, kvm_vcpu_arch, host_context.s4); + OFFSET(KVM_ARCH_HOST_S5, kvm_vcpu_arch, host_context.s5); + OFFSET(KVM_ARCH_HOST_S6, kvm_vcpu_arch, host_context.s6); + OFFSET(KVM_ARCH_HOST_S7, kvm_vcpu_arch, host_context.s7); + OFFSET(KVM_ARCH_HOST_S8, kvm_vcpu_arch, host_context.s8); + OFFSET(KVM_ARCH_HOST_S9, kvm_vcpu_arch, host_context.s9); + OFFSET(KVM_ARCH_HOST_S10, kvm_vcpu_arch, host_context.s10); + OFFSET(KVM_ARCH_HOST_S11, kvm_vcpu_arch, host_context.s11); + OFFSET(KVM_ARCH_HOST_T3, kvm_vcpu_arch, host_context.t3); + OFFSET(KVM_ARCH_HOST_T4, kvm_vcpu_arch, host_context.t4); + OFFSET(KVM_ARCH_HOST_T5, kvm_vcpu_arch, host_context.t5); + OFFSET(KVM_ARCH_HOST_T6, kvm_vcpu_arch, host_context.t6); + OFFSET(KVM_ARCH_HOST_SEPC, kvm_vcpu_arch, host_context.sepc); + OFFSET(KVM_ARCH_HOST_SSTATUS, kvm_vcpu_arch, host_context.sstatus); + OFFSET(KVM_ARCH_HOST_HSTATUS, kvm_vcpu_arch, host_context.hstatus); + OFFSET(KVM_ARCH_HOST_SSCRATCH, kvm_vcpu_arch, host_sscratch); + OFFSET(KVM_ARCH_HOST_STVEC, kvm_vcpu_arch, host_stvec); + OFFSET(KVM_ARCH_HOST_SCOUNTEREN, kvm_vcpu_arch, host_scounteren); + + OFFSET(KVM_ARCH_TRAP_SEPC, kvm_cpu_trap, sepc); + OFFSET(KVM_ARCH_TRAP_SCAUSE, kvm_cpu_trap, scause); + OFFSET(KVM_ARCH_TRAP_STVAL, kvm_cpu_trap, stval); + OFFSET(KVM_ARCH_TRAP_HTVAL, kvm_cpu_trap, htval); + OFFSET(KVM_ARCH_TRAP_HTINST, kvm_cpu_trap, htinst); + + /* F extension */ + + OFFSET(KVM_ARCH_FP_F_F0, kvm_cpu_context, fp.f.f[0]); + OFFSET(KVM_ARCH_FP_F_F1, kvm_cpu_context, fp.f.f[1]); + OFFSET(KVM_ARCH_FP_F_F2, kvm_cpu_context, fp.f.f[2]); + OFFSET(KVM_ARCH_FP_F_F3, kvm_cpu_context, fp.f.f[3]); + OFFSET(KVM_ARCH_FP_F_F4, kvm_cpu_context, fp.f.f[4]); + OFFSET(KVM_ARCH_FP_F_F5, kvm_cpu_context, fp.f.f[5]); + OFFSET(KVM_ARCH_FP_F_F6, kvm_cpu_context, fp.f.f[6]); + OFFSET(KVM_ARCH_FP_F_F7, kvm_cpu_context, fp.f.f[7]); + OFFSET(KVM_ARCH_FP_F_F8, kvm_cpu_context, fp.f.f[8]); + OFFSET(KVM_ARCH_FP_F_F9, kvm_cpu_context, fp.f.f[9]); + OFFSET(KVM_ARCH_FP_F_F10, kvm_cpu_context, fp.f.f[10]); + OFFSET(KVM_ARCH_FP_F_F11, kvm_cpu_context, fp.f.f[11]); + OFFSET(KVM_ARCH_FP_F_F12, kvm_cpu_context, fp.f.f[12]); + OFFSET(KVM_ARCH_FP_F_F13, kvm_cpu_context, fp.f.f[13]); + OFFSET(KVM_ARCH_FP_F_F14, kvm_cpu_context, fp.f.f[14]); + OFFSET(KVM_ARCH_FP_F_F15, kvm_cpu_context, fp.f.f[15]); + OFFSET(KVM_ARCH_FP_F_F16, kvm_cpu_context, fp.f.f[16]); + OFFSET(KVM_ARCH_FP_F_F17, kvm_cpu_context, fp.f.f[17]); + OFFSET(KVM_ARCH_FP_F_F18, kvm_cpu_context, fp.f.f[18]); + OFFSET(KVM_ARCH_FP_F_F19, kvm_cpu_context, fp.f.f[19]); + OFFSET(KVM_ARCH_FP_F_F20, kvm_cpu_context, fp.f.f[20]); + OFFSET(KVM_ARCH_FP_F_F21, kvm_cpu_context, fp.f.f[21]); + OFFSET(KVM_ARCH_FP_F_F22, kvm_cpu_context, fp.f.f[22]); + OFFSET(KVM_ARCH_FP_F_F23, kvm_cpu_context, fp.f.f[23]); + OFFSET(KVM_ARCH_FP_F_F24, kvm_cpu_context, fp.f.f[24]); + OFFSET(KVM_ARCH_FP_F_F25, kvm_cpu_context, fp.f.f[25]); + OFFSET(KVM_ARCH_FP_F_F26, kvm_cpu_context, fp.f.f[26]); + OFFSET(KVM_ARCH_FP_F_F27, kvm_cpu_context, fp.f.f[27]); + OFFSET(KVM_ARCH_FP_F_F28, kvm_cpu_context, fp.f.f[28]); + OFFSET(KVM_ARCH_FP_F_F29, kvm_cpu_context, fp.f.f[29]); + OFFSET(KVM_ARCH_FP_F_F30, kvm_cpu_context, fp.f.f[30]); + OFFSET(KVM_ARCH_FP_F_F31, kvm_cpu_context, fp.f.f[31]); + OFFSET(KVM_ARCH_FP_F_FCSR, kvm_cpu_context, fp.f.fcsr); + + /* D extension */ + + OFFSET(KVM_ARCH_FP_D_F0, kvm_cpu_context, fp.d.f[0]); + OFFSET(KVM_ARCH_FP_D_F1, kvm_cpu_context, fp.d.f[1]); + OFFSET(KVM_ARCH_FP_D_F2, kvm_cpu_context, fp.d.f[2]); + OFFSET(KVM_ARCH_FP_D_F3, kvm_cpu_context, fp.d.f[3]); + OFFSET(KVM_ARCH_FP_D_F4, kvm_cpu_context, fp.d.f[4]); + OFFSET(KVM_ARCH_FP_D_F5, kvm_cpu_context, fp.d.f[5]); + OFFSET(KVM_ARCH_FP_D_F6, kvm_cpu_context, fp.d.f[6]); + OFFSET(KVM_ARCH_FP_D_F7, kvm_cpu_context, fp.d.f[7]); + OFFSET(KVM_ARCH_FP_D_F8, kvm_cpu_context, fp.d.f[8]); + OFFSET(KVM_ARCH_FP_D_F9, kvm_cpu_context, fp.d.f[9]); + OFFSET(KVM_ARCH_FP_D_F10, kvm_cpu_context, fp.d.f[10]); + OFFSET(KVM_ARCH_FP_D_F11, kvm_cpu_context, fp.d.f[11]); + OFFSET(KVM_ARCH_FP_D_F12, kvm_cpu_context, fp.d.f[12]); + OFFSET(KVM_ARCH_FP_D_F13, kvm_cpu_context, fp.d.f[13]); + OFFSET(KVM_ARCH_FP_D_F14, kvm_cpu_context, fp.d.f[14]); + OFFSET(KVM_ARCH_FP_D_F15, kvm_cpu_context, fp.d.f[15]); + OFFSET(KVM_ARCH_FP_D_F16, kvm_cpu_context, fp.d.f[16]); + OFFSET(KVM_ARCH_FP_D_F17, kvm_cpu_context, fp.d.f[17]); + OFFSET(KVM_ARCH_FP_D_F18, kvm_cpu_context, fp.d.f[18]); + OFFSET(KVM_ARCH_FP_D_F19, kvm_cpu_context, fp.d.f[19]); + OFFSET(KVM_ARCH_FP_D_F20, kvm_cpu_context, fp.d.f[20]); + OFFSET(KVM_ARCH_FP_D_F21, kvm_cpu_context, fp.d.f[21]); + OFFSET(KVM_ARCH_FP_D_F22, kvm_cpu_context, fp.d.f[22]); + OFFSET(KVM_ARCH_FP_D_F23, kvm_cpu_context, fp.d.f[23]); + OFFSET(KVM_ARCH_FP_D_F24, kvm_cpu_context, fp.d.f[24]); + OFFSET(KVM_ARCH_FP_D_F25, kvm_cpu_context, fp.d.f[25]); + OFFSET(KVM_ARCH_FP_D_F26, kvm_cpu_context, fp.d.f[26]); + OFFSET(KVM_ARCH_FP_D_F27, kvm_cpu_context, fp.d.f[27]); + OFFSET(KVM_ARCH_FP_D_F28, kvm_cpu_context, fp.d.f[28]); + OFFSET(KVM_ARCH_FP_D_F29, kvm_cpu_context, fp.d.f[29]); + OFFSET(KVM_ARCH_FP_D_F30, kvm_cpu_context, fp.d.f[30]); + OFFSET(KVM_ARCH_FP_D_F31, kvm_cpu_context, fp.d.f[31]); + OFFSET(KVM_ARCH_FP_D_FCSR, kvm_cpu_context, fp.d.fcsr); + + /* + * THREAD_{F,X}* might be larger than a S-type offset can handle, but + * these are used in performance-sensitive assembly so we can't resort + * to loading the long immediate every time. + */ + DEFINE(TASK_THREAD_RA_RA, + offsetof(struct task_struct, thread.ra) + - offsetof(struct task_struct, thread.ra) + ); + DEFINE(TASK_THREAD_SP_RA, + offsetof(struct task_struct, thread.sp) + - offsetof(struct task_struct, thread.ra) + ); + DEFINE(TASK_THREAD_S0_RA, + offsetof(struct task_struct, thread.s[0]) + - offsetof(struct task_struct, thread.ra) + ); + DEFINE(TASK_THREAD_S1_RA, + offsetof(struct task_struct, thread.s[1]) + - offsetof(struct task_struct, thread.ra) + ); + DEFINE(TASK_THREAD_S2_RA, + offsetof(struct task_struct, thread.s[2]) + - offsetof(struct task_struct, thread.ra) + ); + DEFINE(TASK_THREAD_S3_RA, + offsetof(struct task_struct, thread.s[3]) + - offsetof(struct task_struct, thread.ra) + ); + DEFINE(TASK_THREAD_S4_RA, + offsetof(struct task_struct, thread.s[4]) + - offsetof(struct task_struct, thread.ra) + ); + DEFINE(TASK_THREAD_S5_RA, + offsetof(struct task_struct, thread.s[5]) + - offsetof(struct task_struct, thread.ra) + ); + DEFINE(TASK_THREAD_S6_RA, + offsetof(struct task_struct, thread.s[6]) + - offsetof(struct task_struct, thread.ra) + ); + DEFINE(TASK_THREAD_S7_RA, + offsetof(struct task_struct, thread.s[7]) + - offsetof(struct task_struct, thread.ra) + ); + DEFINE(TASK_THREAD_S8_RA, + offsetof(struct task_struct, thread.s[8]) + - offsetof(struct task_struct, thread.ra) + ); + DEFINE(TASK_THREAD_S9_RA, + offsetof(struct task_struct, thread.s[9]) + - offsetof(struct task_struct, thread.ra) + ); + DEFINE(TASK_THREAD_S10_RA, + offsetof(struct task_struct, thread.s[10]) + - offsetof(struct task_struct, thread.ra) + ); + DEFINE(TASK_THREAD_S11_RA, + offsetof(struct task_struct, thread.s[11]) + - offsetof(struct task_struct, thread.ra) + ); + + DEFINE(TASK_THREAD_F0_F0, + offsetof(struct task_struct, thread.fstate.f[0]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F1_F0, + offsetof(struct task_struct, thread.fstate.f[1]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F2_F0, + offsetof(struct task_struct, thread.fstate.f[2]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F3_F0, + offsetof(struct task_struct, thread.fstate.f[3]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F4_F0, + offsetof(struct task_struct, thread.fstate.f[4]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F5_F0, + offsetof(struct task_struct, thread.fstate.f[5]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F6_F0, + offsetof(struct task_struct, thread.fstate.f[6]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F7_F0, + offsetof(struct task_struct, thread.fstate.f[7]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F8_F0, + offsetof(struct task_struct, thread.fstate.f[8]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F9_F0, + offsetof(struct task_struct, thread.fstate.f[9]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F10_F0, + offsetof(struct task_struct, thread.fstate.f[10]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F11_F0, + offsetof(struct task_struct, thread.fstate.f[11]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F12_F0, + offsetof(struct task_struct, thread.fstate.f[12]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F13_F0, + offsetof(struct task_struct, thread.fstate.f[13]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F14_F0, + offsetof(struct task_struct, thread.fstate.f[14]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F15_F0, + offsetof(struct task_struct, thread.fstate.f[15]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F16_F0, + offsetof(struct task_struct, thread.fstate.f[16]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F17_F0, + offsetof(struct task_struct, thread.fstate.f[17]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F18_F0, + offsetof(struct task_struct, thread.fstate.f[18]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F19_F0, + offsetof(struct task_struct, thread.fstate.f[19]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F20_F0, + offsetof(struct task_struct, thread.fstate.f[20]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F21_F0, + offsetof(struct task_struct, thread.fstate.f[21]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F22_F0, + offsetof(struct task_struct, thread.fstate.f[22]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F23_F0, + offsetof(struct task_struct, thread.fstate.f[23]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F24_F0, + offsetof(struct task_struct, thread.fstate.f[24]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F25_F0, + offsetof(struct task_struct, thread.fstate.f[25]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F26_F0, + offsetof(struct task_struct, thread.fstate.f[26]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F27_F0, + offsetof(struct task_struct, thread.fstate.f[27]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F28_F0, + offsetof(struct task_struct, thread.fstate.f[28]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F29_F0, + offsetof(struct task_struct, thread.fstate.f[29]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F30_F0, + offsetof(struct task_struct, thread.fstate.f[30]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_F31_F0, + offsetof(struct task_struct, thread.fstate.f[31]) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + DEFINE(TASK_THREAD_FCSR_F0, + offsetof(struct task_struct, thread.fstate.fcsr) + - offsetof(struct task_struct, thread.fstate.f[0]) + ); + + /* + * We allocate a pt_regs on the stack when entering the kernel. This + * ensures the alignment is sane. + */ + DEFINE(PT_SIZE_ON_STACK, ALIGN(sizeof(struct pt_regs), STACK_ALIGN)); + + OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr); + OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr); + OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr); +} diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c new file mode 100644 index 000000000..90deabfe6 --- /dev/null +++ b/arch/riscv/kernel/cacheinfo.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017 SiFive + */ + +#include <linux/cpu.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <asm/cacheinfo.h> + +static struct riscv_cacheinfo_ops *rv_cache_ops; + +void riscv_set_cacheinfo_ops(struct riscv_cacheinfo_ops *ops) +{ + rv_cache_ops = ops; +} +EXPORT_SYMBOL_GPL(riscv_set_cacheinfo_ops); + +const struct attribute_group * +cache_get_priv_group(struct cacheinfo *this_leaf) +{ + if (rv_cache_ops && rv_cache_ops->get_priv_group) + return rv_cache_ops->get_priv_group(this_leaf); + return NULL; +} + +static struct cacheinfo *get_cacheinfo(u32 level, enum cache_type type) +{ + /* + * Using raw_smp_processor_id() elides a preemptability check, but this + * is really indicative of a larger problem: the cacheinfo UABI assumes + * that cores have a homonogenous view of the cache hierarchy. That + * happens to be the case for the current set of RISC-V systems, but + * likely won't be true in general. Since there's no way to provide + * correct information for these systems via the current UABI we're + * just eliding the check for now. + */ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(raw_smp_processor_id()); + struct cacheinfo *this_leaf; + int index; + + for (index = 0; index < this_cpu_ci->num_leaves; index++) { + this_leaf = this_cpu_ci->info_list + index; + if (this_leaf->level == level && this_leaf->type == type) + return this_leaf; + } + + return NULL; +} + +uintptr_t get_cache_size(u32 level, enum cache_type type) +{ + struct cacheinfo *this_leaf = get_cacheinfo(level, type); + + return this_leaf ? this_leaf->size : 0; +} + +uintptr_t get_cache_geometry(u32 level, enum cache_type type) +{ + struct cacheinfo *this_leaf = get_cacheinfo(level, type); + + return this_leaf ? (this_leaf->ways_of_associativity << 16 | + this_leaf->coherency_line_size) : + 0; +} + +static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type, + unsigned int level, unsigned int size, + unsigned int sets, unsigned int line_size) +{ + this_leaf->level = level; + this_leaf->type = type; + this_leaf->size = size; + this_leaf->number_of_sets = sets; + this_leaf->coherency_line_size = line_size; + + /* + * If the cache is fully associative, there is no need to + * check the other properties. + */ + if (sets == 1) + return; + + /* + * Set the ways number for n-ways associative, make sure + * all properties are big than zero. + */ + if (sets > 0 && size > 0 && line_size > 0) + this_leaf->ways_of_associativity = (size / sets) / line_size; +} + +static void fill_cacheinfo(struct cacheinfo **this_leaf, + struct device_node *node, unsigned int level) +{ + unsigned int size, sets, line_size; + + if (!of_property_read_u32(node, "cache-size", &size) && + !of_property_read_u32(node, "cache-block-size", &line_size) && + !of_property_read_u32(node, "cache-sets", &sets)) { + ci_leaf_init((*this_leaf)++, CACHE_TYPE_UNIFIED, level, size, sets, line_size); + } + + if (!of_property_read_u32(node, "i-cache-size", &size) && + !of_property_read_u32(node, "i-cache-sets", &sets) && + !of_property_read_u32(node, "i-cache-block-size", &line_size)) { + ci_leaf_init((*this_leaf)++, CACHE_TYPE_INST, level, size, sets, line_size); + } + + if (!of_property_read_u32(node, "d-cache-size", &size) && + !of_property_read_u32(node, "d-cache-sets", &sets) && + !of_property_read_u32(node, "d-cache-block-size", &line_size)) { + ci_leaf_init((*this_leaf)++, CACHE_TYPE_DATA, level, size, sets, line_size); + } +} + +int init_cache_level(unsigned int cpu) +{ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct device_node *np = of_cpu_device_node_get(cpu); + struct device_node *prev = NULL; + int levels = 0, leaves = 0, level; + + if (of_property_read_bool(np, "cache-size")) + ++leaves; + if (of_property_read_bool(np, "i-cache-size")) + ++leaves; + if (of_property_read_bool(np, "d-cache-size")) + ++leaves; + if (leaves > 0) + levels = 1; + + prev = np; + while ((np = of_find_next_cache_node(np))) { + of_node_put(prev); + prev = np; + if (!of_device_is_compatible(np, "cache")) + break; + if (of_property_read_u32(np, "cache-level", &level)) + break; + if (level <= levels) + break; + if (of_property_read_bool(np, "cache-size")) + ++leaves; + if (of_property_read_bool(np, "i-cache-size")) + ++leaves; + if (of_property_read_bool(np, "d-cache-size")) + ++leaves; + levels = level; + } + + of_node_put(np); + this_cpu_ci->num_levels = levels; + this_cpu_ci->num_leaves = leaves; + + return 0; +} + +int populate_cache_leaves(unsigned int cpu) +{ + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cacheinfo *this_leaf = this_cpu_ci->info_list; + struct device_node *np = of_cpu_device_node_get(cpu); + struct device_node *prev = NULL; + int levels = 1, level = 1; + + /* Level 1 caches in cpu node */ + fill_cacheinfo(&this_leaf, np, level); + + /* Next level caches in cache nodes */ + prev = np; + while ((np = of_find_next_cache_node(np))) { + of_node_put(prev); + prev = np; + + if (!of_device_is_compatible(np, "cache")) + break; + if (of_property_read_u32(np, "cache-level", &level)) + break; + if (level <= levels) + break; + + fill_cacheinfo(&this_leaf, np, level); + + levels = level; + } + of_node_put(np); + + return 0; +} diff --git a/arch/riscv/kernel/compat_signal.c b/arch/riscv/kernel/compat_signal.c new file mode 100644 index 000000000..6ec4e3425 --- /dev/null +++ b/arch/riscv/kernel/compat_signal.c @@ -0,0 +1,243 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/compat.h> +#include <linux/signal.h> +#include <linux/uaccess.h> +#include <linux/syscalls.h> +#include <linux/linkage.h> + +#include <asm/csr.h> +#include <asm/signal32.h> +#include <asm/switch_to.h> +#include <asm/ucontext.h> +#include <asm/vdso.h> + +#define COMPAT_DEBUG_SIG 0 + +struct compat_sigcontext { + struct compat_user_regs_struct sc_regs; + union __riscv_fp_state sc_fpregs; +}; + +struct compat_ucontext { + compat_ulong_t uc_flags; + struct compat_ucontext *uc_link; + compat_stack_t uc_stack; + sigset_t uc_sigmask; + /* There's some padding here to allow sigset_t to be expanded in the + * future. Though this is unlikely, other architectures put uc_sigmask + * at the end of this structure and explicitly state it can be + * expanded, so we didn't want to box ourselves in here. */ + __u8 __unused[1024 / 8 - sizeof(sigset_t)]; + /* We can't put uc_sigmask at the end of this structure because we need + * to be able to expand sigcontext in the future. For example, the + * vector ISA extension will almost certainly add ISA state. We want + * to ensure all user-visible ISA state can be saved and restored via a + * ucontext, so we're putting this at the end in order to allow for + * infinite extensibility. Since we know this will be extended and we + * assume sigset_t won't be extended an extreme amount, we're + * prioritizing this. */ + struct compat_sigcontext uc_mcontext; +}; + +struct compat_rt_sigframe { + struct compat_siginfo info; + struct compat_ucontext uc; +}; + +#ifdef CONFIG_FPU +static long compat_restore_fp_state(struct pt_regs *regs, + union __riscv_fp_state __user *sc_fpregs) +{ + long err; + struct __riscv_d_ext_state __user *state = &sc_fpregs->d; + size_t i; + + err = __copy_from_user(¤t->thread.fstate, state, sizeof(*state)); + if (unlikely(err)) + return err; + + fstate_restore(current, regs); + + /* We support no other extension state at this time. */ + for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) { + u32 value; + + err = __get_user(value, &sc_fpregs->q.reserved[i]); + if (unlikely(err)) + break; + if (value != 0) + return -EINVAL; + } + + return err; +} + +static long compat_save_fp_state(struct pt_regs *regs, + union __riscv_fp_state __user *sc_fpregs) +{ + long err; + struct __riscv_d_ext_state __user *state = &sc_fpregs->d; + size_t i; + + fstate_save(current, regs); + err = __copy_to_user(state, ¤t->thread.fstate, sizeof(*state)); + if (unlikely(err)) + return err; + + /* We support no other extension state at this time. */ + for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) { + err = __put_user(0, &sc_fpregs->q.reserved[i]); + if (unlikely(err)) + break; + } + + return err; +} +#else +#define compat_save_fp_state(task, regs) (0) +#define compat_restore_fp_state(task, regs) (0) +#endif + +static long compat_restore_sigcontext(struct pt_regs *regs, + struct compat_sigcontext __user *sc) +{ + long err; + struct compat_user_regs_struct cregs; + + /* sc_regs is structured the same as the start of pt_regs */ + err = __copy_from_user(&cregs, &sc->sc_regs, sizeof(sc->sc_regs)); + + cregs_to_regs(&cregs, regs); + + /* Restore the floating-point state. */ + if (has_fpu()) + err |= compat_restore_fp_state(regs, &sc->sc_fpregs); + return err; +} + +COMPAT_SYSCALL_DEFINE0(rt_sigreturn) +{ + struct pt_regs *regs = current_pt_regs(); + struct compat_rt_sigframe __user *frame; + struct task_struct *task; + sigset_t set; + + /* Always make any pending restarted system calls return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; + + frame = (struct compat_rt_sigframe __user *)regs->sp; + + if (!access_ok(frame, sizeof(*frame))) + goto badframe; + + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + set_current_blocked(&set); + + if (compat_restore_sigcontext(regs, &frame->uc.uc_mcontext)) + goto badframe; + + if (compat_restore_altstack(&frame->uc.uc_stack)) + goto badframe; + + return regs->a0; + +badframe: + task = current; + if (show_unhandled_signals) { + pr_info_ratelimited( + "%s[%d]: bad frame in %s: frame=%p pc=%p sp=%p\n", + task->comm, task_pid_nr(task), __func__, + frame, (void *)regs->epc, (void *)regs->sp); + } + force_sig(SIGSEGV); + return 0; +} + +static long compat_setup_sigcontext(struct compat_rt_sigframe __user *frame, + struct pt_regs *regs) +{ + struct compat_sigcontext __user *sc = &frame->uc.uc_mcontext; + struct compat_user_regs_struct cregs; + long err; + + regs_to_cregs(&cregs, regs); + + /* sc_regs is structured the same as the start of pt_regs */ + err = __copy_to_user(&sc->sc_regs, &cregs, sizeof(sc->sc_regs)); + /* Save the floating-point state. */ + if (has_fpu()) + err |= compat_save_fp_state(regs, &sc->sc_fpregs); + return err; +} + +static inline void __user *compat_get_sigframe(struct ksignal *ksig, + struct pt_regs *regs, size_t framesize) +{ + unsigned long sp; + /* Default to using normal stack */ + sp = regs->sp; + + /* + * If we are on the alternate signal stack and would overflow it, don't. + * Return an always-bogus address instead so we will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize))) + return (void __user __force *)(-1UL); + + /* This is the X/Open sanctioned signal stack switching. */ + sp = sigsp(sp, ksig) - framesize; + + /* Align the stack frame. */ + sp &= ~0xfUL; + + return (void __user *)sp; +} + +int compat_setup_rt_frame(struct ksignal *ksig, sigset_t *set, + struct pt_regs *regs) +{ + struct compat_rt_sigframe __user *frame; + long err = 0; + + frame = compat_get_sigframe(ksig, regs, sizeof(*frame)); + if (!access_ok(frame, sizeof(*frame))) + return -EFAULT; + + err |= copy_siginfo_to_user32(&frame->info, &ksig->info); + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(NULL, &frame->uc.uc_link); + err |= __compat_save_altstack(&frame->uc.uc_stack, regs->sp); + err |= compat_setup_sigcontext(frame, regs); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + return -EFAULT; + + regs->ra = (unsigned long)COMPAT_VDSO_SYMBOL( + current->mm->context.vdso, rt_sigreturn); + + /* + * Set up registers for signal handler. + * Registers that we don't modify keep the value they had from + * user-space at the time we took the signal. + * We always pass siginfo and mcontext, regardless of SA_SIGINFO, + * since some things rely on this (e.g. glibc's debug/segfault.c). + */ + regs->epc = (unsigned long)ksig->ka.sa.sa_handler; + regs->sp = (unsigned long)frame; + regs->a0 = ksig->sig; /* a0: signal number */ + regs->a1 = (unsigned long)(&frame->info); /* a1: siginfo pointer */ + regs->a2 = (unsigned long)(&frame->uc); /* a2: ucontext pointer */ + +#if COMPAT_DEBUG_SIG + pr_info("SIG deliver (%s:%d): sig=%d pc=%p ra=%p sp=%p\n", + current->comm, task_pid_nr(current), ksig->sig, + (void *)regs->epc, (void *)regs->ra, frame); +#endif + + return 0; +} diff --git a/arch/riscv/kernel/compat_syscall_table.c b/arch/riscv/kernel/compat_syscall_table.c new file mode 100644 index 000000000..651f2b009 --- /dev/null +++ b/arch/riscv/kernel/compat_syscall_table.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define __SYSCALL_COMPAT + +#include <linux/compat.h> +#include <linux/syscalls.h> +#include <asm-generic/mman-common.h> +#include <asm-generic/syscalls.h> +#include <asm/syscall.h> + +#undef __SYSCALL +#define __SYSCALL(nr, call) [nr] = (call), + +asmlinkage long compat_sys_rt_sigreturn(void); + +void * const compat_sys_call_table[__NR_syscalls] = { + [0 ... __NR_syscalls - 1] = sys_ni_syscall, +#include <asm/unistd.h> +}; diff --git a/arch/riscv/kernel/compat_vdso/.gitignore b/arch/riscv/kernel/compat_vdso/.gitignore new file mode 100644 index 000000000..19d83d846 --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +compat_vdso.lds diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile new file mode 100644 index 000000000..737c0857b --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -0,0 +1,88 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for compat_vdso +# + +# Symbols present in the compat_vdso +compat_vdso-syms = rt_sigreturn +compat_vdso-syms += getcpu +compat_vdso-syms += flush_icache + +COMPAT_CC := $(CC) +COMPAT_LD := $(LD) + +# binutils 2.35 does not support the zifencei extension, but in the ISA +# spec 20191213, G stands for IMAFD_ZICSR_ZIFENCEI. +ifdef CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI + COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32 +else + COMPAT_CC_FLAGS := -march=rv32imafd -mabi=ilp32 +endif +COMPAT_LD_FLAGS := -melf32lriscv + +# Disable attributes, as they're useless and break the build. +COMPAT_CC_FLAGS += $(call cc-option,-mno-riscv-attribute) +COMPAT_CC_FLAGS += $(call as-option,-Wa$(comma)-mno-arch-attr) + +# Files to link into the compat_vdso +obj-compat_vdso = $(patsubst %, %.o, $(compat_vdso-syms)) note.o + +# Build rules +targets := $(obj-compat_vdso) compat_vdso.so compat_vdso.so.dbg compat_vdso.lds +obj-compat_vdso := $(addprefix $(obj)/, $(obj-compat_vdso)) + +obj-y += compat_vdso.o +CPPFLAGS_compat_vdso.lds += -P -C -U$(ARCH) + +# Disable profiling and instrumentation for VDSO code +GCOV_PROFILE := n +KCOV_INSTRUMENT := n +KASAN_SANITIZE := n +UBSAN_SANITIZE := n + +# Force dependency +$(obj)/compat_vdso.o: $(obj)/compat_vdso.so + +# link rule for the .so file, .lds has to be first +$(obj)/compat_vdso.so.dbg: $(obj)/compat_vdso.lds $(obj-compat_vdso) FORCE + $(call if_changed,compat_vdsold) +LDFLAGS_compat_vdso.so.dbg = -shared -S -soname=linux-compat_vdso.so.1 \ + --build-id=sha1 --hash-style=both --eh-frame-hdr + +$(obj-compat_vdso): %.o: %.S FORCE + $(call if_changed_dep,compat_vdsoas) + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# Generate VDSO offsets using helper script +gen-compat_vdsosym := $(srctree)/$(src)/gen_compat_vdso_offsets.sh +quiet_cmd_compat_vdsosym = VDSOSYM $@ + cmd_compat_vdsosym = $(NM) $< | $(gen-compat_vdsosym) | LC_ALL=C sort > $@ + +include/generated/compat_vdso-offsets.h: $(obj)/compat_vdso.so.dbg FORCE + $(call if_changed,compat_vdsosym) + +# actual build commands +# The DSO images are built using a special linker script +# Make sure only to export the intended __compat_vdso_xxx symbol offsets. +quiet_cmd_compat_vdsold = VDSOLD $@ + cmd_compat_vdsold = $(COMPAT_LD) $(ld_flags) $(COMPAT_LD_FLAGS) -T $(filter-out FORCE,$^) -o $@.tmp && \ + $(OBJCOPY) $(patsubst %, -G __compat_vdso_%, $(compat_vdso-syms)) $@.tmp $@ && \ + rm $@.tmp + +# actual build commands +quiet_cmd_compat_vdsoas = VDSOAS $@ + cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $< + +# install commands for the unstripped file +quiet_cmd_compat_vdso_install = INSTALL $@ + cmd_compat_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/compat_vdso/$@ + +compat_vdso.so: $(obj)/compat_vdso.so.dbg + @mkdir -p $(MODLIB)/compat_vdso + $(call cmd,compat_vdso_install) + +compat_vdso_install: compat_vdso.so diff --git a/arch/riscv/kernel/compat_vdso/compat_vdso.S b/arch/riscv/kernel/compat_vdso/compat_vdso.S new file mode 100644 index 000000000..ffd66237e --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/compat_vdso.S @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#define vdso_start compat_vdso_start +#define vdso_end compat_vdso_end + +#define __VDSO_PATH "arch/riscv/kernel/compat_vdso/compat_vdso.so" + +#include "../vdso/vdso.S" diff --git a/arch/riscv/kernel/compat_vdso/compat_vdso.lds.S b/arch/riscv/kernel/compat_vdso/compat_vdso.lds.S new file mode 100644 index 000000000..c7c9355d3 --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/compat_vdso.lds.S @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "../vdso/vdso.lds.S" diff --git a/arch/riscv/kernel/compat_vdso/flush_icache.S b/arch/riscv/kernel/compat_vdso/flush_icache.S new file mode 100644 index 000000000..523dd8b96 --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/flush_icache.S @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "../vdso/flush_icache.S" diff --git a/arch/riscv/kernel/compat_vdso/gen_compat_vdso_offsets.sh b/arch/riscv/kernel/compat_vdso/gen_compat_vdso_offsets.sh new file mode 100755 index 000000000..8ac070c78 --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/gen_compat_vdso_offsets.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +LC_ALL=C +sed -n -e 's/^[0]\+\(0[0-9a-fA-F]*\) . \(__vdso_[a-zA-Z0-9_]*\)$/\#define compat\2_offset\t0x\1/p' diff --git a/arch/riscv/kernel/compat_vdso/getcpu.S b/arch/riscv/kernel/compat_vdso/getcpu.S new file mode 100644 index 000000000..10f463efe --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/getcpu.S @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "../vdso/getcpu.S" diff --git a/arch/riscv/kernel/compat_vdso/note.S b/arch/riscv/kernel/compat_vdso/note.S new file mode 100644 index 000000000..b10312907 --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/note.S @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "../vdso/note.S" diff --git a/arch/riscv/kernel/compat_vdso/rt_sigreturn.S b/arch/riscv/kernel/compat_vdso/rt_sigreturn.S new file mode 100644 index 000000000..884aada4f --- /dev/null +++ b/arch/riscv/kernel/compat_vdso/rt_sigreturn.S @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include "../vdso/rt_sigreturn.S" diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c new file mode 100644 index 000000000..f7a832e3a --- /dev/null +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/err.h> +#include <linux/irq.h> +#include <linux/cpu.h> +#include <linux/sched/hotplug.h> +#include <asm/irq.h> +#include <asm/cpu_ops.h> +#include <asm/numa.h> +#include <asm/sbi.h> + +bool cpu_has_hotplug(unsigned int cpu) +{ + if (cpu_ops[cpu]->cpu_stop) + return true; + + return false; +} + +/* + * __cpu_disable runs on the processor to be shutdown. + */ +int __cpu_disable(void) +{ + int ret = 0; + unsigned int cpu = smp_processor_id(); + + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_stop) + return -EOPNOTSUPP; + + if (cpu_ops[cpu]->cpu_disable) + ret = cpu_ops[cpu]->cpu_disable(cpu); + + if (ret) + return ret; + + remove_cpu_topology(cpu); + numa_remove_cpu(cpu); + set_cpu_online(cpu, false); + irq_migrate_all_off_this_cpu(); + + return ret; +} + +/* + * Called on the thread which is asking for a CPU to be shutdown. + */ +void __cpu_die(unsigned int cpu) +{ + int ret = 0; + + if (!cpu_wait_death(cpu, 5)) { + pr_err("CPU %u: didn't die\n", cpu); + return; + } + pr_notice("CPU%u: off\n", cpu); + + /* Verify from the firmware if the cpu is really stopped*/ + if (cpu_ops[cpu]->cpu_is_stopped) + ret = cpu_ops[cpu]->cpu_is_stopped(cpu); + if (ret) + pr_warn("CPU%d may not have stopped: %d\n", cpu, ret); +} + +/* + * Called from the idle thread for the CPU which has been shutdown. + */ +void arch_cpu_idle_dead(void) +{ + idle_task_exit(); + + (void)cpu_report_death(); + + cpu_ops[smp_processor_id()]->cpu_stop(); + /* It should never reach here */ + BUG(); +} diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c new file mode 100644 index 000000000..0f76181dc --- /dev/null +++ b/arch/riscv/kernel/cpu.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Regents of the University of California + */ + +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/seq_file.h> +#include <linux/of.h> +#include <asm/csr.h> +#include <asm/hwcap.h> +#include <asm/sbi.h> +#include <asm/smp.h> +#include <asm/pgtable.h> + +/* + * Returns the hart ID of the given device tree node, or -ENODEV if the node + * isn't an enabled and valid RISC-V hart node. + */ +int riscv_of_processor_hartid(struct device_node *node, unsigned long *hart) +{ + const char *isa; + + if (!of_device_is_compatible(node, "riscv")) { + pr_warn("Found incompatible CPU\n"); + return -ENODEV; + } + + *hart = (unsigned long) of_get_cpu_hwid(node, 0); + if (*hart == ~0UL) { + pr_warn("Found CPU without hart ID\n"); + return -ENODEV; + } + + if (!of_device_is_available(node)) { + pr_info("CPU with hartid=%lu is not available\n", *hart); + return -ENODEV; + } + + if (of_property_read_string(node, "riscv,isa", &isa)) { + pr_warn("CPU with hartid=%lu has no \"riscv,isa\" property\n", *hart); + return -ENODEV; + } + if (isa[0] != 'r' || isa[1] != 'v') { + pr_warn("CPU with hartid=%lu has an invalid ISA of \"%s\"\n", *hart, isa); + return -ENODEV; + } + + return 0; +} + +/* + * Find hart ID of the CPU DT node under which given DT node falls. + * + * To achieve this, we walk up the DT tree until we find an active + * RISC-V core (HART) node and extract the cpuid from it. + */ +int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) +{ + for (; node; node = node->parent) { + if (of_device_is_compatible(node, "riscv")) { + *hartid = (unsigned long)of_get_cpu_hwid(node, 0); + if (*hartid == ~0UL) { + pr_warn("Found CPU without hart ID\n"); + return -ENODEV; + } + return 0; + } + } + + return -1; +} + +#ifdef CONFIG_PROC_FS + +struct riscv_cpuinfo { + unsigned long mvendorid; + unsigned long marchid; + unsigned long mimpid; +}; +static DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); + +static int riscv_cpuinfo_starting(unsigned int cpu) +{ + struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); + +#if IS_ENABLED(CONFIG_RISCV_SBI) + ci->mvendorid = sbi_spec_is_0_1() ? 0 : sbi_get_mvendorid(); + ci->marchid = sbi_spec_is_0_1() ? 0 : sbi_get_marchid(); + ci->mimpid = sbi_spec_is_0_1() ? 0 : sbi_get_mimpid(); +#elif IS_ENABLED(CONFIG_RISCV_M_MODE) + ci->mvendorid = csr_read(CSR_MVENDORID); + ci->marchid = csr_read(CSR_MARCHID); + ci->mimpid = csr_read(CSR_MIMPID); +#else + ci->mvendorid = 0; + ci->marchid = 0; + ci->mimpid = 0; +#endif + + return 0; +} + +static int __init riscv_cpuinfo_init(void) +{ + int ret; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "riscv/cpuinfo:starting", + riscv_cpuinfo_starting, NULL); + if (ret < 0) { + pr_err("cpuinfo: failed to register hotplug callbacks.\n"); + return ret; + } + + return 0; +} +device_initcall(riscv_cpuinfo_init); + +#define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \ + { \ + .uprop = #UPROP, \ + .isa_ext_id = EXTID, \ + } +/* + * Here are the ordering rules of extension naming defined by RISC-V + * specification : + * 1. All extensions should be separated from other multi-letter extensions + * by an underscore. + * 2. The first letter following the 'Z' conventionally indicates the most + * closely related alphabetical extension category, IMAFDQLCBKJTPVH. + * If multiple 'Z' extensions are named, they should be ordered first + * by category, then alphabetically within a category. + * 3. Standard supervisor-level extensions (starts with 'S') should be + * listed after standard unprivileged extensions. If multiple + * supervisor-level extensions are listed, they should be ordered + * alphabetically. + * 4. Non-standard extensions (starts with 'X') must be listed after all + * standard extensions. They must be separated from other multi-letter + * extensions by an underscore. + */ +static struct riscv_isa_ext_data isa_ext_arr[] = { + __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), + __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), + __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), + __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), + __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), + __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), + __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX), +}; + +static void print_isa_ext(struct seq_file *f) +{ + struct riscv_isa_ext_data *edata; + int i = 0, arr_sz; + + arr_sz = ARRAY_SIZE(isa_ext_arr) - 1; + + /* No extension support available */ + if (arr_sz <= 0) + return; + + for (i = 0; i <= arr_sz; i++) { + edata = &isa_ext_arr[i]; + if (!__riscv_isa_extension_available(NULL, edata->isa_ext_id)) + continue; + seq_printf(f, "_%s", edata->uprop); + } +} + +/* + * These are the only valid base (single letter) ISA extensions as per the spec. + * It also specifies the canonical order in which it appears in the spec. + * Some of the extension may just be a place holder for now (B, K, P, J). + * This should be updated once corresponding extensions are ratified. + */ +static const char base_riscv_exts[13] = "imafdqcbkjpvh"; + +static void print_isa(struct seq_file *f, const char *isa) +{ + int i; + + seq_puts(f, "isa\t\t: "); + /* Print the rv[64/32] part */ + seq_write(f, isa, 4); + for (i = 0; i < sizeof(base_riscv_exts); i++) { + if (__riscv_isa_extension_available(NULL, base_riscv_exts[i] - 'a')) + /* Print only enabled the base ISA extensions */ + seq_write(f, &base_riscv_exts[i], 1); + } + print_isa_ext(f); + seq_puts(f, "\n"); +} + +static void print_mmu(struct seq_file *f) +{ + char sv_type[16]; + +#ifdef CONFIG_MMU +#if defined(CONFIG_32BIT) + strncpy(sv_type, "sv32", 5); +#elif defined(CONFIG_64BIT) + if (pgtable_l5_enabled) + strncpy(sv_type, "sv57", 5); + else if (pgtable_l4_enabled) + strncpy(sv_type, "sv48", 5); + else + strncpy(sv_type, "sv39", 5); +#endif +#else + strncpy(sv_type, "none", 5); +#endif /* CONFIG_MMU */ + seq_printf(f, "mmu\t\t: %s\n", sv_type); +} + +static void *c_start(struct seq_file *m, loff_t *pos) +{ + if (*pos == nr_cpu_ids) + return NULL; + + *pos = cpumask_next(*pos - 1, cpu_online_mask); + if ((*pos) < nr_cpu_ids) + return (void *)(uintptr_t)(1 + *pos); + return NULL; +} + +static void *c_next(struct seq_file *m, void *v, loff_t *pos) +{ + (*pos)++; + return c_start(m, pos); +} + +static void c_stop(struct seq_file *m, void *v) +{ +} + +static int c_show(struct seq_file *m, void *v) +{ + unsigned long cpu_id = (unsigned long)v - 1; + struct device_node *node = of_get_cpu_node(cpu_id, NULL); + struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id); + const char *compat, *isa; + + seq_printf(m, "processor\t: %lu\n", cpu_id); + seq_printf(m, "hart\t\t: %lu\n", cpuid_to_hartid_map(cpu_id)); + if (!of_property_read_string(node, "riscv,isa", &isa)) + print_isa(m, isa); + print_mmu(m); + if (!of_property_read_string(node, "compatible", &compat) + && strcmp(compat, "riscv")) + seq_printf(m, "uarch\t\t: %s\n", compat); + seq_printf(m, "mvendorid\t: 0x%lx\n", ci->mvendorid); + seq_printf(m, "marchid\t\t: 0x%lx\n", ci->marchid); + seq_printf(m, "mimpid\t\t: 0x%lx\n", ci->mimpid); + seq_puts(m, "\n"); + of_node_put(node); + + return 0; +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = c_show +}; + +#endif /* CONFIG_PROC_FS */ diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c new file mode 100644 index 000000000..eb479a88a --- /dev/null +++ b/arch/riscv/kernel/cpu_ops.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020 Western Digital Corporation or its affiliates. + */ + +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/of.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <asm/cpu_ops.h> +#include <asm/cpu_ops_sbi.h> +#include <asm/sbi.h> +#include <asm/smp.h> + +const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; + +extern const struct cpu_operations cpu_ops_sbi; +#ifndef CONFIG_RISCV_BOOT_SPINWAIT +const struct cpu_operations cpu_ops_spinwait = { + .name = "", + .cpu_prepare = NULL, + .cpu_start = NULL, +}; +#endif + +void __init cpu_set_ops(int cpuid) +{ +#if IS_ENABLED(CONFIG_RISCV_SBI) + if (sbi_probe_extension(SBI_EXT_HSM)) { + if (!cpuid) + pr_info("SBI HSM extension detected\n"); + cpu_ops[cpuid] = &cpu_ops_sbi; + } else +#endif + cpu_ops[cpuid] = &cpu_ops_spinwait; +} diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c new file mode 100644 index 000000000..efa0f0816 --- /dev/null +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HSM extension and cpu_ops implementation. + * + * Copyright (c) 2020 Western Digital Corporation or its affiliates. + */ + +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/sched/task_stack.h> +#include <asm/cpu_ops.h> +#include <asm/cpu_ops_sbi.h> +#include <asm/sbi.h> +#include <asm/smp.h> + +extern char secondary_start_sbi[]; +const struct cpu_operations cpu_ops_sbi; + +/* + * Ordered booting via HSM brings one cpu at a time. However, cpu hotplug can + * be invoked from multiple threads in parallel. Define a per cpu data + * to handle that. + */ +static DEFINE_PER_CPU(struct sbi_hart_boot_data, boot_data); + +static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr, + unsigned long priv) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_START, + hartid, saddr, priv, 0, 0, 0); + if (ret.error) + return sbi_err_map_linux_errno(ret.error); + else + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +static int sbi_hsm_hart_stop(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_STOP, 0, 0, 0, 0, 0, 0); + + if (ret.error) + return sbi_err_map_linux_errno(ret.error); + else + return 0; +} + +static int sbi_hsm_hart_get_status(unsigned long hartid) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_STATUS, + hartid, 0, 0, 0, 0, 0); + if (ret.error) + return sbi_err_map_linux_errno(ret.error); + else + return ret.value; +} +#endif + +static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle) +{ + unsigned long boot_addr = __pa_symbol(secondary_start_sbi); + unsigned long hartid = cpuid_to_hartid_map(cpuid); + unsigned long hsm_data; + struct sbi_hart_boot_data *bdata = &per_cpu(boot_data, cpuid); + + /* Make sure tidle is updated */ + smp_mb(); + bdata->task_ptr = tidle; + bdata->stack_ptr = task_stack_page(tidle) + THREAD_SIZE; + /* Make sure boot data is updated */ + smp_mb(); + hsm_data = __pa(bdata); + return sbi_hsm_hart_start(hartid, boot_addr, hsm_data); +} + +static int sbi_cpu_prepare(unsigned int cpuid) +{ + if (!cpu_ops_sbi.cpu_start) { + pr_err("cpu start method not defined for CPU [%d]\n", cpuid); + return -ENODEV; + } + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +static int sbi_cpu_disable(unsigned int cpuid) +{ + if (!cpu_ops_sbi.cpu_stop) + return -EOPNOTSUPP; + return 0; +} + +static void sbi_cpu_stop(void) +{ + int ret; + + ret = sbi_hsm_hart_stop(); + pr_crit("Unable to stop the cpu %u (%d)\n", smp_processor_id(), ret); +} + +static int sbi_cpu_is_stopped(unsigned int cpuid) +{ + int rc; + unsigned long hartid = cpuid_to_hartid_map(cpuid); + + rc = sbi_hsm_hart_get_status(hartid); + + if (rc == SBI_HSM_STATE_STOPPED) + return 0; + return rc; +} +#endif + +const struct cpu_operations cpu_ops_sbi = { + .name = "sbi", + .cpu_prepare = sbi_cpu_prepare, + .cpu_start = sbi_cpu_start, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = sbi_cpu_disable, + .cpu_stop = sbi_cpu_stop, + .cpu_is_stopped = sbi_cpu_is_stopped, +#endif +}; diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c new file mode 100644 index 000000000..d98d19226 --- /dev/null +++ b/arch/riscv/kernel/cpu_ops_spinwait.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020 Western Digital Corporation or its affiliates. + */ + +#include <linux/errno.h> +#include <linux/of.h> +#include <linux/string.h> +#include <linux/sched/task_stack.h> +#include <asm/cpu_ops.h> +#include <asm/sbi.h> +#include <asm/smp.h> + +#include "head.h" + +const struct cpu_operations cpu_ops_spinwait; +void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data"); +void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data"); + +static void cpu_update_secondary_bootdata(unsigned int cpuid, + struct task_struct *tidle) +{ + unsigned long hartid = cpuid_to_hartid_map(cpuid); + + /* + * The hartid must be less than NR_CPUS to avoid out-of-bound access + * errors for __cpu_spinwait_stack/task_pointer. That is not always possible + * for platforms with discontiguous hartid numbering scheme. That's why + * spinwait booting is not the recommended approach for any platforms + * booting Linux in S-mode and can be disabled in the future. + */ + if (hartid == INVALID_HARTID || hartid >= (unsigned long) NR_CPUS) + return; + + /* Make sure tidle is updated */ + smp_mb(); + WRITE_ONCE(__cpu_spinwait_stack_pointer[hartid], + task_stack_page(tidle) + THREAD_SIZE); + WRITE_ONCE(__cpu_spinwait_task_pointer[hartid], tidle); +} + +static int spinwait_cpu_prepare(unsigned int cpuid) +{ + if (!cpu_ops_spinwait.cpu_start) { + pr_err("cpu start method not defined for CPU [%d]\n", cpuid); + return -ENODEV; + } + return 0; +} + +static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle) +{ + /* + * In this protocol, all cpus boot on their own accord. _start + * selects the first cpu to boot the kernel and causes the remainder + * of the cpus to spin in a loop waiting for their stack pointer to be + * setup by that main cpu. Writing to bootdata + * (i.e __cpu_spinwait_stack_pointer) signals to the spinning cpus that they + * can continue the boot process. + */ + cpu_update_secondary_bootdata(cpuid, tidle); + + return 0; +} + +const struct cpu_operations cpu_ops_spinwait = { + .name = "spinwait", + .cpu_prepare = spinwait_cpu_prepare, + .cpu_start = spinwait_cpu_start, +}; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c new file mode 100644 index 000000000..fd1238df6 --- /dev/null +++ b/arch/riscv/kernel/cpufeature.c @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copied from arch/arm64/kernel/cpufeature.c + * + * Copyright (C) 2015 ARM Ltd. + * Copyright (C) 2017 SiFive + */ + +#include <linux/bitmap.h> +#include <linux/ctype.h> +#include <linux/libfdt.h> +#include <linux/memory.h> +#include <linux/module.h> +#include <linux/of.h> +#include <asm/alternative.h> +#include <asm/cacheflush.h> +#include <asm/errata_list.h> +#include <asm/hwcap.h> +#include <asm/patch.h> +#include <asm/pgtable.h> +#include <asm/processor.h> +#include <asm/smp.h> +#include <asm/switch_to.h> + +#define NUM_ALPHA_EXTS ('z' - 'a' + 1) + +unsigned long elf_hwcap __read_mostly; + +/* Host ISA bitmap */ +static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; + +DEFINE_STATIC_KEY_ARRAY_FALSE(riscv_isa_ext_keys, RISCV_ISA_EXT_KEY_MAX); +EXPORT_SYMBOL(riscv_isa_ext_keys); + +/** + * riscv_isa_extension_base() - Get base extension word + * + * @isa_bitmap: ISA bitmap to use + * Return: base extension word as unsigned long value + * + * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used. + */ +unsigned long riscv_isa_extension_base(const unsigned long *isa_bitmap) +{ + if (!isa_bitmap) + return riscv_isa[0]; + return isa_bitmap[0]; +} +EXPORT_SYMBOL_GPL(riscv_isa_extension_base); + +/** + * __riscv_isa_extension_available() - Check whether given extension + * is available or not + * + * @isa_bitmap: ISA bitmap to use + * @bit: bit position of the desired extension + * Return: true or false + * + * NOTE: If isa_bitmap is NULL then Host ISA bitmap will be used. + */ +bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit) +{ + const unsigned long *bmap = (isa_bitmap) ? isa_bitmap : riscv_isa; + + if (bit >= RISCV_ISA_EXT_MAX) + return false; + + return test_bit(bit, bmap) ? true : false; +} +EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); + +void __init riscv_fill_hwcap(void) +{ + struct device_node *node; + const char *isa; + char print_str[NUM_ALPHA_EXTS + 1]; + int i, j, rc; + static unsigned long isa2hwcap[256] = {0}; + unsigned long hartid; + + isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I; + isa2hwcap['m'] = isa2hwcap['M'] = COMPAT_HWCAP_ISA_M; + isa2hwcap['a'] = isa2hwcap['A'] = COMPAT_HWCAP_ISA_A; + isa2hwcap['f'] = isa2hwcap['F'] = COMPAT_HWCAP_ISA_F; + isa2hwcap['d'] = isa2hwcap['D'] = COMPAT_HWCAP_ISA_D; + isa2hwcap['c'] = isa2hwcap['C'] = COMPAT_HWCAP_ISA_C; + + elf_hwcap = 0; + + bitmap_zero(riscv_isa, RISCV_ISA_EXT_MAX); + + for_each_of_cpu_node(node) { + unsigned long this_hwcap = 0; + DECLARE_BITMAP(this_isa, RISCV_ISA_EXT_MAX); + const char *temp; + + rc = riscv_of_processor_hartid(node, &hartid); + if (rc < 0) + continue; + + if (of_property_read_string(node, "riscv,isa", &isa)) { + pr_warn("Unable to find \"riscv,isa\" devicetree entry\n"); + continue; + } + + temp = isa; +#if IS_ENABLED(CONFIG_32BIT) + if (!strncmp(isa, "rv32", 4)) + isa += 4; +#elif IS_ENABLED(CONFIG_64BIT) + if (!strncmp(isa, "rv64", 4)) + isa += 4; +#endif + /* The riscv,isa DT property must start with rv64 or rv32 */ + if (temp == isa) + continue; + bitmap_zero(this_isa, RISCV_ISA_EXT_MAX); + for (; *isa; ++isa) { + const char *ext = isa++; + const char *ext_end = isa; + bool ext_long = false, ext_err = false; + + switch (*ext) { + case 's': + /** + * Workaround for invalid single-letter 's' & 'u'(QEMU). + * No need to set the bit in riscv_isa as 's' & 'u' are + * not valid ISA extensions. It works until multi-letter + * extension starting with "Su" appears. + */ + if (ext[-1] != '_' && ext[1] == 'u') { + ++isa; + ext_err = true; + break; + } + fallthrough; + case 'x': + case 'z': + ext_long = true; + /* Multi-letter extension must be delimited */ + for (; *isa && *isa != '_'; ++isa) + if (unlikely(!islower(*isa) + && !isdigit(*isa))) + ext_err = true; + /* Parse backwards */ + ext_end = isa; + if (unlikely(ext_err)) + break; + if (!isdigit(ext_end[-1])) + break; + /* Skip the minor version */ + while (isdigit(*--ext_end)) + ; + if (ext_end[0] != 'p' + || !isdigit(ext_end[-1])) { + /* Advance it to offset the pre-decrement */ + ++ext_end; + break; + } + /* Skip the major version */ + while (isdigit(*--ext_end)) + ; + ++ext_end; + break; + default: + if (unlikely(!islower(*ext))) { + ext_err = true; + break; + } + /* Find next extension */ + if (!isdigit(*isa)) + break; + /* Skip the minor version */ + while (isdigit(*++isa)) + ; + if (*isa != 'p') + break; + if (!isdigit(*++isa)) { + --isa; + break; + } + /* Skip the major version */ + while (isdigit(*++isa)) + ; + break; + } + if (*isa != '_') + --isa; + +#define SET_ISA_EXT_MAP(name, bit) \ + do { \ + if ((ext_end - ext == sizeof(name) - 1) && \ + !memcmp(ext, name, sizeof(name) - 1)) \ + set_bit(bit, this_isa); \ + } while (false) \ + + if (unlikely(ext_err)) + continue; + if (!ext_long) { + this_hwcap |= isa2hwcap[(unsigned char)(*ext)]; + set_bit(*ext - 'a', this_isa); + } else { + SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); + SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT); + SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM); + SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE); + SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC); + SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL); + } +#undef SET_ISA_EXT_MAP + } + + /* + * All "okay" hart should have same isa. Set HWCAP based on + * common capabilities of every "okay" hart, in case they don't + * have. + */ + if (elf_hwcap) + elf_hwcap &= this_hwcap; + else + elf_hwcap = this_hwcap; + + if (bitmap_empty(riscv_isa, RISCV_ISA_EXT_MAX)) + bitmap_copy(riscv_isa, this_isa, RISCV_ISA_EXT_MAX); + else + bitmap_and(riscv_isa, riscv_isa, this_isa, RISCV_ISA_EXT_MAX); + } + + /* We don't support systems with F but without D, so mask those out + * here. */ + if ((elf_hwcap & COMPAT_HWCAP_ISA_F) && !(elf_hwcap & COMPAT_HWCAP_ISA_D)) { + pr_info("This kernel does not support systems with F but not D\n"); + elf_hwcap &= ~COMPAT_HWCAP_ISA_F; + } + + memset(print_str, 0, sizeof(print_str)); + for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) + if (riscv_isa[0] & BIT_MASK(i)) + print_str[j++] = (char)('a' + i); + pr_info("riscv: base ISA extensions %s\n", print_str); + + memset(print_str, 0, sizeof(print_str)); + for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) + if (elf_hwcap & BIT_MASK(i)) + print_str[j++] = (char)('a' + i); + pr_info("riscv: ELF capabilities %s\n", print_str); + + for_each_set_bit(i, riscv_isa, RISCV_ISA_EXT_MAX) { + j = riscv_isa_ext2key(i); + if (j >= 0) + static_branch_enable(&riscv_isa_ext_keys[j]); + } +} + +#ifdef CONFIG_RISCV_ALTERNATIVE +static bool __init_or_module cpufeature_probe_svpbmt(unsigned int stage) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_SVPBMT)) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + return riscv_isa_extension_available(NULL, SVPBMT); +} + +static bool __init_or_module cpufeature_probe_zicbom(unsigned int stage) +{ + if (!IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM)) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + if (!riscv_isa_extension_available(NULL, ZICBOM)) + return false; + + riscv_noncoherent_supported(); + return true; +} + +/* + * Probe presence of individual extensions. + * + * This code may also be executed before kernel relocation, so we cannot use + * addresses generated by the address-of operator as they won't be valid in + * this context. + */ +static u32 __init_or_module cpufeature_probe(unsigned int stage) +{ + u32 cpu_req_feature = 0; + + if (cpufeature_probe_svpbmt(stage)) + cpu_req_feature |= BIT(CPUFEATURE_SVPBMT); + + if (cpufeature_probe_zicbom(stage)) + cpu_req_feature |= BIT(CPUFEATURE_ZICBOM); + + return cpu_req_feature; +} + +void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, + struct alt_entry *end, + unsigned int stage) +{ + u32 cpu_req_feature = cpufeature_probe(stage); + struct alt_entry *alt; + u32 tmp; + + for (alt = begin; alt < end; alt++) { + if (alt->vendor_id != 0) + continue; + if (alt->errata_id >= CPUFEATURE_NUMBER) { + WARN(1, "This feature id:%d is not in kernel cpufeature list", + alt->errata_id); + continue; + } + + tmp = (1U << alt->errata_id); + if (cpu_req_feature & tmp) { + mutex_lock(&text_mutex); + patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); + mutex_unlock(&text_mutex); + } + } +} +#endif diff --git a/arch/riscv/kernel/crash_dump.c b/arch/riscv/kernel/crash_dump.c new file mode 100644 index 000000000..ea2158cee --- /dev/null +++ b/arch/riscv/kernel/crash_dump.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This code comes from arch/arm64/kernel/crash_dump.c + * Created by: AKASHI Takahiro <takahiro.akashi@linaro.org> + * Copyright (C) 2017 Linaro Limited + */ + +#include <linux/crash_dump.h> +#include <linux/io.h> +#include <linux/uio.h> + +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset) +{ + void *vaddr; + + if (!csize) + return 0; + + vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB); + if (!vaddr) + return -ENOMEM; + + csize = copy_to_iter(vaddr + offset, csize, iter); + + memunmap(vaddr); + return csize; +} diff --git a/arch/riscv/kernel/crash_save_regs.S b/arch/riscv/kernel/crash_save_regs.S new file mode 100644 index 000000000..b2a1908c0 --- /dev/null +++ b/arch/riscv/kernel/crash_save_regs.S @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 FORTH-ICS/CARV + * Nick Kossifidis <mick@ics.forth.gr> + */ + +#include <asm/asm.h> /* For RISCV_* and REG_* macros */ +#include <asm/csr.h> /* For CSR_* macros */ +#include <asm/asm-offsets.h> /* For offsets on pt_regs */ +#include <linux/linkage.h> /* For SYM_* macros */ + +.section ".text" +SYM_CODE_START(riscv_crash_save_regs) + REG_S ra, PT_RA(a0) /* x1 */ + REG_S sp, PT_SP(a0) /* x2 */ + REG_S gp, PT_GP(a0) /* x3 */ + REG_S tp, PT_TP(a0) /* x4 */ + REG_S t0, PT_T0(a0) /* x5 */ + REG_S t1, PT_T1(a0) /* x6 */ + REG_S t2, PT_T2(a0) /* x7 */ + REG_S s0, PT_S0(a0) /* x8/fp */ + REG_S s1, PT_S1(a0) /* x9 */ + REG_S a0, PT_A0(a0) /* x10 */ + REG_S a1, PT_A1(a0) /* x11 */ + REG_S a2, PT_A2(a0) /* x12 */ + REG_S a3, PT_A3(a0) /* x13 */ + REG_S a4, PT_A4(a0) /* x14 */ + REG_S a5, PT_A5(a0) /* x15 */ + REG_S a6, PT_A6(a0) /* x16 */ + REG_S a7, PT_A7(a0) /* x17 */ + REG_S s2, PT_S2(a0) /* x18 */ + REG_S s3, PT_S3(a0) /* x19 */ + REG_S s4, PT_S4(a0) /* x20 */ + REG_S s5, PT_S5(a0) /* x21 */ + REG_S s6, PT_S6(a0) /* x22 */ + REG_S s7, PT_S7(a0) /* x23 */ + REG_S s8, PT_S8(a0) /* x24 */ + REG_S s9, PT_S9(a0) /* x25 */ + REG_S s10, PT_S10(a0) /* x26 */ + REG_S s11, PT_S11(a0) /* x27 */ + REG_S t3, PT_T3(a0) /* x28 */ + REG_S t4, PT_T4(a0) /* x29 */ + REG_S t5, PT_T5(a0) /* x30 */ + REG_S t6, PT_T6(a0) /* x31 */ + + csrr t1, CSR_STATUS + auipc t2, 0x0 + csrr t3, CSR_TVAL + csrr t4, CSR_CAUSE + + REG_S t1, PT_STATUS(a0) + REG_S t2, PT_EPC(a0) + REG_S t3, PT_BADADDR(a0) + REG_S t4, PT_CAUSE(a0) + ret +SYM_CODE_END(riscv_crash_save_regs) diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S new file mode 100644 index 000000000..8e733aa48 --- /dev/null +++ b/arch/riscv/kernel/efi-header.S @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + * Adapted from arch/arm64/kernel/efi-header.S + */ + +#include <linux/pe.h> +#include <linux/sizes.h> + + .macro __EFI_PE_HEADER + .long PE_MAGIC +coff_header: +#ifdef CONFIG_64BIT + .short IMAGE_FILE_MACHINE_RISCV64 // Machine +#else + .short IMAGE_FILE_MACHINE_RISCV32 // Machine +#endif + .short section_count // NumberOfSections + .long 0 // TimeDateStamp + .long 0 // PointerToSymbolTable + .long 0 // NumberOfSymbols + .short section_table - optional_header // SizeOfOptionalHeader + .short IMAGE_FILE_DEBUG_STRIPPED | \ + IMAGE_FILE_EXECUTABLE_IMAGE | \ + IMAGE_FILE_LINE_NUMS_STRIPPED // Characteristics + +optional_header: +#ifdef CONFIG_64BIT + .short PE_OPT_MAGIC_PE32PLUS // PE32+ format +#else + .short PE_OPT_MAGIC_PE32 // PE32 format +#endif + .byte 0x02 // MajorLinkerVersion + .byte 0x14 // MinorLinkerVersion + .long __pecoff_text_end - efi_header_end // SizeOfCode + .long __pecoff_data_virt_size // SizeOfInitializedData + .long 0 // SizeOfUninitializedData + .long __efistub_efi_pe_entry - _start // AddressOfEntryPoint + .long efi_header_end - _start // BaseOfCode +#ifdef CONFIG_32BIT + .long __pecoff_text_end - _start // BaseOfData +#endif + +extra_header_fields: + .quad 0 // ImageBase + .long PECOFF_SECTION_ALIGNMENT // SectionAlignment + .long PECOFF_FILE_ALIGNMENT // FileAlignment + .short 0 // MajorOperatingSystemVersion + .short 0 // MinorOperatingSystemVersion + .short LINUX_EFISTUB_MAJOR_VERSION // MajorImageVersion + .short LINUX_EFISTUB_MINOR_VERSION // MinorImageVersion + .short 0 // MajorSubsystemVersion + .short 0 // MinorSubsystemVersion + .long 0 // Win32VersionValue + + .long _end - _start // SizeOfImage + + // Everything before the kernel image is considered part of the header + .long efi_header_end - _start // SizeOfHeaders + .long 0 // CheckSum + .short IMAGE_SUBSYSTEM_EFI_APPLICATION // Subsystem + .short 0 // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve + .quad 0 // SizeOfHeapCommit + .long 0 // LoaderFlags + .long (section_table - .) / 8 // NumberOfRvaAndSizes + + .quad 0 // ExportTable + .quad 0 // ImportTable + .quad 0 // ResourceTable + .quad 0 // ExceptionTable + .quad 0 // CertificationTable + .quad 0 // BaseRelocationTable + + // Section table +section_table: + .ascii ".text\0\0\0" + .long __pecoff_text_end - efi_header_end // VirtualSize + .long efi_header_end - _start // VirtualAddress + .long __pecoff_text_end - efi_header_end // SizeOfRawData + .long efi_header_end - _start // PointerToRawData + + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long IMAGE_SCN_CNT_CODE | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_EXECUTE // Characteristics + + .ascii ".data\0\0\0" + .long __pecoff_data_virt_size // VirtualSize + .long __pecoff_text_end - _start // VirtualAddress + .long __pecoff_data_raw_size // SizeOfRawData + .long __pecoff_text_end - _start // PointerToRawData + + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long IMAGE_SCN_CNT_INITIALIZED_DATA | \ + IMAGE_SCN_MEM_READ | \ + IMAGE_SCN_MEM_WRITE // Characteristics + + .set section_count, (. - section_table) / 40 + + .balign 0x1000 +efi_header_end: + .endm diff --git a/arch/riscv/kernel/efi.c b/arch/riscv/kernel/efi.c new file mode 100644 index 000000000..1aa540350 --- /dev/null +++ b/arch/riscv/kernel/efi.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + * Adapted from arch/arm64/kernel/efi.c + */ + +#include <linux/efi.h> +#include <linux/init.h> + +#include <asm/efi.h> +#include <asm/pgtable.h> +#include <asm/pgtable-bits.h> + +/* + * Only regions of type EFI_RUNTIME_SERVICES_CODE need to be + * executable, everything else can be mapped with the XN bits + * set. Also take the new (optional) RO/XP bits into account. + */ +static __init pgprot_t efimem_to_pgprot_map(efi_memory_desc_t *md) +{ + u64 attr = md->attribute; + u32 type = md->type; + + if (type == EFI_MEMORY_MAPPED_IO) + return PAGE_KERNEL; + + /* R-- */ + if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) == + (EFI_MEMORY_XP | EFI_MEMORY_RO)) + return PAGE_KERNEL_READ; + + /* R-X */ + if (attr & EFI_MEMORY_RO) + return PAGE_KERNEL_READ_EXEC; + + /* RW- */ + if (((attr & (EFI_MEMORY_RP | EFI_MEMORY_WP | EFI_MEMORY_XP)) == + EFI_MEMORY_XP) || + type != EFI_RUNTIME_SERVICES_CODE) + return PAGE_KERNEL; + + /* RWX */ + return PAGE_KERNEL_EXEC; +} + +int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) +{ + pgprot_t prot = __pgprot(pgprot_val(efimem_to_pgprot_map(md)) & + ~(_PAGE_GLOBAL)); + int i; + + /* RISC-V maps one page at a time */ + for (i = 0; i < md->num_pages; i++) + create_pgd_mapping(mm->pgd, md->virt_addr + i * PAGE_SIZE, + md->phys_addr + i * PAGE_SIZE, + PAGE_SIZE, prot); + return 0; +} + +static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data) +{ + efi_memory_desc_t *md = data; + pte_t pte = READ_ONCE(*ptep); + unsigned long val; + + if (md->attribute & EFI_MEMORY_RO) { + val = pte_val(pte) & ~_PAGE_WRITE; + val |= _PAGE_READ; + pte = __pte(val); + } + if (md->attribute & EFI_MEMORY_XP) { + val = pte_val(pte) & ~_PAGE_EXEC; + pte = __pte(val); + } + set_pte(ptep, pte); + + return 0; +} + +int __init efi_set_mapping_permissions(struct mm_struct *mm, + efi_memory_desc_t *md) +{ + BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE && + md->type != EFI_RUNTIME_SERVICES_DATA); + + /* + * Calling apply_to_page_range() is only safe on regions that are + * guaranteed to be mapped down to pages. Since we are only called + * for regions that have been mapped using efi_create_mapping() above + * (and this is checked by the generic Memory Attributes table parsing + * routines), there is no need to check that again here. + */ + return apply_to_page_range(mm, md->virt_addr, + md->num_pages << EFI_PAGE_SHIFT, + set_permissions, md); +} diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c new file mode 100644 index 000000000..b3b96ff46 --- /dev/null +++ b/arch/riscv/kernel/elf_kexec.c @@ -0,0 +1,469 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Load ELF vmlinux file for the kexec_file_load syscall. + * + * Copyright (C) 2021 Huawei Technologies Co, Ltd. + * + * Author: Liao Chang (liaochang1@huawei.com) + * + * Based on kexec-tools' kexec-elf-riscv.c, heavily modified + * for kernel. + */ + +#define pr_fmt(fmt) "kexec_image: " fmt + +#include <linux/elf.h> +#include <linux/kexec.h> +#include <linux/slab.h> +#include <linux/of.h> +#include <linux/libfdt.h> +#include <linux/types.h> +#include <linux/memblock.h> +#include <asm/setup.h> + +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + kvfree(image->arch.fdt); + image->arch.fdt = NULL; + + vfree(image->elf_headers); + image->elf_headers = NULL; + image->elf_headers_sz = 0; + + return kexec_image_post_load_cleanup_default(image); +} + +static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, + struct kexec_elf_info *elf_info, unsigned long old_pbase, + unsigned long new_pbase) +{ + int i; + int ret = 0; + size_t size; + struct kexec_buf kbuf; + const struct elf_phdr *phdr; + + kbuf.image = image; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + size = phdr->p_filesz; + if (size > phdr->p_memsz) + size = phdr->p_memsz; + + kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset; + kbuf.bufsz = size; + kbuf.buf_align = phdr->p_align; + kbuf.mem = phdr->p_paddr - old_pbase + new_pbase; + kbuf.memsz = phdr->p_memsz; + kbuf.top_down = false; + ret = kexec_add_buffer(&kbuf); + if (ret) + break; + } + + return ret; +} + +/* + * Go through the available phsyical memory regions and find one that hold + * an image of the specified size. + */ +static int elf_find_pbase(struct kimage *image, unsigned long kernel_len, + struct elfhdr *ehdr, struct kexec_elf_info *elf_info, + unsigned long *old_pbase, unsigned long *new_pbase) +{ + int i; + int ret; + struct kexec_buf kbuf; + const struct elf_phdr *phdr; + unsigned long lowest_paddr = ULONG_MAX; + unsigned long lowest_vaddr = ULONG_MAX; + + for (i = 0; i < ehdr->e_phnum; i++) { + phdr = &elf_info->proghdrs[i]; + if (phdr->p_type != PT_LOAD) + continue; + + if (lowest_paddr > phdr->p_paddr) + lowest_paddr = phdr->p_paddr; + + if (lowest_vaddr > phdr->p_vaddr) + lowest_vaddr = phdr->p_vaddr; + } + + kbuf.image = image; + kbuf.buf_min = lowest_paddr; + kbuf.buf_max = ULONG_MAX; + + /* + * Current riscv boot protocol requires 2MB alignment for + * RV64 and 4MB alignment for RV32 + * + */ + kbuf.buf_align = PMD_SIZE; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = ALIGN(kernel_len, PAGE_SIZE); + kbuf.top_down = false; + ret = arch_kexec_locate_mem_hole(&kbuf); + if (!ret) { + *old_pbase = lowest_paddr; + *new_pbase = kbuf.mem; + image->start = ehdr->e_entry - lowest_vaddr + kbuf.mem; + } + return ret; +} + +static int get_nr_ram_ranges_callback(struct resource *res, void *arg) +{ + unsigned int *nr_ranges = arg; + + (*nr_ranges)++; + return 0; +} + +static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) +{ + struct crash_mem *cmem = arg; + + cmem->ranges[cmem->nr_ranges].start = res->start; + cmem->ranges[cmem->nr_ranges].end = res->end; + cmem->nr_ranges++; + + return 0; +} + +static int prepare_elf_headers(void **addr, unsigned long *sz) +{ + struct crash_mem *cmem; + unsigned int nr_ranges; + int ret; + + nr_ranges = 1; /* For exclusion of crashkernel region */ + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); + + cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); + if (!cmem) + return -ENOMEM; + + cmem->max_nr_ranges = nr_ranges; + cmem->nr_ranges = 0; + ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback); + if (ret) + goto out; + + /* Exclude crashkernel region */ + ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end); + if (!ret) + ret = crash_prepare_elf64_headers(cmem, true, addr, sz); + +out: + kfree(cmem); + return ret; +} + +static char *setup_kdump_cmdline(struct kimage *image, char *cmdline, + unsigned long cmdline_len) +{ + int elfcorehdr_strlen; + char *cmdline_ptr; + + cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL); + if (!cmdline_ptr) + return NULL; + + elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ", + image->elf_load_addr); + + if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) { + pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n"); + kfree(cmdline_ptr); + return NULL; + } + + memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len); + /* Ensure it's nul terminated */ + cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0'; + return cmdline_ptr; +} + +static void *elf_kexec_load(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + int ret; + unsigned long old_kernel_pbase = ULONG_MAX; + unsigned long new_kernel_pbase = 0UL; + unsigned long initrd_pbase = 0UL; + unsigned long headers_sz; + unsigned long kernel_start; + void *fdt, *headers; + struct elfhdr ehdr; + struct kexec_buf kbuf; + struct kexec_elf_info elf_info; + char *modified_cmdline = NULL; + + ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); + if (ret) + return ERR_PTR(ret); + + ret = elf_find_pbase(image, kernel_len, &ehdr, &elf_info, + &old_kernel_pbase, &new_kernel_pbase); + if (ret) + goto out; + kernel_start = image->start; + pr_notice("The entry point of kernel at 0x%lx\n", image->start); + + /* Add the kernel binary to the image */ + ret = riscv_kexec_elf_load(image, &ehdr, &elf_info, + old_kernel_pbase, new_kernel_pbase); + if (ret) + goto out; + + kbuf.image = image; + kbuf.buf_min = new_kernel_pbase + kernel_len; + kbuf.buf_max = ULONG_MAX; + + /* Add elfcorehdr */ + if (image->type == KEXEC_TYPE_CRASH) { + ret = prepare_elf_headers(&headers, &headers_sz); + if (ret) { + pr_err("Preparing elf core header failed\n"); + goto out; + } + + kbuf.buffer = headers; + kbuf.bufsz = headers_sz; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.memsz = headers_sz; + kbuf.buf_align = ELF_CORE_HEADER_ALIGN; + kbuf.top_down = true; + + ret = kexec_add_buffer(&kbuf); + if (ret) { + vfree(headers); + goto out; + } + image->elf_headers = headers; + image->elf_load_addr = kbuf.mem; + image->elf_headers_sz = headers_sz; + + pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", + image->elf_load_addr, kbuf.bufsz, kbuf.memsz); + + /* Setup cmdline for kdump kernel case */ + modified_cmdline = setup_kdump_cmdline(image, cmdline, + cmdline_len); + if (!modified_cmdline) { + pr_err("Setting up cmdline for kdump kernel failed\n"); + ret = -EINVAL; + goto out; + } + cmdline = modified_cmdline; + } + +#ifdef CONFIG_ARCH_HAS_KEXEC_PURGATORY + /* Add purgatory to the image */ + kbuf.top_down = true; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_load_purgatory(image, &kbuf); + if (ret) { + pr_err("Error loading purgatory ret=%d\n", ret); + goto out; + } + ret = kexec_purgatory_get_set_symbol(image, "riscv_kernel_entry", + &kernel_start, + sizeof(kernel_start), 0); + if (ret) + pr_err("Error update purgatory ret=%d\n", ret); +#endif /* CONFIG_ARCH_HAS_KEXEC_PURGATORY */ + + /* Add the initrd to the image */ + if (initrd != NULL) { + kbuf.buffer = initrd; + kbuf.bufsz = kbuf.memsz = initrd_len; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = true; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + initrd_pbase = kbuf.mem; + pr_notice("Loaded initrd at 0x%lx\n", initrd_pbase); + } + + /* Add the DTB to the image */ + fdt = of_kexec_alloc_and_setup_fdt(image, initrd_pbase, + initrd_len, cmdline, 0); + if (!fdt) { + pr_err("Error setting up the new device tree.\n"); + ret = -EINVAL; + goto out; + } + + fdt_pack(fdt); + kbuf.buffer = fdt; + kbuf.bufsz = kbuf.memsz = fdt_totalsize(fdt); + kbuf.buf_align = PAGE_SIZE; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + kbuf.top_down = true; + ret = kexec_add_buffer(&kbuf); + if (ret) { + pr_err("Error add DTB kbuf ret=%d\n", ret); + goto out_free_fdt; + } + /* Cache the fdt buffer address for memory cleanup */ + image->arch.fdt = fdt; + pr_notice("Loaded device tree at 0x%lx\n", kbuf.mem); + goto out; + +out_free_fdt: + kvfree(fdt); +out: + kfree(modified_cmdline); + kexec_free_elf_info(&elf_info); + return ret ? ERR_PTR(ret) : NULL; +} + +#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) +#define RISCV_IMM_BITS 12 +#define RISCV_IMM_REACH (1LL << RISCV_IMM_BITS) +#define RISCV_CONST_HIGH_PART(x) \ + (((x) + (RISCV_IMM_REACH >> 1)) & ~(RISCV_IMM_REACH - 1)) +#define RISCV_CONST_LOW_PART(x) ((x) - RISCV_CONST_HIGH_PART(x)) + +#define ENCODE_ITYPE_IMM(x) \ + (RV_X(x, 0, 12) << 20) +#define ENCODE_BTYPE_IMM(x) \ + ((RV_X(x, 1, 4) << 8) | (RV_X(x, 5, 6) << 25) | \ + (RV_X(x, 11, 1) << 7) | (RV_X(x, 12, 1) << 31)) +#define ENCODE_UTYPE_IMM(x) \ + (RV_X(x, 12, 20) << 12) +#define ENCODE_JTYPE_IMM(x) \ + ((RV_X(x, 1, 10) << 21) | (RV_X(x, 11, 1) << 20) | \ + (RV_X(x, 12, 8) << 12) | (RV_X(x, 20, 1) << 31)) +#define ENCODE_CBTYPE_IMM(x) \ + ((RV_X(x, 1, 2) << 3) | (RV_X(x, 3, 2) << 10) | (RV_X(x, 5, 1) << 2) | \ + (RV_X(x, 6, 2) << 5) | (RV_X(x, 8, 1) << 12)) +#define ENCODE_CJTYPE_IMM(x) \ + ((RV_X(x, 1, 3) << 3) | (RV_X(x, 4, 1) << 11) | (RV_X(x, 5, 1) << 2) | \ + (RV_X(x, 6, 1) << 7) | (RV_X(x, 7, 1) << 6) | (RV_X(x, 8, 2) << 9) | \ + (RV_X(x, 10, 1) << 8) | (RV_X(x, 11, 1) << 12)) +#define ENCODE_UJTYPE_IMM(x) \ + (ENCODE_UTYPE_IMM(RISCV_CONST_HIGH_PART(x)) | \ + (ENCODE_ITYPE_IMM(RISCV_CONST_LOW_PART(x)) << 32)) +#define ENCODE_UITYPE_IMM(x) \ + (ENCODE_UTYPE_IMM(x) | (ENCODE_ITYPE_IMM(x) << 32)) + +#define CLEAN_IMM(type, x) \ + ((~ENCODE_##type##_IMM((uint64_t)(-1))) & (x)) + +int arch_kexec_apply_relocations_add(struct purgatory_info *pi, + Elf_Shdr *section, + const Elf_Shdr *relsec, + const Elf_Shdr *symtab) +{ + const char *strtab, *name, *shstrtab; + const Elf_Shdr *sechdrs; + Elf64_Rela *relas; + int i, r_type; + + /* String & section header string table */ + sechdrs = (void *)pi->ehdr + pi->ehdr->e_shoff; + strtab = (char *)pi->ehdr + sechdrs[symtab->sh_link].sh_offset; + shstrtab = (char *)pi->ehdr + sechdrs[pi->ehdr->e_shstrndx].sh_offset; + + relas = (void *)pi->ehdr + relsec->sh_offset; + + for (i = 0; i < relsec->sh_size / sizeof(*relas); i++) { + const Elf_Sym *sym; /* symbol to relocate */ + unsigned long addr; /* final location after relocation */ + unsigned long val; /* relocated symbol value */ + unsigned long sec_base; /* relocated symbol value */ + void *loc; /* tmp location to modify */ + + sym = (void *)pi->ehdr + symtab->sh_offset; + sym += ELF64_R_SYM(relas[i].r_info); + + if (sym->st_name) + name = strtab + sym->st_name; + else + name = shstrtab + sechdrs[sym->st_shndx].sh_name; + + loc = pi->purgatory_buf; + loc += section->sh_offset; + loc += relas[i].r_offset; + + if (sym->st_shndx == SHN_ABS) + sec_base = 0; + else if (sym->st_shndx >= pi->ehdr->e_shnum) { + pr_err("Invalid section %d for symbol %s\n", + sym->st_shndx, name); + return -ENOEXEC; + } else + sec_base = pi->sechdrs[sym->st_shndx].sh_addr; + + val = sym->st_value; + val += sec_base; + val += relas[i].r_addend; + + addr = section->sh_addr + relas[i].r_offset; + + r_type = ELF64_R_TYPE(relas[i].r_info); + + switch (r_type) { + case R_RISCV_BRANCH: + *(u32 *)loc = CLEAN_IMM(BTYPE, *(u32 *)loc) | + ENCODE_BTYPE_IMM(val - addr); + break; + case R_RISCV_JAL: + *(u32 *)loc = CLEAN_IMM(JTYPE, *(u32 *)loc) | + ENCODE_JTYPE_IMM(val - addr); + break; + /* + * With no R_RISCV_PCREL_LO12_S, R_RISCV_PCREL_LO12_I + * sym is expected to be next to R_RISCV_PCREL_HI20 + * in purgatory relsec. Handle it like R_RISCV_CALL + * sym, instead of searching the whole relsec. + */ + case R_RISCV_PCREL_HI20: + case R_RISCV_CALL_PLT: + case R_RISCV_CALL: + *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | + ENCODE_UJTYPE_IMM(val - addr); + break; + case R_RISCV_RVC_BRANCH: + *(u32 *)loc = CLEAN_IMM(CBTYPE, *(u32 *)loc) | + ENCODE_CBTYPE_IMM(val - addr); + break; + case R_RISCV_RVC_JUMP: + *(u32 *)loc = CLEAN_IMM(CJTYPE, *(u32 *)loc) | + ENCODE_CJTYPE_IMM(val - addr); + break; + case R_RISCV_ADD32: + *(u32 *)loc += val; + break; + case R_RISCV_SUB32: + *(u32 *)loc -= val; + break; + /* It has been applied by R_RISCV_PCREL_HI20 sym */ + case R_RISCV_PCREL_LO12_I: + case R_RISCV_ALIGN: + case R_RISCV_RELAX: + break; + default: + pr_err("Unknown rela relocation: %d\n", r_type); + return -ENOEXEC; + } + } + return 0; +} + +const struct kexec_file_ops elf_kexec_ops = { + .probe = kexec_elf_probe, + .load = elf_kexec_load, +}; diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S new file mode 100644 index 000000000..3221a9e5f --- /dev/null +++ b/arch/riscv/kernel/entry.S @@ -0,0 +1,607 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive + */ + +#include <linux/init.h> +#include <linux/linkage.h> + +#include <asm/asm.h> +#include <asm/csr.h> +#include <asm/unistd.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> +#include <asm/errata_list.h> + +#if !IS_ENABLED(CONFIG_PREEMPTION) +.set resume_kernel, restore_all +#endif + +ENTRY(handle_exception) + /* + * If coming from userspace, preserve the user thread pointer and load + * the kernel thread pointer. If we came from the kernel, the scratch + * register will contain 0, and we should continue on the current TP. + */ + csrrw tp, CSR_SCRATCH, tp + bnez tp, _save_context + +_restore_kernel_tpsp: + csrr tp, CSR_SCRATCH + REG_S sp, TASK_TI_KERNEL_SP(tp) + +#ifdef CONFIG_VMAP_STACK + addi sp, sp, -(PT_SIZE_ON_STACK) + srli sp, sp, THREAD_SHIFT + andi sp, sp, 0x1 + bnez sp, handle_kernel_stack_overflow + REG_L sp, TASK_TI_KERNEL_SP(tp) +#endif + +_save_context: + REG_S sp, TASK_TI_USER_SP(tp) + REG_L sp, TASK_TI_KERNEL_SP(tp) + addi sp, sp, -(PT_SIZE_ON_STACK) + REG_S x1, PT_RA(sp) + REG_S x3, PT_GP(sp) + REG_S x5, PT_T0(sp) + REG_S x6, PT_T1(sp) + REG_S x7, PT_T2(sp) + REG_S x8, PT_S0(sp) + REG_S x9, PT_S1(sp) + REG_S x10, PT_A0(sp) + REG_S x11, PT_A1(sp) + REG_S x12, PT_A2(sp) + REG_S x13, PT_A3(sp) + REG_S x14, PT_A4(sp) + REG_S x15, PT_A5(sp) + REG_S x16, PT_A6(sp) + REG_S x17, PT_A7(sp) + REG_S x18, PT_S2(sp) + REG_S x19, PT_S3(sp) + REG_S x20, PT_S4(sp) + REG_S x21, PT_S5(sp) + REG_S x22, PT_S6(sp) + REG_S x23, PT_S7(sp) + REG_S x24, PT_S8(sp) + REG_S x25, PT_S9(sp) + REG_S x26, PT_S10(sp) + REG_S x27, PT_S11(sp) + REG_S x28, PT_T3(sp) + REG_S x29, PT_T4(sp) + REG_S x30, PT_T5(sp) + REG_S x31, PT_T6(sp) + + /* + * Disable user-mode memory access as it should only be set in the + * actual user copy routines. + * + * Disable the FPU to detect illegal usage of floating point in kernel + * space. + */ + li t0, SR_SUM | SR_FS + + REG_L s0, TASK_TI_USER_SP(tp) + csrrc s1, CSR_STATUS, t0 + csrr s2, CSR_EPC + csrr s3, CSR_TVAL + csrr s4, CSR_CAUSE + csrr s5, CSR_SCRATCH + REG_S s0, PT_SP(sp) + REG_S s1, PT_STATUS(sp) + REG_S s2, PT_EPC(sp) + REG_S s3, PT_BADADDR(sp) + REG_S s4, PT_CAUSE(sp) + REG_S s5, PT_TP(sp) + + /* + * Set the scratch register to 0, so that if a recursive exception + * occurs, the exception vector knows it came from the kernel + */ + csrw CSR_SCRATCH, x0 + + /* Load the global pointer */ +.option push +.option norelax + la gp, __global_pointer$ +.option pop + +#ifdef CONFIG_TRACE_IRQFLAGS + call __trace_hardirqs_off +#endif + +#ifdef CONFIG_CONTEXT_TRACKING_USER + /* If previous state is in user mode, call user_exit_callable(). */ + li a0, SR_PP + and a0, s1, a0 + bnez a0, skip_context_tracking + call user_exit_callable +skip_context_tracking: +#endif + + /* + * MSB of cause differentiates between + * interrupts and exceptions + */ + bge s4, zero, 1f + + la ra, ret_from_exception + + /* Handle interrupts */ + move a0, sp /* pt_regs */ + la a1, generic_handle_arch_irq + jr a1 +1: + /* + * Exceptions run with interrupts enabled or disabled depending on the + * state of SR_PIE in m/sstatus. + */ + andi t0, s1, SR_PIE + beqz t0, 1f + /* kprobes, entered via ebreak, must have interrupts disabled. */ + li t0, EXC_BREAKPOINT + beq s4, t0, 1f +#ifdef CONFIG_TRACE_IRQFLAGS + call __trace_hardirqs_on +#endif + csrs CSR_STATUS, SR_IE + +1: + la ra, ret_from_exception + /* Handle syscalls */ + li t0, EXC_SYSCALL + beq s4, t0, handle_syscall + + /* Handle other exceptions */ + slli t0, s4, RISCV_LGPTR + la t1, excp_vect_table + la t2, excp_vect_table_end + move a0, sp /* pt_regs */ + add t0, t1, t0 + /* Check if exception code lies within bounds */ + bgeu t0, t2, 1f + REG_L t0, 0(t0) + jr t0 +1: + tail do_trap_unknown + +handle_syscall: +#ifdef CONFIG_RISCV_M_MODE + /* + * When running is M-Mode (no MMU config), MPIE does not get set. + * As a result, we need to force enable interrupts here because + * handle_exception did not do set SR_IE as it always sees SR_PIE + * being cleared. + */ + csrs CSR_STATUS, SR_IE +#endif +#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING_USER) + /* Recover a0 - a7 for system calls */ + REG_L a0, PT_A0(sp) + REG_L a1, PT_A1(sp) + REG_L a2, PT_A2(sp) + REG_L a3, PT_A3(sp) + REG_L a4, PT_A4(sp) + REG_L a5, PT_A5(sp) + REG_L a6, PT_A6(sp) + REG_L a7, PT_A7(sp) +#endif + /* save the initial A0 value (needed in signal handlers) */ + REG_S a0, PT_ORIG_A0(sp) + /* + * Advance SEPC to avoid executing the original + * scall instruction on sret + */ + addi s2, s2, 0x4 + REG_S s2, PT_EPC(sp) + /* Trace syscalls, but only if requested by the user. */ + REG_L t0, TASK_TI_FLAGS(tp) + andi t0, t0, _TIF_SYSCALL_WORK + bnez t0, handle_syscall_trace_enter +check_syscall_nr: + /* Check to make sure we don't jump to a bogus syscall number. */ + li t0, __NR_syscalls + la s0, sys_ni_syscall + /* + * Syscall number held in a7. + * If syscall number is above allowed value, redirect to ni_syscall. + */ + bgeu a7, t0, 3f +#ifdef CONFIG_COMPAT + REG_L s0, PT_STATUS(sp) + srli s0, s0, SR_UXL_SHIFT + andi s0, s0, (SR_UXL >> SR_UXL_SHIFT) + li t0, (SR_UXL_32 >> SR_UXL_SHIFT) + sub t0, s0, t0 + bnez t0, 1f + + /* Call compat_syscall */ + la s0, compat_sys_call_table + j 2f +1: +#endif + /* Call syscall */ + la s0, sys_call_table +2: + slli t0, a7, RISCV_LGPTR + add s0, s0, t0 + REG_L s0, 0(s0) +3: + jalr s0 + +ret_from_syscall: + /* Set user a0 to kernel a0 */ + REG_S a0, PT_A0(sp) + /* + * We didn't execute the actual syscall. + * Seccomp already set return value for the current task pt_regs. + * (If it was configured with SECCOMP_RET_ERRNO/TRACE) + */ +ret_from_syscall_rejected: +#ifdef CONFIG_DEBUG_RSEQ + move a0, sp + call rseq_syscall +#endif + /* Trace syscalls, but only if requested by the user. */ + REG_L t0, TASK_TI_FLAGS(tp) + andi t0, t0, _TIF_SYSCALL_WORK + bnez t0, handle_syscall_trace_exit + +ret_from_exception: + REG_L s0, PT_STATUS(sp) + csrc CSR_STATUS, SR_IE +#ifdef CONFIG_TRACE_IRQFLAGS + call __trace_hardirqs_off +#endif +#ifdef CONFIG_RISCV_M_MODE + /* the MPP value is too large to be used as an immediate arg for addi */ + li t0, SR_MPP + and s0, s0, t0 +#else + andi s0, s0, SR_SPP +#endif + bnez s0, resume_kernel + + /* Interrupts must be disabled here so flags are checked atomically */ + REG_L s0, TASK_TI_FLAGS(tp) /* current_thread_info->flags */ + andi s1, s0, _TIF_WORK_MASK + bnez s1, resume_userspace_slow +resume_userspace: +#ifdef CONFIG_CONTEXT_TRACKING_USER + call user_enter_callable +#endif + + /* Save unwound kernel stack pointer in thread_info */ + addi s0, sp, PT_SIZE_ON_STACK + REG_S s0, TASK_TI_KERNEL_SP(tp) + + /* + * Save TP into the scratch register , so we can find the kernel data + * structures again. + */ + csrw CSR_SCRATCH, tp + +restore_all: +#ifdef CONFIG_TRACE_IRQFLAGS + REG_L s1, PT_STATUS(sp) + andi t0, s1, SR_PIE + beqz t0, 1f + call __trace_hardirqs_on + j 2f +1: + call __trace_hardirqs_off +2: +#endif + REG_L a0, PT_STATUS(sp) + /* + * The current load reservation is effectively part of the processor's + * state, in the sense that load reservations cannot be shared between + * different hart contexts. We can't actually save and restore a load + * reservation, so instead here we clear any existing reservation -- + * it's always legal for implementations to clear load reservations at + * any point (as long as the forward progress guarantee is kept, but + * we'll ignore that here). + * + * Dangling load reservations can be the result of taking a trap in the + * middle of an LR/SC sequence, but can also be the result of a taken + * forward branch around an SC -- which is how we implement CAS. As a + * result we need to clear reservations between the last CAS and the + * jump back to the new context. While it is unlikely the store + * completes, implementations are allowed to expand reservations to be + * arbitrarily large. + */ + REG_L a2, PT_EPC(sp) + REG_SC x0, a2, PT_EPC(sp) + + csrw CSR_STATUS, a0 + csrw CSR_EPC, a2 + + REG_L x1, PT_RA(sp) + REG_L x3, PT_GP(sp) + REG_L x4, PT_TP(sp) + REG_L x5, PT_T0(sp) + REG_L x6, PT_T1(sp) + REG_L x7, PT_T2(sp) + REG_L x8, PT_S0(sp) + REG_L x9, PT_S1(sp) + REG_L x10, PT_A0(sp) + REG_L x11, PT_A1(sp) + REG_L x12, PT_A2(sp) + REG_L x13, PT_A3(sp) + REG_L x14, PT_A4(sp) + REG_L x15, PT_A5(sp) + REG_L x16, PT_A6(sp) + REG_L x17, PT_A7(sp) + REG_L x18, PT_S2(sp) + REG_L x19, PT_S3(sp) + REG_L x20, PT_S4(sp) + REG_L x21, PT_S5(sp) + REG_L x22, PT_S6(sp) + REG_L x23, PT_S7(sp) + REG_L x24, PT_S8(sp) + REG_L x25, PT_S9(sp) + REG_L x26, PT_S10(sp) + REG_L x27, PT_S11(sp) + REG_L x28, PT_T3(sp) + REG_L x29, PT_T4(sp) + REG_L x30, PT_T5(sp) + REG_L x31, PT_T6(sp) + + REG_L x2, PT_SP(sp) + +#ifdef CONFIG_RISCV_M_MODE + mret +#else + sret +#endif + +#if IS_ENABLED(CONFIG_PREEMPTION) +resume_kernel: + REG_L s0, TASK_TI_PREEMPT_COUNT(tp) + bnez s0, restore_all + REG_L s0, TASK_TI_FLAGS(tp) + andi s0, s0, _TIF_NEED_RESCHED + beqz s0, restore_all + call preempt_schedule_irq + j restore_all +#endif + +resume_userspace_slow: + /* Enter slow path for supplementary processing */ + move a0, sp /* pt_regs */ + move a1, s0 /* current_thread_info->flags */ + call do_work_pending + j resume_userspace + +/* Slow paths for ptrace. */ +handle_syscall_trace_enter: + move a0, sp + call do_syscall_trace_enter + move t0, a0 + REG_L a0, PT_A0(sp) + REG_L a1, PT_A1(sp) + REG_L a2, PT_A2(sp) + REG_L a3, PT_A3(sp) + REG_L a4, PT_A4(sp) + REG_L a5, PT_A5(sp) + REG_L a6, PT_A6(sp) + REG_L a7, PT_A7(sp) + bnez t0, ret_from_syscall_rejected + j check_syscall_nr +handle_syscall_trace_exit: + move a0, sp + call do_syscall_trace_exit + j ret_from_exception + +#ifdef CONFIG_VMAP_STACK +handle_kernel_stack_overflow: + /* + * Takes the psuedo-spinlock for the shadow stack, in case multiple + * harts are concurrently overflowing their kernel stacks. We could + * store any value here, but since we're overflowing the kernel stack + * already we only have SP to use as a scratch register. So we just + * swap in the address of the spinlock, as that's definately non-zero. + * + * Pairs with a store_release in handle_bad_stack(). + */ +1: la sp, spin_shadow_stack + REG_AMOSWAP_AQ sp, sp, (sp) + bnez sp, 1b + + la sp, shadow_stack + addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE + + //save caller register to shadow stack + addi sp, sp, -(PT_SIZE_ON_STACK) + REG_S x1, PT_RA(sp) + REG_S x5, PT_T0(sp) + REG_S x6, PT_T1(sp) + REG_S x7, PT_T2(sp) + REG_S x10, PT_A0(sp) + REG_S x11, PT_A1(sp) + REG_S x12, PT_A2(sp) + REG_S x13, PT_A3(sp) + REG_S x14, PT_A4(sp) + REG_S x15, PT_A5(sp) + REG_S x16, PT_A6(sp) + REG_S x17, PT_A7(sp) + REG_S x28, PT_T3(sp) + REG_S x29, PT_T4(sp) + REG_S x30, PT_T5(sp) + REG_S x31, PT_T6(sp) + + la ra, restore_caller_reg + tail get_overflow_stack + +restore_caller_reg: + //save per-cpu overflow stack + REG_S a0, -8(sp) + //restore caller register from shadow_stack + REG_L x1, PT_RA(sp) + REG_L x5, PT_T0(sp) + REG_L x6, PT_T1(sp) + REG_L x7, PT_T2(sp) + REG_L x10, PT_A0(sp) + REG_L x11, PT_A1(sp) + REG_L x12, PT_A2(sp) + REG_L x13, PT_A3(sp) + REG_L x14, PT_A4(sp) + REG_L x15, PT_A5(sp) + REG_L x16, PT_A6(sp) + REG_L x17, PT_A7(sp) + REG_L x28, PT_T3(sp) + REG_L x29, PT_T4(sp) + REG_L x30, PT_T5(sp) + REG_L x31, PT_T6(sp) + + //load per-cpu overflow stack + REG_L sp, -8(sp) + addi sp, sp, -(PT_SIZE_ON_STACK) + + //save context to overflow stack + REG_S x1, PT_RA(sp) + REG_S x3, PT_GP(sp) + REG_S x5, PT_T0(sp) + REG_S x6, PT_T1(sp) + REG_S x7, PT_T2(sp) + REG_S x8, PT_S0(sp) + REG_S x9, PT_S1(sp) + REG_S x10, PT_A0(sp) + REG_S x11, PT_A1(sp) + REG_S x12, PT_A2(sp) + REG_S x13, PT_A3(sp) + REG_S x14, PT_A4(sp) + REG_S x15, PT_A5(sp) + REG_S x16, PT_A6(sp) + REG_S x17, PT_A7(sp) + REG_S x18, PT_S2(sp) + REG_S x19, PT_S3(sp) + REG_S x20, PT_S4(sp) + REG_S x21, PT_S5(sp) + REG_S x22, PT_S6(sp) + REG_S x23, PT_S7(sp) + REG_S x24, PT_S8(sp) + REG_S x25, PT_S9(sp) + REG_S x26, PT_S10(sp) + REG_S x27, PT_S11(sp) + REG_S x28, PT_T3(sp) + REG_S x29, PT_T4(sp) + REG_S x30, PT_T5(sp) + REG_S x31, PT_T6(sp) + + REG_L s0, TASK_TI_KERNEL_SP(tp) + csrr s1, CSR_STATUS + csrr s2, CSR_EPC + csrr s3, CSR_TVAL + csrr s4, CSR_CAUSE + csrr s5, CSR_SCRATCH + REG_S s0, PT_SP(sp) + REG_S s1, PT_STATUS(sp) + REG_S s2, PT_EPC(sp) + REG_S s3, PT_BADADDR(sp) + REG_S s4, PT_CAUSE(sp) + REG_S s5, PT_TP(sp) + move a0, sp + tail handle_bad_stack +#endif + +END(handle_exception) + +ENTRY(ret_from_fork) + la ra, ret_from_exception + tail schedule_tail +ENDPROC(ret_from_fork) + +ENTRY(ret_from_kernel_thread) + call schedule_tail + /* Call fn(arg) */ + la ra, ret_from_exception + move a0, s1 + jr s0 +ENDPROC(ret_from_kernel_thread) + + +/* + * Integer register context switch + * The callee-saved registers must be saved and restored. + * + * a0: previous task_struct (must be preserved across the switch) + * a1: next task_struct + * + * The value of a0 and a1 must be preserved by this function, as that's how + * arguments are passed to schedule_tail. + */ +ENTRY(__switch_to) + /* Save context into prev->thread */ + li a4, TASK_THREAD_RA + add a3, a0, a4 + add a4, a1, a4 + REG_S ra, TASK_THREAD_RA_RA(a3) + REG_S sp, TASK_THREAD_SP_RA(a3) + REG_S s0, TASK_THREAD_S0_RA(a3) + REG_S s1, TASK_THREAD_S1_RA(a3) + REG_S s2, TASK_THREAD_S2_RA(a3) + REG_S s3, TASK_THREAD_S3_RA(a3) + REG_S s4, TASK_THREAD_S4_RA(a3) + REG_S s5, TASK_THREAD_S5_RA(a3) + REG_S s6, TASK_THREAD_S6_RA(a3) + REG_S s7, TASK_THREAD_S7_RA(a3) + REG_S s8, TASK_THREAD_S8_RA(a3) + REG_S s9, TASK_THREAD_S9_RA(a3) + REG_S s10, TASK_THREAD_S10_RA(a3) + REG_S s11, TASK_THREAD_S11_RA(a3) + /* Restore context from next->thread */ + REG_L ra, TASK_THREAD_RA_RA(a4) + REG_L sp, TASK_THREAD_SP_RA(a4) + REG_L s0, TASK_THREAD_S0_RA(a4) + REG_L s1, TASK_THREAD_S1_RA(a4) + REG_L s2, TASK_THREAD_S2_RA(a4) + REG_L s3, TASK_THREAD_S3_RA(a4) + REG_L s4, TASK_THREAD_S4_RA(a4) + REG_L s5, TASK_THREAD_S5_RA(a4) + REG_L s6, TASK_THREAD_S6_RA(a4) + REG_L s7, TASK_THREAD_S7_RA(a4) + REG_L s8, TASK_THREAD_S8_RA(a4) + REG_L s9, TASK_THREAD_S9_RA(a4) + REG_L s10, TASK_THREAD_S10_RA(a4) + REG_L s11, TASK_THREAD_S11_RA(a4) + /* The offset of thread_info in task_struct is zero. */ + move tp, a1 + ret +ENDPROC(__switch_to) + +#ifndef CONFIG_MMU +#define do_page_fault do_trap_unknown +#endif + + .section ".rodata" + .align LGREG + /* Exception vector table */ +ENTRY(excp_vect_table) + RISCV_PTR do_trap_insn_misaligned + ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault) + RISCV_PTR do_trap_insn_illegal + RISCV_PTR do_trap_break + RISCV_PTR do_trap_load_misaligned + RISCV_PTR do_trap_load_fault + RISCV_PTR do_trap_store_misaligned + RISCV_PTR do_trap_store_fault + RISCV_PTR do_trap_ecall_u /* system call, gets intercepted */ + RISCV_PTR do_trap_ecall_s + RISCV_PTR do_trap_unknown + RISCV_PTR do_trap_ecall_m + /* instruciton page fault */ + ALT_PAGE_FAULT(RISCV_PTR do_page_fault) + RISCV_PTR do_page_fault /* load page fault */ + RISCV_PTR do_trap_unknown + RISCV_PTR do_page_fault /* store page fault */ +excp_vect_table_end: +END(excp_vect_table) + +#ifndef CONFIG_MMU +ENTRY(__user_rt_sigreturn) + li a7, __NR_rt_sigreturn + scall +END(__user_rt_sigreturn) +#endif diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S new file mode 100644 index 000000000..dd2205473 --- /dev/null +++ b/arch/riscv/kernel/fpu.S @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/linkage.h> + +#include <asm/asm.h> +#include <asm/csr.h> +#include <asm/asm-offsets.h> + +ENTRY(__fstate_save) + li a2, TASK_THREAD_F0 + add a0, a0, a2 + li t1, SR_FS + csrs CSR_STATUS, t1 + frcsr t0 + fsd f0, TASK_THREAD_F0_F0(a0) + fsd f1, TASK_THREAD_F1_F0(a0) + fsd f2, TASK_THREAD_F2_F0(a0) + fsd f3, TASK_THREAD_F3_F0(a0) + fsd f4, TASK_THREAD_F4_F0(a0) + fsd f5, TASK_THREAD_F5_F0(a0) + fsd f6, TASK_THREAD_F6_F0(a0) + fsd f7, TASK_THREAD_F7_F0(a0) + fsd f8, TASK_THREAD_F8_F0(a0) + fsd f9, TASK_THREAD_F9_F0(a0) + fsd f10, TASK_THREAD_F10_F0(a0) + fsd f11, TASK_THREAD_F11_F0(a0) + fsd f12, TASK_THREAD_F12_F0(a0) + fsd f13, TASK_THREAD_F13_F0(a0) + fsd f14, TASK_THREAD_F14_F0(a0) + fsd f15, TASK_THREAD_F15_F0(a0) + fsd f16, TASK_THREAD_F16_F0(a0) + fsd f17, TASK_THREAD_F17_F0(a0) + fsd f18, TASK_THREAD_F18_F0(a0) + fsd f19, TASK_THREAD_F19_F0(a0) + fsd f20, TASK_THREAD_F20_F0(a0) + fsd f21, TASK_THREAD_F21_F0(a0) + fsd f22, TASK_THREAD_F22_F0(a0) + fsd f23, TASK_THREAD_F23_F0(a0) + fsd f24, TASK_THREAD_F24_F0(a0) + fsd f25, TASK_THREAD_F25_F0(a0) + fsd f26, TASK_THREAD_F26_F0(a0) + fsd f27, TASK_THREAD_F27_F0(a0) + fsd f28, TASK_THREAD_F28_F0(a0) + fsd f29, TASK_THREAD_F29_F0(a0) + fsd f30, TASK_THREAD_F30_F0(a0) + fsd f31, TASK_THREAD_F31_F0(a0) + sw t0, TASK_THREAD_FCSR_F0(a0) + csrc CSR_STATUS, t1 + ret +ENDPROC(__fstate_save) + +ENTRY(__fstate_restore) + li a2, TASK_THREAD_F0 + add a0, a0, a2 + li t1, SR_FS + lw t0, TASK_THREAD_FCSR_F0(a0) + csrs CSR_STATUS, t1 + fld f0, TASK_THREAD_F0_F0(a0) + fld f1, TASK_THREAD_F1_F0(a0) + fld f2, TASK_THREAD_F2_F0(a0) + fld f3, TASK_THREAD_F3_F0(a0) + fld f4, TASK_THREAD_F4_F0(a0) + fld f5, TASK_THREAD_F5_F0(a0) + fld f6, TASK_THREAD_F6_F0(a0) + fld f7, TASK_THREAD_F7_F0(a0) + fld f8, TASK_THREAD_F8_F0(a0) + fld f9, TASK_THREAD_F9_F0(a0) + fld f10, TASK_THREAD_F10_F0(a0) + fld f11, TASK_THREAD_F11_F0(a0) + fld f12, TASK_THREAD_F12_F0(a0) + fld f13, TASK_THREAD_F13_F0(a0) + fld f14, TASK_THREAD_F14_F0(a0) + fld f15, TASK_THREAD_F15_F0(a0) + fld f16, TASK_THREAD_F16_F0(a0) + fld f17, TASK_THREAD_F17_F0(a0) + fld f18, TASK_THREAD_F18_F0(a0) + fld f19, TASK_THREAD_F19_F0(a0) + fld f20, TASK_THREAD_F20_F0(a0) + fld f21, TASK_THREAD_F21_F0(a0) + fld f22, TASK_THREAD_F22_F0(a0) + fld f23, TASK_THREAD_F23_F0(a0) + fld f24, TASK_THREAD_F24_F0(a0) + fld f25, TASK_THREAD_F25_F0(a0) + fld f26, TASK_THREAD_F26_F0(a0) + fld f27, TASK_THREAD_F27_F0(a0) + fld f28, TASK_THREAD_F28_F0(a0) + fld f29, TASK_THREAD_F29_F0(a0) + fld f30, TASK_THREAD_F30_F0(a0) + fld f31, TASK_THREAD_F31_F0(a0) + fscsr t0 + csrc CSR_STATUS, t1 + ret +ENDPROC(__fstate_restore) diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c new file mode 100644 index 000000000..03a6434a8 --- /dev/null +++ b/arch/riscv/kernel/ftrace.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * Copyright (C) 2017 Andes Technology Corporation + */ + +#include <linux/ftrace.h> +#include <linux/uaccess.h> +#include <linux/memory.h> +#include <asm/cacheflush.h> +#include <asm/patch.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +void ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) +{ + mutex_lock(&text_mutex); + + /* + * The code sequences we use for ftrace can't be patched while the + * kernel is running, so we need to use stop_machine() to modify them + * for now. This doesn't play nice with text_mutex, we use this flag + * to elide the check. + */ + riscv_patch_in_stop_machine = true; +} + +void ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) +{ + riscv_patch_in_stop_machine = false; + mutex_unlock(&text_mutex); +} + +static int ftrace_check_current_call(unsigned long hook_pos, + unsigned int *expected) +{ + unsigned int replaced[2]; + unsigned int nops[2] = {NOP4, NOP4}; + + /* we expect nops at the hook position */ + if (!expected) + expected = nops; + + /* + * Read the text we want to modify; + * return must be -EFAULT on read error + */ + if (copy_from_kernel_nofault(replaced, (void *)hook_pos, + MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* + * Make sure it is what we expect it to be; + * return must be -EINVAL on failed comparison + */ + if (memcmp(expected, replaced, sizeof(replaced))) { + pr_err("%p: expected (%08x %08x) but got (%08x %08x)\n", + (void *)hook_pos, expected[0], expected[1], replaced[0], + replaced[1]); + return -EINVAL; + } + + return 0; +} + +static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, + bool enable, bool ra) +{ + unsigned int call[2]; + unsigned int nops[2] = {NOP4, NOP4}; + + if (ra) + make_call_ra(hook_pos, target, call); + else + make_call_t0(hook_pos, target, call); + + /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */ + if (patch_text_nosync + ((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE)) + return -EPERM; + + return 0; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned int call[2]; + + make_call_t0(rec->ip, addr, call); + + if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE)) + return -EPERM; + + return 0; +} + +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + unsigned int nops[2] = {NOP4, NOP4}; + + if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE)) + return -EPERM; + + return 0; +} + +/* + * This is called early on, and isn't wrapped by + * ftrace_arch_code_modify_{prepare,post_process}() and therefor doesn't hold + * text_mutex, which triggers a lockdep failure. SMP isn't running so we could + * just directly poke the text, but it's simpler to just take the lock + * ourselves. + */ +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + int out; + + mutex_lock(&text_mutex); + out = ftrace_make_nop(mod, rec, MCOUNT_ADDR); + mutex_unlock(&text_mutex); + + return out; +} + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + int ret = __ftrace_modify_call((unsigned long)&ftrace_call, + (unsigned long)func, true, true); + if (!ret) { + ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call, + (unsigned long)func, true, true); + } + + return ret; +} +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + unsigned int call[2]; + unsigned long caller = rec->ip; + int ret; + + make_call_t0(caller, old_addr, call); + ret = ftrace_check_current_call(caller, call); + + if (ret) + return ret; + + return __ftrace_modify_call(caller, addr, true, false); +} +#endif + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * Most of this function is copied from arm64. + */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long old; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + /* + * We don't suffer access faults, so no extra fault-recovery assembly + * is needed here. + */ + old = *parent; + + if (!function_graph_enter(old, self_addr, frame_pointer, parent)) + *parent = return_hooker; +} + +#ifdef CONFIG_DYNAMIC_FTRACE +extern void ftrace_graph_call(void); +extern void ftrace_graph_regs_call(void); +int ftrace_enable_ftrace_graph_caller(void) +{ + int ret; + + ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call, + (unsigned long)&prepare_ftrace_return, true, true); + if (ret) + return ret; + + return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call, + (unsigned long)&prepare_ftrace_return, true, true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + int ret; + + ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call, + (unsigned long)&prepare_ftrace_return, false, true); + if (ret) + return ret; + + return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call, + (unsigned long)&prepare_ftrace_return, false, true); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S new file mode 100644 index 000000000..4bf6c449d --- /dev/null +++ b/arch/riscv/kernel/head.S @@ -0,0 +1,437 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Regents of the University of California + */ + +#include <asm/asm-offsets.h> +#include <asm/asm.h> +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/thread_info.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/csr.h> +#include <asm/cpu_ops_sbi.h> +#include <asm/hwcap.h> +#include <asm/image.h> +#include <asm/xip_fixup.h> +#include "efi-header.S" + +__HEAD +ENTRY(_start) + /* + * Image header expected by Linux boot-loaders. The image header data + * structure is described in asm/image.h. + * Do not modify it without modifying the structure and all bootloaders + * that expects this header format!! + */ +#ifdef CONFIG_EFI + /* + * This instruction decodes to "MZ" ASCII required by UEFI. + */ + c.li s4,-13 + j _start_kernel +#else + /* jump to start kernel */ + j _start_kernel + /* reserved */ + .word 0 +#endif + .balign 8 +#ifdef CONFIG_RISCV_M_MODE + /* Image load offset (0MB) from start of RAM for M-mode */ + .dword 0 +#else +#if __riscv_xlen == 64 + /* Image load offset(2MB) from start of RAM */ + .dword 0x200000 +#else + /* Image load offset(4MB) from start of RAM */ + .dword 0x400000 +#endif +#endif + /* Effective size of kernel image */ + .dword _end - _start + .dword __HEAD_FLAGS + .word RISCV_HEADER_VERSION + .word 0 + .dword 0 + .ascii RISCV_IMAGE_MAGIC + .balign 4 + .ascii RISCV_IMAGE_MAGIC2 +#ifdef CONFIG_EFI + .word pe_head_start - _start +pe_head_start: + + __EFI_PE_HEADER +#else + .word 0 +#endif + +.align 2 +#ifdef CONFIG_MMU + .global relocate_enable_mmu +relocate_enable_mmu: + /* Relocate return address */ + la a1, kernel_map + XIP_FIXUP_OFFSET a1 + REG_L a1, KERNEL_MAP_VIRT_ADDR(a1) + la a2, _start + sub a1, a1, a2 + add ra, ra, a1 + + /* Point stvec to virtual address of intruction after satp write */ + la a2, 1f + add a2, a2, a1 + csrw CSR_TVEC, a2 + + /* Compute satp for kernel page tables, but don't load it yet */ + srl a2, a0, PAGE_SHIFT + la a1, satp_mode + REG_L a1, 0(a1) + or a2, a2, a1 + + /* + * Load trampoline page directory, which will cause us to trap to + * stvec if VA != PA, or simply fall through if VA == PA. We need a + * full fence here because setup_vm() just wrote these PTEs and we need + * to ensure the new translations are in use. + */ + la a0, trampoline_pg_dir + XIP_FIXUP_OFFSET a0 + srl a0, a0, PAGE_SHIFT + or a0, a0, a1 + sfence.vma + csrw CSR_SATP, a0 +.align 2 +1: + /* Set trap vector to spin forever to help debug */ + la a0, .Lsecondary_park + csrw CSR_TVEC, a0 + + /* Reload the global pointer */ +.option push +.option norelax + la gp, __global_pointer$ +.option pop + + /* + * Switch to kernel page tables. A full fence is necessary in order to + * avoid using the trampoline translations, which are only correct for + * the first superpage. Fetching the fence is guaranteed to work + * because that first superpage is translated the same way. + */ + csrw CSR_SATP, a2 + sfence.vma + + ret +#endif /* CONFIG_MMU */ +#ifdef CONFIG_SMP + .global secondary_start_sbi +secondary_start_sbi: + /* Mask all interrupts */ + csrw CSR_IE, zero + csrw CSR_IP, zero + + /* Load the global pointer */ + .option push + .option norelax + la gp, __global_pointer$ + .option pop + + /* + * Disable FPU to detect illegal usage of + * floating point in kernel space + */ + li t0, SR_FS + csrc CSR_STATUS, t0 + + /* Set trap vector to spin forever to help debug */ + la a3, .Lsecondary_park + csrw CSR_TVEC, a3 + + /* a0 contains the hartid & a1 contains boot data */ + li a2, SBI_HART_BOOT_TASK_PTR_OFFSET + XIP_FIXUP_OFFSET a2 + add a2, a2, a1 + REG_L tp, (a2) + li a3, SBI_HART_BOOT_STACK_PTR_OFFSET + XIP_FIXUP_OFFSET a3 + add a3, a3, a1 + REG_L sp, (a3) + +.Lsecondary_start_common: + +#ifdef CONFIG_MMU + /* Enable virtual memory and relocate to virtual address */ + la a0, swapper_pg_dir + XIP_FIXUP_OFFSET a0 + call relocate_enable_mmu +#endif + call setup_trap_vector + tail smp_callin +#endif /* CONFIG_SMP */ + +.align 2 +setup_trap_vector: + /* Set trap vector to exception handler */ + la a0, handle_exception + csrw CSR_TVEC, a0 + + /* + * Set sup0 scratch register to 0, indicating to exception vector that + * we are presently executing in kernel. + */ + csrw CSR_SCRATCH, zero + ret + +.align 2 +.Lsecondary_park: + /* We lack SMP support or have too many harts, so park this hart */ + wfi + j .Lsecondary_park + +END(_start) + +ENTRY(_start_kernel) + /* Mask all interrupts */ + csrw CSR_IE, zero + csrw CSR_IP, zero + +#ifdef CONFIG_RISCV_M_MODE + /* flush the instruction cache */ + fence.i + + /* Reset all registers except ra, a0, a1 */ + call reset_regs + + /* + * Setup a PMP to permit access to all of memory. Some machines may + * not implement PMPs, so we set up a quick trap handler to just skip + * touching the PMPs on any trap. + */ + la a0, pmp_done + csrw CSR_TVEC, a0 + + li a0, -1 + csrw CSR_PMPADDR0, a0 + li a0, (PMP_A_NAPOT | PMP_R | PMP_W | PMP_X) + csrw CSR_PMPCFG0, a0 +.align 2 +pmp_done: + + /* + * The hartid in a0 is expected later on, and we have no firmware + * to hand it to us. + */ + csrr a0, CSR_MHARTID +#endif /* CONFIG_RISCV_M_MODE */ + + /* Load the global pointer */ +.option push +.option norelax + la gp, __global_pointer$ +.option pop + + /* + * Disable FPU to detect illegal usage of + * floating point in kernel space + */ + li t0, SR_FS + csrc CSR_STATUS, t0 + +#ifdef CONFIG_RISCV_BOOT_SPINWAIT + li t0, CONFIG_NR_CPUS + blt a0, t0, .Lgood_cores + tail .Lsecondary_park +.Lgood_cores: + + /* The lottery system is only required for spinwait booting method */ +#ifndef CONFIG_XIP_KERNEL + /* Pick one hart to run the main boot sequence */ + la a3, hart_lottery + li a2, 1 + amoadd.w a3, a2, (a3) + bnez a3, .Lsecondary_start + +#else + /* hart_lottery in flash contains a magic number */ + la a3, hart_lottery + mv a2, a3 + XIP_FIXUP_OFFSET a2 + XIP_FIXUP_FLASH_OFFSET a3 + lw t1, (a3) + amoswap.w t0, t1, (a2) + /* first time here if hart_lottery in RAM is not set */ + beq t0, t1, .Lsecondary_start + +#endif /* CONFIG_XIP */ +#endif /* CONFIG_RISCV_BOOT_SPINWAIT */ + +#ifdef CONFIG_XIP_KERNEL + la sp, _end + THREAD_SIZE + XIP_FIXUP_OFFSET sp + mv s0, a0 + call __copy_data + + /* Restore a0 copy */ + mv a0, s0 +#endif + +#ifndef CONFIG_XIP_KERNEL + /* Clear BSS for flat non-ELF images */ + la a3, __bss_start + la a4, __bss_stop + ble a4, a3, clear_bss_done +clear_bss: + REG_S zero, (a3) + add a3, a3, RISCV_SZPTR + blt a3, a4, clear_bss +clear_bss_done: +#endif + /* Save hart ID and DTB physical address */ + mv s0, a0 + mv s1, a1 + + la a2, boot_cpu_hartid + XIP_FIXUP_OFFSET a2 + REG_S a0, (a2) + + /* Initialize page tables and relocate to virtual addresses */ + la tp, init_task + la sp, init_thread_union + THREAD_SIZE + XIP_FIXUP_OFFSET sp +#ifdef CONFIG_BUILTIN_DTB + la a0, __dtb_start + XIP_FIXUP_OFFSET a0 +#else + mv a0, s1 +#endif /* CONFIG_BUILTIN_DTB */ + call setup_vm +#ifdef CONFIG_MMU + la a0, early_pg_dir + XIP_FIXUP_OFFSET a0 + call relocate_enable_mmu +#endif /* CONFIG_MMU */ + + call setup_trap_vector + /* Restore C environment */ + la tp, init_task + la sp, init_thread_union + THREAD_SIZE + +#ifdef CONFIG_KASAN + call kasan_early_init +#endif + /* Start the kernel */ + call soc_early_init + tail start_kernel + +#ifdef CONFIG_RISCV_BOOT_SPINWAIT +.Lsecondary_start: + /* Set trap vector to spin forever to help debug */ + la a3, .Lsecondary_park + csrw CSR_TVEC, a3 + + slli a3, a0, LGREG + la a1, __cpu_spinwait_stack_pointer + XIP_FIXUP_OFFSET a1 + la a2, __cpu_spinwait_task_pointer + XIP_FIXUP_OFFSET a2 + add a1, a3, a1 + add a2, a3, a2 + + /* + * This hart didn't win the lottery, so we wait for the winning hart to + * get far enough along the boot process that it should continue. + */ +.Lwait_for_cpu_up: + /* FIXME: We should WFI to save some energy here. */ + REG_L sp, (a1) + REG_L tp, (a2) + beqz sp, .Lwait_for_cpu_up + beqz tp, .Lwait_for_cpu_up + fence + + tail .Lsecondary_start_common +#endif /* CONFIG_RISCV_BOOT_SPINWAIT */ + +END(_start_kernel) + +#ifdef CONFIG_RISCV_M_MODE +ENTRY(reset_regs) + li sp, 0 + li gp, 0 + li tp, 0 + li t0, 0 + li t1, 0 + li t2, 0 + li s0, 0 + li s1, 0 + li a2, 0 + li a3, 0 + li a4, 0 + li a5, 0 + li a6, 0 + li a7, 0 + li s2, 0 + li s3, 0 + li s4, 0 + li s5, 0 + li s6, 0 + li s7, 0 + li s8, 0 + li s9, 0 + li s10, 0 + li s11, 0 + li t3, 0 + li t4, 0 + li t5, 0 + li t6, 0 + csrw CSR_SCRATCH, 0 + +#ifdef CONFIG_FPU + csrr t0, CSR_MISA + andi t0, t0, (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D) + beqz t0, .Lreset_regs_done + + li t1, SR_FS + csrs CSR_STATUS, t1 + fmv.s.x f0, zero + fmv.s.x f1, zero + fmv.s.x f2, zero + fmv.s.x f3, zero + fmv.s.x f4, zero + fmv.s.x f5, zero + fmv.s.x f6, zero + fmv.s.x f7, zero + fmv.s.x f8, zero + fmv.s.x f9, zero + fmv.s.x f10, zero + fmv.s.x f11, zero + fmv.s.x f12, zero + fmv.s.x f13, zero + fmv.s.x f14, zero + fmv.s.x f15, zero + fmv.s.x f16, zero + fmv.s.x f17, zero + fmv.s.x f18, zero + fmv.s.x f19, zero + fmv.s.x f20, zero + fmv.s.x f21, zero + fmv.s.x f22, zero + fmv.s.x f23, zero + fmv.s.x f24, zero + fmv.s.x f25, zero + fmv.s.x f26, zero + fmv.s.x f27, zero + fmv.s.x f28, zero + fmv.s.x f29, zero + fmv.s.x f30, zero + fmv.s.x f31, zero + csrw fcsr, 0 + /* note that the caller must clear SR_FS */ +#endif /* CONFIG_FPU */ +.Lreset_regs_done: + ret +END(reset_regs) +#endif /* CONFIG_RISCV_M_MODE */ diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h new file mode 100644 index 000000000..726731ada --- /dev/null +++ b/arch/riscv/kernel/head.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 SiFive, Inc. + */ +#ifndef __ASM_HEAD_H +#define __ASM_HEAD_H + +#include <linux/linkage.h> +#include <linux/init.h> + +extern atomic_t hart_lottery; + +asmlinkage void do_page_fault(struct pt_regs *regs); +asmlinkage void __init setup_vm(uintptr_t dtb_pa); +#ifdef CONFIG_XIP_KERNEL +asmlinkage void __init __copy_data(void); +#endif + +#ifdef CONFIG_RISCV_BOOT_SPINWAIT +extern void *__cpu_spinwait_stack_pointer[]; +extern void *__cpu_spinwait_task_pointer[]; +#endif + +#endif /* __ASM_HEAD_H */ diff --git a/arch/riscv/kernel/image-vars.h b/arch/riscv/kernel/image-vars.h new file mode 100644 index 000000000..d6e5f7399 --- /dev/null +++ b/arch/riscv/kernel/image-vars.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + * Linker script variables to be set after section resolution, as + * ld.lld does not like variables assigned before SECTIONS is processed. + * Based on arch/arm64/kernel/image-vars.h + */ +#ifndef __RISCV_KERNEL_IMAGE_VARS_H +#define __RISCV_KERNEL_IMAGE_VARS_H + +#ifndef LINKER_SCRIPT +#error This file should only be included in vmlinux.lds.S +#endif + +#ifdef CONFIG_EFI + +/* + * The EFI stub has its own symbol namespace prefixed by __efistub_, to + * isolate it from the kernel proper. The following symbols are legally + * accessed by the stub, so provide some aliases to make them accessible. + * Only include data symbols here, or text symbols of functions that are + * guaranteed to be safe when executed at another offset than they were + * linked at. The routines below are all implemented in assembler in a + * position independent manner + */ +__efistub_memcmp = memcmp; +__efistub_memchr = memchr; +__efistub_strlen = strlen; +__efistub_strnlen = strnlen; +__efistub_strcmp = strcmp; +__efistub_strncmp = strncmp; +__efistub_strrchr = strrchr; + +__efistub__start = _start; +__efistub__start_kernel = _start_kernel; +__efistub__end = _end; +__efistub__edata = _edata; +__efistub_screen_info = screen_info; + +#endif + +#endif /* __RISCV_KERNEL_IMAGE_VARS_H */ diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c new file mode 100644 index 000000000..7207fa08d --- /dev/null +++ b/arch/riscv/kernel/irq.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive + * Copyright (C) 2018 Christoph Hellwig + */ + +#include <linux/interrupt.h> +#include <linux/irqchip.h> +#include <linux/seq_file.h> +#include <asm/smp.h> + +int arch_show_interrupts(struct seq_file *p, int prec) +{ + show_ipi_stats(p, prec); + return 0; +} + +void __init init_IRQ(void) +{ + irqchip_init(); + if (!handle_arch_irq) + panic("No interrupt controller found."); +} diff --git a/arch/riscv/kernel/jump_label.c b/arch/riscv/kernel/jump_label.c new file mode 100644 index 000000000..e6694759d --- /dev/null +++ b/arch/riscv/kernel/jump_label.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Emil Renner Berthing + * + * Based on arch/arm64/kernel/jump_label.c + */ +#include <linux/jump_label.h> +#include <linux/kernel.h> +#include <linux/memory.h> +#include <linux/mutex.h> +#include <asm/bug.h> +#include <asm/patch.h> + +#define RISCV_INSN_NOP 0x00000013U +#define RISCV_INSN_JAL 0x0000006fU + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + void *addr = (void *)jump_entry_code(entry); + u32 insn; + + if (type == JUMP_LABEL_JMP) { + long offset = jump_entry_target(entry) - jump_entry_code(entry); + + if (WARN_ON(offset & 1 || offset < -524288 || offset >= 524288)) + return; + + insn = RISCV_INSN_JAL | + (((u32)offset & GENMASK(19, 12)) << (12 - 12)) | + (((u32)offset & GENMASK(11, 11)) << (20 - 11)) | + (((u32)offset & GENMASK(10, 1)) << (21 - 1)) | + (((u32)offset & GENMASK(20, 20)) << (31 - 20)); + } else { + insn = RISCV_INSN_NOP; + } + + mutex_lock(&text_mutex); + patch_text_nosync(addr, &insn, sizeof(insn)); + mutex_unlock(&text_mutex); +} diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S new file mode 100644 index 000000000..059c5e216 --- /dev/null +++ b/arch/riscv/kernel/kexec_relocate.S @@ -0,0 +1,221 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 FORTH-ICS/CARV + * Nick Kossifidis <mick@ics.forth.gr> + */ + +#include <asm/asm.h> /* For RISCV_* and REG_* macros */ +#include <asm/csr.h> /* For CSR_* macros */ +#include <asm/page.h> /* For PAGE_SIZE */ +#include <linux/linkage.h> /* For SYM_* macros */ + +.section ".rodata" +SYM_CODE_START(riscv_kexec_relocate) + + /* + * s0: Pointer to the current entry + * s1: (const) Phys address to jump to after relocation + * s2: (const) Phys address of the FDT image + * s3: (const) The hartid of the current hart + * s4: Pointer to the destination address for the relocation + * s5: (const) Number of words per page + * s6: (const) 1, used for subtraction + * s7: (const) kernel_map.va_pa_offset, used when switching MMU off + * s8: (const) Physical address of the main loop + * s9: (debug) indirection page counter + * s10: (debug) entry counter + * s11: (debug) copied words counter + */ + mv s0, a0 + mv s1, a1 + mv s2, a2 + mv s3, a3 + mv s4, zero + li s5, (PAGE_SIZE / RISCV_SZPTR) + li s6, 1 + mv s7, a4 + mv s8, zero + mv s9, zero + mv s10, zero + mv s11, zero + + /* Disable / cleanup interrupts */ + csrw CSR_SIE, zero + csrw CSR_SIP, zero + + /* + * When we switch SATP.MODE to "Bare" we'll only + * play with physical addresses. However the first time + * we try to jump somewhere, the offset on the jump + * will be relative to pc which will still be on VA. To + * deal with this we set stvec to the physical address at + * the start of the loop below so that we jump there in + * any case. + */ + la s8, 1f + sub s8, s8, s7 + csrw CSR_STVEC, s8 + + /* Process entries in a loop */ +.align 2 +1: + addi s10, s10, 1 + REG_L t0, 0(s0) /* t0 = *image->entry */ + addi s0, s0, RISCV_SZPTR /* image->entry++ */ + + /* IND_DESTINATION entry ? -> save destination address */ + andi t1, t0, 0x1 + beqz t1, 2f + andi s4, t0, ~0x1 + j 1b + +2: + /* IND_INDIRECTION entry ? -> update next entry ptr (PA) */ + andi t1, t0, 0x2 + beqz t1, 2f + andi s0, t0, ~0x2 + addi s9, s9, 1 + csrw CSR_SATP, zero + jalr zero, s8, 0 + +2: + /* IND_DONE entry ? -> jump to done label */ + andi t1, t0, 0x4 + beqz t1, 2f + j 4f + +2: + /* + * IND_SOURCE entry ? -> copy page word by word to the + * destination address we got from IND_DESTINATION + */ + andi t1, t0, 0x8 + beqz t1, 1b /* Unknown entry type, ignore it */ + andi t0, t0, ~0x8 + mv t3, s5 /* i = num words per page */ +3: /* copy loop */ + REG_L t1, (t0) /* t1 = *src_ptr */ + REG_S t1, (s4) /* *dst_ptr = *src_ptr */ + addi t0, t0, RISCV_SZPTR /* stc_ptr++ */ + addi s4, s4, RISCV_SZPTR /* dst_ptr++ */ + sub t3, t3, s6 /* i-- */ + addi s11, s11, 1 /* c++ */ + beqz t3, 1b /* copy done ? */ + j 3b + +4: + /* Pass the arguments to the next kernel / Cleanup*/ + mv a0, s3 + mv a1, s2 + mv a2, s1 + + /* Cleanup */ + mv a3, zero + mv a4, zero + mv a5, zero + mv a6, zero + mv a7, zero + + mv s0, zero + mv s1, zero + mv s2, zero + mv s3, zero + mv s4, zero + mv s5, zero + mv s6, zero + mv s7, zero + mv s8, zero + mv s9, zero + mv s10, zero + mv s11, zero + + mv t0, zero + mv t1, zero + mv t2, zero + mv t3, zero + mv t4, zero + mv t5, zero + mv t6, zero + csrw CSR_SEPC, zero + csrw CSR_SCAUSE, zero + csrw CSR_SSCRATCH, zero + + /* + * Make sure the relocated code is visible + * and jump to the new kernel + */ + fence.i + + jalr zero, a2, 0 + +SYM_CODE_END(riscv_kexec_relocate) +riscv_kexec_relocate_end: + + +/* Used for jumping to crashkernel */ +.section ".text" +SYM_CODE_START(riscv_kexec_norelocate) + /* + * s0: (const) Phys address to jump to + * s1: (const) Phys address of the FDT image + * s2: (const) The hartid of the current hart + */ + mv s0, a1 + mv s1, a2 + mv s2, a3 + + /* Disable / cleanup interrupts */ + csrw CSR_SIE, zero + csrw CSR_SIP, zero + + /* Pass the arguments to the next kernel / Cleanup*/ + mv a0, s2 + mv a1, s1 + mv a2, s0 + + /* Cleanup */ + mv a3, zero + mv a4, zero + mv a5, zero + mv a6, zero + mv a7, zero + + mv s0, zero + mv s1, zero + mv s2, zero + mv s3, zero + mv s4, zero + mv s5, zero + mv s6, zero + mv s7, zero + mv s8, zero + mv s9, zero + mv s10, zero + mv s11, zero + + mv t0, zero + mv t1, zero + mv t2, zero + mv t3, zero + mv t4, zero + mv t5, zero + mv t6, zero + csrw CSR_SEPC, zero + csrw CSR_SCAUSE, zero + csrw CSR_SSCRATCH, zero + + /* + * Switch to physical addressing + * This will also trigger a jump to CSR_STVEC + * which in this case is the address of the new + * kernel. + */ + csrw CSR_STVEC, a2 + csrw CSR_SATP, zero + +SYM_CODE_END(riscv_kexec_norelocate) + +.section ".rodata" +SYM_DATA(riscv_kexec_relocate_size, + .long riscv_kexec_relocate_end - riscv_kexec_relocate) + diff --git a/arch/riscv/kernel/kgdb.c b/arch/riscv/kernel/kgdb.c new file mode 100644 index 000000000..963ed7edc --- /dev/null +++ b/arch/riscv/kernel/kgdb.c @@ -0,0 +1,390 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 SiFive + */ + +#include <linux/ptrace.h> +#include <linux/kdebug.h> +#include <linux/bug.h> +#include <linux/kgdb.h> +#include <linux/irqflags.h> +#include <linux/string.h> +#include <asm/cacheflush.h> +#include <asm/gdb_xml.h> +#include <asm/parse_asm.h> + +enum { + NOT_KGDB_BREAK = 0, + KGDB_SW_BREAK, + KGDB_COMPILED_BREAK, + KGDB_SW_SINGLE_STEP +}; + +static unsigned long stepped_address; +static unsigned int stepped_opcode; + +#if __riscv_xlen == 32 +/* C.JAL is an RV32C-only instruction */ +DECLARE_INSN(c_jal, MATCH_C_JAL, MASK_C_JAL) +#else +#define is_c_jal_insn(opcode) 0 +#endif +DECLARE_INSN(jalr, MATCH_JALR, MASK_JALR) +DECLARE_INSN(jal, MATCH_JAL, MASK_JAL) +DECLARE_INSN(c_jr, MATCH_C_JR, MASK_C_JR) +DECLARE_INSN(c_jalr, MATCH_C_JALR, MASK_C_JALR) +DECLARE_INSN(c_j, MATCH_C_J, MASK_C_J) +DECLARE_INSN(beq, MATCH_BEQ, MASK_BEQ) +DECLARE_INSN(bne, MATCH_BNE, MASK_BNE) +DECLARE_INSN(blt, MATCH_BLT, MASK_BLT) +DECLARE_INSN(bge, MATCH_BGE, MASK_BGE) +DECLARE_INSN(bltu, MATCH_BLTU, MASK_BLTU) +DECLARE_INSN(bgeu, MATCH_BGEU, MASK_BGEU) +DECLARE_INSN(c_beqz, MATCH_C_BEQZ, MASK_C_BEQZ) +DECLARE_INSN(c_bnez, MATCH_C_BNEZ, MASK_C_BNEZ) +DECLARE_INSN(sret, MATCH_SRET, MASK_SRET) + +static int decode_register_index(unsigned long opcode, int offset) +{ + return (opcode >> offset) & 0x1F; +} + +static int decode_register_index_short(unsigned long opcode, int offset) +{ + return ((opcode >> offset) & 0x7) + 8; +} + +/* Calculate the new address for after a step */ +static int get_step_address(struct pt_regs *regs, unsigned long *next_addr) +{ + unsigned long pc = regs->epc; + unsigned long *regs_ptr = (unsigned long *)regs; + unsigned int rs1_num, rs2_num; + int op_code; + + if (get_kernel_nofault(op_code, (void *)pc)) + return -EINVAL; + if ((op_code & __INSN_LENGTH_MASK) != __INSN_LENGTH_GE_32) { + if (is_c_jalr_insn(op_code) || is_c_jr_insn(op_code)) { + rs1_num = decode_register_index(op_code, RVC_C2_RS1_OPOFF); + *next_addr = regs_ptr[rs1_num]; + } else if (is_c_j_insn(op_code) || is_c_jal_insn(op_code)) { + *next_addr = EXTRACT_RVC_J_IMM(op_code) + pc; + } else if (is_c_beqz_insn(op_code)) { + rs1_num = decode_register_index_short(op_code, + RVC_C1_RS1_OPOFF); + if (!rs1_num || regs_ptr[rs1_num] == 0) + *next_addr = EXTRACT_RVC_B_IMM(op_code) + pc; + else + *next_addr = pc + 2; + } else if (is_c_bnez_insn(op_code)) { + rs1_num = + decode_register_index_short(op_code, RVC_C1_RS1_OPOFF); + if (rs1_num && regs_ptr[rs1_num] != 0) + *next_addr = EXTRACT_RVC_B_IMM(op_code) + pc; + else + *next_addr = pc + 2; + } else { + *next_addr = pc + 2; + } + } else { + if ((op_code & __INSN_OPCODE_MASK) == __INSN_BRANCH_OPCODE) { + bool result = false; + long imm = EXTRACT_BTYPE_IMM(op_code); + unsigned long rs1_val = 0, rs2_val = 0; + + rs1_num = decode_register_index(op_code, RVG_RS1_OPOFF); + rs2_num = decode_register_index(op_code, RVG_RS2_OPOFF); + if (rs1_num) + rs1_val = regs_ptr[rs1_num]; + if (rs2_num) + rs2_val = regs_ptr[rs2_num]; + + if (is_beq_insn(op_code)) + result = (rs1_val == rs2_val) ? true : false; + else if (is_bne_insn(op_code)) + result = (rs1_val != rs2_val) ? true : false; + else if (is_blt_insn(op_code)) + result = + ((long)rs1_val < + (long)rs2_val) ? true : false; + else if (is_bge_insn(op_code)) + result = + ((long)rs1_val >= + (long)rs2_val) ? true : false; + else if (is_bltu_insn(op_code)) + result = (rs1_val < rs2_val) ? true : false; + else if (is_bgeu_insn(op_code)) + result = (rs1_val >= rs2_val) ? true : false; + if (result) + *next_addr = imm + pc; + else + *next_addr = pc + 4; + } else if (is_jal_insn(op_code)) { + *next_addr = EXTRACT_JTYPE_IMM(op_code) + pc; + } else if (is_jalr_insn(op_code)) { + rs1_num = decode_register_index(op_code, RVG_RS1_OPOFF); + if (rs1_num) + *next_addr = ((unsigned long *)regs)[rs1_num]; + *next_addr += EXTRACT_ITYPE_IMM(op_code); + } else if (is_sret_insn(op_code)) { + *next_addr = pc; + } else { + *next_addr = pc + 4; + } + } + return 0; +} + +static int do_single_step(struct pt_regs *regs) +{ + /* Determine where the target instruction will send us to */ + unsigned long addr = 0; + int error = get_step_address(regs, &addr); + + if (error) + return error; + + /* Store the op code in the stepped address */ + error = get_kernel_nofault(stepped_opcode, (void *)addr); + if (error) + return error; + + stepped_address = addr; + + /* Replace the op code with the break instruction */ + error = copy_to_kernel_nofault((void *)stepped_address, + arch_kgdb_ops.gdb_bpt_instr, + BREAK_INSTR_SIZE); + /* Flush and return */ + if (!error) { + flush_icache_range(addr, addr + BREAK_INSTR_SIZE); + kgdb_single_step = 1; + atomic_set(&kgdb_cpu_doing_single_step, + raw_smp_processor_id()); + } else { + stepped_address = 0; + stepped_opcode = 0; + } + return error; +} + +/* Undo a single step */ +static void undo_single_step(struct pt_regs *regs) +{ + if (stepped_opcode != 0) { + copy_to_kernel_nofault((void *)stepped_address, + (void *)&stepped_opcode, BREAK_INSTR_SIZE); + flush_icache_range(stepped_address, + stepped_address + BREAK_INSTR_SIZE); + } + stepped_address = 0; + stepped_opcode = 0; + kgdb_single_step = 0; + atomic_set(&kgdb_cpu_doing_single_step, -1); +} + +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { + {DBG_REG_ZERO, GDB_SIZEOF_REG, -1}, + {DBG_REG_RA, GDB_SIZEOF_REG, offsetof(struct pt_regs, ra)}, + {DBG_REG_SP, GDB_SIZEOF_REG, offsetof(struct pt_regs, sp)}, + {DBG_REG_GP, GDB_SIZEOF_REG, offsetof(struct pt_regs, gp)}, + {DBG_REG_TP, GDB_SIZEOF_REG, offsetof(struct pt_regs, tp)}, + {DBG_REG_T0, GDB_SIZEOF_REG, offsetof(struct pt_regs, t0)}, + {DBG_REG_T1, GDB_SIZEOF_REG, offsetof(struct pt_regs, t1)}, + {DBG_REG_T2, GDB_SIZEOF_REG, offsetof(struct pt_regs, t2)}, + {DBG_REG_FP, GDB_SIZEOF_REG, offsetof(struct pt_regs, s0)}, + {DBG_REG_S1, GDB_SIZEOF_REG, offsetof(struct pt_regs, a1)}, + {DBG_REG_A0, GDB_SIZEOF_REG, offsetof(struct pt_regs, a0)}, + {DBG_REG_A1, GDB_SIZEOF_REG, offsetof(struct pt_regs, a1)}, + {DBG_REG_A2, GDB_SIZEOF_REG, offsetof(struct pt_regs, a2)}, + {DBG_REG_A3, GDB_SIZEOF_REG, offsetof(struct pt_regs, a3)}, + {DBG_REG_A4, GDB_SIZEOF_REG, offsetof(struct pt_regs, a4)}, + {DBG_REG_A5, GDB_SIZEOF_REG, offsetof(struct pt_regs, a5)}, + {DBG_REG_A6, GDB_SIZEOF_REG, offsetof(struct pt_regs, a6)}, + {DBG_REG_A7, GDB_SIZEOF_REG, offsetof(struct pt_regs, a7)}, + {DBG_REG_S2, GDB_SIZEOF_REG, offsetof(struct pt_regs, s2)}, + {DBG_REG_S3, GDB_SIZEOF_REG, offsetof(struct pt_regs, s3)}, + {DBG_REG_S4, GDB_SIZEOF_REG, offsetof(struct pt_regs, s4)}, + {DBG_REG_S5, GDB_SIZEOF_REG, offsetof(struct pt_regs, s5)}, + {DBG_REG_S6, GDB_SIZEOF_REG, offsetof(struct pt_regs, s6)}, + {DBG_REG_S7, GDB_SIZEOF_REG, offsetof(struct pt_regs, s7)}, + {DBG_REG_S8, GDB_SIZEOF_REG, offsetof(struct pt_regs, s8)}, + {DBG_REG_S9, GDB_SIZEOF_REG, offsetof(struct pt_regs, s9)}, + {DBG_REG_S10, GDB_SIZEOF_REG, offsetof(struct pt_regs, s10)}, + {DBG_REG_S11, GDB_SIZEOF_REG, offsetof(struct pt_regs, s11)}, + {DBG_REG_T3, GDB_SIZEOF_REG, offsetof(struct pt_regs, t3)}, + {DBG_REG_T4, GDB_SIZEOF_REG, offsetof(struct pt_regs, t4)}, + {DBG_REG_T5, GDB_SIZEOF_REG, offsetof(struct pt_regs, t5)}, + {DBG_REG_T6, GDB_SIZEOF_REG, offsetof(struct pt_regs, t6)}, + {DBG_REG_EPC, GDB_SIZEOF_REG, offsetof(struct pt_regs, epc)}, + {DBG_REG_STATUS, GDB_SIZEOF_REG, offsetof(struct pt_regs, status)}, + {DBG_REG_BADADDR, GDB_SIZEOF_REG, offsetof(struct pt_regs, badaddr)}, + {DBG_REG_CAUSE, GDB_SIZEOF_REG, offsetof(struct pt_regs, cause)}, +}; + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return NULL; + + if (dbg_reg_def[regno].offset != -1) + memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + else + memset(mem, 0, dbg_reg_def[regno].size); + return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return -EINVAL; + + if (dbg_reg_def[regno].offset != -1) + memcpy((void *)regs + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + return 0; +} + +void +sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) +{ + /* Initialize to zero */ + memset((char *)gdb_regs, 0, NUMREGBYTES); + + gdb_regs[DBG_REG_SP_OFF] = task->thread.sp; + gdb_regs[DBG_REG_FP_OFF] = task->thread.s[0]; + gdb_regs[DBG_REG_S1_OFF] = task->thread.s[1]; + gdb_regs[DBG_REG_S2_OFF] = task->thread.s[2]; + gdb_regs[DBG_REG_S3_OFF] = task->thread.s[3]; + gdb_regs[DBG_REG_S4_OFF] = task->thread.s[4]; + gdb_regs[DBG_REG_S5_OFF] = task->thread.s[5]; + gdb_regs[DBG_REG_S6_OFF] = task->thread.s[6]; + gdb_regs[DBG_REG_S7_OFF] = task->thread.s[7]; + gdb_regs[DBG_REG_S8_OFF] = task->thread.s[8]; + gdb_regs[DBG_REG_S9_OFF] = task->thread.s[10]; + gdb_regs[DBG_REG_S10_OFF] = task->thread.s[11]; + gdb_regs[DBG_REG_EPC_OFF] = task->thread.ra; +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->epc = pc; +} + +void kgdb_arch_handle_qxfer_pkt(char *remcom_in_buffer, + char *remcom_out_buffer) +{ + if (!strncmp(remcom_in_buffer, gdb_xfer_read_target, + sizeof(gdb_xfer_read_target))) + strcpy(remcom_out_buffer, riscv_gdb_stub_target_desc); + else if (!strncmp(remcom_in_buffer, gdb_xfer_read_cpuxml, + sizeof(gdb_xfer_read_cpuxml))) + strcpy(remcom_out_buffer, riscv_gdb_stub_cpuxml); +} + +static inline void kgdb_arch_update_addr(struct pt_regs *regs, + char *remcom_in_buffer) +{ + unsigned long addr; + char *ptr; + + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &addr)) + regs->epc = addr; +} + +int kgdb_arch_handle_exception(int vector, int signo, int err_code, + char *remcom_in_buffer, char *remcom_out_buffer, + struct pt_regs *regs) +{ + int err = 0; + + undo_single_step(regs); + + switch (remcom_in_buffer[0]) { + case 'c': + case 'D': + case 'k': + if (remcom_in_buffer[0] == 'c') + kgdb_arch_update_addr(regs, remcom_in_buffer); + break; + case 's': + kgdb_arch_update_addr(regs, remcom_in_buffer); + err = do_single_step(regs); + break; + default: + err = -1; + } + return err; +} + +static int kgdb_riscv_kgdbbreak(unsigned long addr) +{ + if (stepped_address == addr) + return KGDB_SW_SINGLE_STEP; + if (atomic_read(&kgdb_setting_breakpoint)) + if (addr == (unsigned long)&kgdb_compiled_break) + return KGDB_COMPILED_BREAK; + + return kgdb_has_hit_break(addr); +} + +static int kgdb_riscv_notify(struct notifier_block *self, unsigned long cmd, + void *ptr) +{ + struct die_args *args = (struct die_args *)ptr; + struct pt_regs *regs = args->regs; + unsigned long flags; + int type; + + if (user_mode(regs)) + return NOTIFY_DONE; + + type = kgdb_riscv_kgdbbreak(regs->epc); + if (type == NOT_KGDB_BREAK && cmd == DIE_TRAP) + return NOTIFY_DONE; + + local_irq_save(flags); + + if (kgdb_handle_exception(type == KGDB_SW_SINGLE_STEP ? 0 : 1, + args->signr, cmd, regs)) + return NOTIFY_DONE; + + if (type == KGDB_COMPILED_BREAK) + regs->epc += 4; + + local_irq_restore(flags); + + return NOTIFY_STOP; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_riscv_notify, +}; + +int kgdb_arch_init(void) +{ + register_die_notifier(&kgdb_notifier); + + return 0; +} + +void kgdb_arch_exit(void) +{ + unregister_die_notifier(&kgdb_notifier); +} + +/* + * Global data + */ +#ifdef CONFIG_RISCV_ISA_C +const struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = {0x02, 0x90}, /* c.ebreak */ +}; +#else +const struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = {0x73, 0x00, 0x10, 0x00}, /* ebreak */ +}; +#endif diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c new file mode 100644 index 000000000..2d139b724 --- /dev/null +++ b/arch/riscv/kernel/machine_kexec.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 FORTH-ICS/CARV + * Nick Kossifidis <mick@ics.forth.gr> + */ + +#include <linux/kexec.h> +#include <asm/kexec.h> /* For riscv_kexec_* symbol defines */ +#include <linux/smp.h> /* For smp_send_stop () */ +#include <asm/cacheflush.h> /* For local_flush_icache_all() */ +#include <asm/barrier.h> /* For smp_wmb() */ +#include <asm/page.h> /* For PAGE_MASK */ +#include <linux/libfdt.h> /* For fdt_check_header() */ +#include <asm/set_memory.h> /* For set_memory_x() */ +#include <linux/compiler.h> /* For unreachable() */ +#include <linux/cpu.h> /* For cpu_down() */ +#include <linux/reboot.h> +#include <linux/interrupt.h> +#include <linux/irq.h> + +/* + * kexec_image_info - Print received image details + */ +static void +kexec_image_info(const struct kimage *image) +{ + unsigned long i; + + pr_debug("Kexec image info:\n"); + pr_debug("\ttype: %d\n", image->type); + pr_debug("\tstart: %lx\n", image->start); + pr_debug("\thead: %lx\n", image->head); + pr_debug("\tnr_segments: %lu\n", image->nr_segments); + + for (i = 0; i < image->nr_segments; i++) { + pr_debug("\t segment[%lu]: %016lx - %016lx", i, + image->segment[i].mem, + image->segment[i].mem + image->segment[i].memsz); + pr_debug("\t\t0x%lx bytes, %lu pages\n", + (unsigned long) image->segment[i].memsz, + (unsigned long) image->segment[i].memsz / PAGE_SIZE); + } +} + +/* + * machine_kexec_prepare - Initialize kexec + * + * This function is called from do_kexec_load, when the user has + * provided us with an image to be loaded. Its goal is to validate + * the image and prepare the control code buffer as needed. + * Note that kimage_alloc_init has already been called and the + * control buffer has already been allocated. + */ +int +machine_kexec_prepare(struct kimage *image) +{ + struct kimage_arch *internal = &image->arch; + struct fdt_header fdt = {0}; + void *control_code_buffer = NULL; + unsigned int control_code_buffer_sz = 0; + int i = 0; + + kexec_image_info(image); + + /* Find the Flattened Device Tree and save its physical address */ + for (i = 0; i < image->nr_segments; i++) { + if (image->segment[i].memsz <= sizeof(fdt)) + continue; + + if (image->file_mode) + memcpy(&fdt, image->segment[i].buf, sizeof(fdt)); + else if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt))) + continue; + + if (fdt_check_header(&fdt)) + continue; + + internal->fdt_addr = (unsigned long) image->segment[i].mem; + break; + } + + if (!internal->fdt_addr) { + pr_err("Device tree not included in the provided image\n"); + return -EINVAL; + } + + /* Copy the assembler code for relocation to the control page */ + if (image->type != KEXEC_TYPE_CRASH) { + control_code_buffer = page_address(image->control_code_page); + control_code_buffer_sz = page_size(image->control_code_page); + + if (unlikely(riscv_kexec_relocate_size > control_code_buffer_sz)) { + pr_err("Relocation code doesn't fit within a control page\n"); + return -EINVAL; + } + + memcpy(control_code_buffer, riscv_kexec_relocate, + riscv_kexec_relocate_size); + + /* Mark the control page executable */ + set_memory_x((unsigned long) control_code_buffer, 1); + } + + return 0; +} + + +/* + * machine_kexec_cleanup - Cleanup any leftovers from + * machine_kexec_prepare + * + * This function is called by kimage_free to handle any arch-specific + * allocations done on machine_kexec_prepare. Since we didn't do any + * allocations there, this is just an empty function. Note that the + * control buffer is freed by kimage_free. + */ +void +machine_kexec_cleanup(struct kimage *image) +{ +} + + +/* + * machine_shutdown - Prepare for a kexec reboot + * + * This function is called by kernel_kexec just before machine_kexec + * below. Its goal is to prepare the rest of the system (the other + * harts and possibly devices etc) for a kexec reboot. + */ +void machine_shutdown(void) +{ + /* + * No more interrupts on this hart + * until we are back up. + */ + local_irq_disable(); + +#if defined(CONFIG_HOTPLUG_CPU) + smp_shutdown_nonboot_cpus(smp_processor_id()); +#endif +} + +static void machine_kexec_mask_interrupts(void) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + int ret; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + /* + * First try to remove the active state. If this + * fails, try to EOI the interrupt. + */ + ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false); + + if (ret && irqd_irq_inprogress(&desc->irq_data) && + chip->irq_eoi) + chip->irq_eoi(&desc->irq_data); + + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } +} + +/* + * machine_crash_shutdown - Prepare to kexec after a kernel crash + * + * This function is called by crash_kexec just before machine_kexec + * and its goal is to shutdown non-crashing cpus and save registers. + */ +void +machine_crash_shutdown(struct pt_regs *regs) +{ + local_irq_disable(); + + /* shutdown non-crashing cpus */ + crash_smp_send_stop(); + + crash_save_cpu(regs, smp_processor_id()); + machine_kexec_mask_interrupts(); + + pr_info("Starting crashdump kernel...\n"); +} + +/* + * machine_kexec - Jump to the loaded kimage + * + * This function is called by kernel_kexec which is called by the + * reboot system call when the reboot cmd is LINUX_REBOOT_CMD_KEXEC, + * or by crash_kernel which is called by the kernel's arch-specific + * trap handler in case of a kernel panic. It's the final stage of + * the kexec process where the pre-loaded kimage is ready to be + * executed. We assume at this point that all other harts are + * suspended and this hart will be the new boot hart. + */ +void __noreturn +machine_kexec(struct kimage *image) +{ + struct kimage_arch *internal = &image->arch; + unsigned long jump_addr = (unsigned long) image->start; + unsigned long first_ind_entry = (unsigned long) &image->head; + unsigned long this_cpu_id = __smp_processor_id(); + unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id); + unsigned long fdt_addr = internal->fdt_addr; + void *control_code_buffer = page_address(image->control_code_page); + riscv_kexec_method kexec_method = NULL; + +#ifdef CONFIG_SMP + WARN(smp_crash_stop_failed(), + "Some CPUs may be stale, kdump will be unreliable.\n"); +#endif + + if (image->type != KEXEC_TYPE_CRASH) + kexec_method = control_code_buffer; + else + kexec_method = (riscv_kexec_method) &riscv_kexec_norelocate; + + pr_notice("Will call new kernel at %08lx from hart id %lx\n", + jump_addr, this_hart_id); + pr_notice("FDT image at %08lx\n", fdt_addr); + + /* Make sure the relocation code is visible to the hart */ + local_flush_icache_all(); + + /* Jump to the relocation code */ + pr_notice("Bye...\n"); + kexec_method(first_ind_entry, jump_addr, fdt_addr, + this_hart_id, kernel_map.va_pa_offset); + unreachable(); +} diff --git a/arch/riscv/kernel/machine_kexec_file.c b/arch/riscv/kernel/machine_kexec_file.c new file mode 100644 index 000000000..b0bf8c172 --- /dev/null +++ b/arch/riscv/kernel/machine_kexec_file.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * kexec_file for riscv, use vmlinux as the dump-capture kernel image. + * + * Copyright (C) 2021 Huawei Technologies Co, Ltd. + * + * Author: Liao Chang (liaochang1@huawei.com) + */ +#include <linux/kexec.h> + +const struct kexec_file_ops * const kexec_file_loaders[] = { + &elf_kexec_ops, + NULL +}; diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S new file mode 100644 index 000000000..125de818d --- /dev/null +++ b/arch/riscv/kernel/mcount-dyn.S @@ -0,0 +1,191 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2017 Andes Technology Corporation */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm/csr.h> +#include <asm/unistd.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> +#include <asm-generic/export.h> +#include <asm/ftrace.h> + + .text + +#define FENTRY_RA_OFFSET 8 +#define ABI_SIZE_ON_STACK 80 +#define ABI_A0 0 +#define ABI_A1 8 +#define ABI_A2 16 +#define ABI_A3 24 +#define ABI_A4 32 +#define ABI_A5 40 +#define ABI_A6 48 +#define ABI_A7 56 +#define ABI_T0 64 +#define ABI_RA 72 + + .macro SAVE_ABI + addi sp, sp, -ABI_SIZE_ON_STACK + + REG_S a0, ABI_A0(sp) + REG_S a1, ABI_A1(sp) + REG_S a2, ABI_A2(sp) + REG_S a3, ABI_A3(sp) + REG_S a4, ABI_A4(sp) + REG_S a5, ABI_A5(sp) + REG_S a6, ABI_A6(sp) + REG_S a7, ABI_A7(sp) + REG_S t0, ABI_T0(sp) + REG_S ra, ABI_RA(sp) + .endm + + .macro RESTORE_ABI + REG_L a0, ABI_A0(sp) + REG_L a1, ABI_A1(sp) + REG_L a2, ABI_A2(sp) + REG_L a3, ABI_A3(sp) + REG_L a4, ABI_A4(sp) + REG_L a5, ABI_A5(sp) + REG_L a6, ABI_A6(sp) + REG_L a7, ABI_A7(sp) + REG_L t0, ABI_T0(sp) + REG_L ra, ABI_RA(sp) + + addi sp, sp, ABI_SIZE_ON_STACK + .endm + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + .macro SAVE_ALL + addi sp, sp, -PT_SIZE_ON_STACK + + REG_S t0, PT_EPC(sp) + REG_S x1, PT_RA(sp) + REG_S x2, PT_SP(sp) + REG_S x3, PT_GP(sp) + REG_S x4, PT_TP(sp) + REG_S x5, PT_T0(sp) + REG_S x6, PT_T1(sp) + REG_S x7, PT_T2(sp) + REG_S x8, PT_S0(sp) + REG_S x9, PT_S1(sp) + REG_S x10, PT_A0(sp) + REG_S x11, PT_A1(sp) + REG_S x12, PT_A2(sp) + REG_S x13, PT_A3(sp) + REG_S x14, PT_A4(sp) + REG_S x15, PT_A5(sp) + REG_S x16, PT_A6(sp) + REG_S x17, PT_A7(sp) + REG_S x18, PT_S2(sp) + REG_S x19, PT_S3(sp) + REG_S x20, PT_S4(sp) + REG_S x21, PT_S5(sp) + REG_S x22, PT_S6(sp) + REG_S x23, PT_S7(sp) + REG_S x24, PT_S8(sp) + REG_S x25, PT_S9(sp) + REG_S x26, PT_S10(sp) + REG_S x27, PT_S11(sp) + REG_S x28, PT_T3(sp) + REG_S x29, PT_T4(sp) + REG_S x30, PT_T5(sp) + REG_S x31, PT_T6(sp) + .endm + + .macro RESTORE_ALL + REG_L t0, PT_EPC(sp) + REG_L x1, PT_RA(sp) + REG_L x2, PT_SP(sp) + REG_L x3, PT_GP(sp) + REG_L x4, PT_TP(sp) + REG_L x6, PT_T1(sp) + REG_L x7, PT_T2(sp) + REG_L x8, PT_S0(sp) + REG_L x9, PT_S1(sp) + REG_L x10, PT_A0(sp) + REG_L x11, PT_A1(sp) + REG_L x12, PT_A2(sp) + REG_L x13, PT_A3(sp) + REG_L x14, PT_A4(sp) + REG_L x15, PT_A5(sp) + REG_L x16, PT_A6(sp) + REG_L x17, PT_A7(sp) + REG_L x18, PT_S2(sp) + REG_L x19, PT_S3(sp) + REG_L x20, PT_S4(sp) + REG_L x21, PT_S5(sp) + REG_L x22, PT_S6(sp) + REG_L x23, PT_S7(sp) + REG_L x24, PT_S8(sp) + REG_L x25, PT_S9(sp) + REG_L x26, PT_S10(sp) + REG_L x27, PT_S11(sp) + REG_L x28, PT_T3(sp) + REG_L x29, PT_T4(sp) + REG_L x30, PT_T5(sp) + REG_L x31, PT_T6(sp) + + addi sp, sp, PT_SIZE_ON_STACK + .endm +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + +ENTRY(ftrace_caller) + SAVE_ABI + + addi a0, t0, -FENTRY_RA_OFFSET + la a1, function_trace_op + REG_L a2, 0(a1) + mv a1, ra + mv a3, sp + +ftrace_call: + .global ftrace_call + call ftrace_stub + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + addi a0, sp, ABI_RA + REG_L a1, ABI_T0(sp) + addi a1, a1, -FENTRY_RA_OFFSET +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + mv a2, s0 +#endif +ftrace_graph_call: + .global ftrace_graph_call + call ftrace_stub +#endif + RESTORE_ABI + jr t0 +ENDPROC(ftrace_caller) + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +ENTRY(ftrace_regs_caller) + SAVE_ALL + + addi a0, t0, -FENTRY_RA_OFFSET + la a1, function_trace_op + REG_L a2, 0(a1) + mv a1, ra + mv a3, sp + +ftrace_regs_call: + .global ftrace_regs_call + call ftrace_stub + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + addi a0, sp, PT_RA + REG_L a1, PT_EPC(sp) + addi a1, a1, -FENTRY_RA_OFFSET +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + mv a2, s0 +#endif +ftrace_graph_regs_call: + .global ftrace_graph_regs_call + call ftrace_stub +#endif + + RESTORE_ALL + jr t0 +ENDPROC(ftrace_regs_caller) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S new file mode 100644 index 000000000..6d462681c --- /dev/null +++ b/arch/riscv/kernel/mcount.S @@ -0,0 +1,129 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2017 Andes Technology Corporation */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm/csr.h> +#include <asm/unistd.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> +#include <asm-generic/export.h> +#include <asm/ftrace.h> + + .text + + .macro SAVE_ABI_STATE + addi sp, sp, -16 + sd s0, 0(sp) + sd ra, 8(sp) + addi s0, sp, 16 + .endm + + /* + * The call to ftrace_return_to_handler would overwrite the return + * register if a0 was not saved. + */ + .macro SAVE_RET_ABI_STATE + addi sp, sp, -32 + sd s0, 16(sp) + sd ra, 24(sp) + sd a0, 8(sp) + addi s0, sp, 32 + .endm + + .macro RESTORE_ABI_STATE + ld ra, 8(sp) + ld s0, 0(sp) + addi sp, sp, 16 + .endm + + .macro RESTORE_RET_ABI_STATE + ld ra, 24(sp) + ld s0, 16(sp) + ld a0, 8(sp) + addi sp, sp, 32 + .endm + +ENTRY(ftrace_stub) +#ifdef CONFIG_DYNAMIC_FTRACE + .global MCOUNT_NAME + .set MCOUNT_NAME, ftrace_stub +#endif + ret +ENDPROC(ftrace_stub) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(return_to_handler) +/* + * On implementing the frame point test, the ideal way is to compare the + * s0 (frame pointer, if enabled) on entry and the sp (stack pointer) on return. + * However, the psABI of variable-length-argument functions does not allow this. + * + * So alternatively we check the *old* frame pointer position, that is, the + * value stored in -16(s0) on entry, and the s0 on return. + */ +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + mv t6, s0 +#endif + SAVE_RET_ABI_STATE +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + mv a0, t6 +#endif + call ftrace_return_to_handler + mv a1, a0 + RESTORE_RET_ABI_STATE + jalr a1 +ENDPROC(return_to_handler) +#endif + +#ifndef CONFIG_DYNAMIC_FTRACE +ENTRY(MCOUNT_NAME) + la t4, ftrace_stub +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + la t0, ftrace_graph_return + ld t1, 0(t0) + bne t1, t4, do_ftrace_graph_caller + + la t3, ftrace_graph_entry + ld t2, 0(t3) + la t6, ftrace_graph_entry_stub + bne t2, t6, do_ftrace_graph_caller +#endif + la t3, ftrace_trace_function + ld t5, 0(t3) + bne t5, t4, do_trace + ret + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * A pseudo representation for the function graph tracer: + * prepare_to_return(&ra_to_caller_of_caller, ra_to_caller) + */ +do_ftrace_graph_caller: + addi a0, s0, -8 + mv a1, ra +#ifdef HAVE_FUNCTION_GRAPH_FP_TEST + ld a2, -16(s0) +#endif + SAVE_ABI_STATE + call prepare_ftrace_return + RESTORE_ABI_STATE + ret +#endif + +/* + * A pseudo representation for the function tracer: + * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller) + */ +do_trace: + ld a1, -8(s0) + mv a0, ra + + SAVE_ABI_STATE + jalr t5 + RESTORE_ABI_STATE + ret +ENDPROC(MCOUNT_NAME) +#endif +EXPORT_SYMBOL(MCOUNT_NAME) diff --git a/arch/riscv/kernel/module-sections.c b/arch/riscv/kernel/module-sections.c new file mode 100644 index 000000000..e264e59e5 --- /dev/null +++ b/arch/riscv/kernel/module-sections.c @@ -0,0 +1,159 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * Copyright (C) 2014-2017 Linaro Ltd. <ard.biesheuvel@linaro.org> + * + * Copyright (C) 2018 Andes Technology Corporation <zong@andestech.com> + */ + +#include <linux/elf.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/moduleloader.h> + +unsigned long module_emit_got_entry(struct module *mod, unsigned long val) +{ + struct mod_section *got_sec = &mod->arch.got; + int i = got_sec->num_entries; + struct got_entry *got = get_got_entry(val, got_sec); + + if (got) + return (unsigned long)got; + + /* There is no duplicate entry, create a new one */ + got = (struct got_entry *)got_sec->shdr->sh_addr; + got[i] = emit_got_entry(val); + + got_sec->num_entries++; + BUG_ON(got_sec->num_entries > got_sec->max_entries); + + return (unsigned long)&got[i]; +} + +unsigned long module_emit_plt_entry(struct module *mod, unsigned long val) +{ + struct mod_section *got_plt_sec = &mod->arch.got_plt; + struct got_entry *got_plt; + struct mod_section *plt_sec = &mod->arch.plt; + struct plt_entry *plt = get_plt_entry(val, plt_sec, got_plt_sec); + int i = plt_sec->num_entries; + + if (plt) + return (unsigned long)plt; + + /* There is no duplicate entry, create a new one */ + got_plt = (struct got_entry *)got_plt_sec->shdr->sh_addr; + got_plt[i] = emit_got_entry(val); + plt = (struct plt_entry *)plt_sec->shdr->sh_addr; + plt[i] = emit_plt_entry(val, + (unsigned long)&plt[i], + (unsigned long)&got_plt[i]); + + plt_sec->num_entries++; + got_plt_sec->num_entries++; + BUG_ON(plt_sec->num_entries > plt_sec->max_entries); + + return (unsigned long)&plt[i]; +} + +static int is_rela_equal(const Elf_Rela *x, const Elf_Rela *y) +{ + return x->r_info == y->r_info && x->r_addend == y->r_addend; +} + +static bool duplicate_rela(const Elf_Rela *rela, int idx) +{ + int i; + for (i = 0; i < idx; i++) { + if (is_rela_equal(&rela[i], &rela[idx])) + return true; + } + return false; +} + +static void count_max_entries(Elf_Rela *relas, int num, + unsigned int *plts, unsigned int *gots) +{ + unsigned int type, i; + + for (i = 0; i < num; i++) { + type = ELF_RISCV_R_TYPE(relas[i].r_info); + if (type == R_RISCV_CALL_PLT) { + if (!duplicate_rela(relas, i)) + (*plts)++; + } else if (type == R_RISCV_GOT_HI20) { + if (!duplicate_rela(relas, i)) + (*gots)++; + } + } +} + +int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, + char *secstrings, struct module *mod) +{ + unsigned int num_plts = 0; + unsigned int num_gots = 0; + int i; + + /* + * Find the empty .got and .plt sections. + */ + for (i = 0; i < ehdr->e_shnum; i++) { + if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt")) + mod->arch.plt.shdr = sechdrs + i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".got")) + mod->arch.got.shdr = sechdrs + i; + else if (!strcmp(secstrings + sechdrs[i].sh_name, ".got.plt")) + mod->arch.got_plt.shdr = sechdrs + i; + } + + if (!mod->arch.plt.shdr) { + pr_err("%s: module PLT section(s) missing\n", mod->name); + return -ENOEXEC; + } + if (!mod->arch.got.shdr) { + pr_err("%s: module GOT section(s) missing\n", mod->name); + return -ENOEXEC; + } + if (!mod->arch.got_plt.shdr) { + pr_err("%s: module GOT.PLT section(s) missing\n", mod->name); + return -ENOEXEC; + } + + /* Calculate the maxinum number of entries */ + for (i = 0; i < ehdr->e_shnum; i++) { + Elf_Rela *relas = (void *)ehdr + sechdrs[i].sh_offset; + int num_rela = sechdrs[i].sh_size / sizeof(Elf_Rela); + Elf_Shdr *dst_sec = sechdrs + sechdrs[i].sh_info; + + if (sechdrs[i].sh_type != SHT_RELA) + continue; + + /* ignore relocations that operate on non-exec sections */ + if (!(dst_sec->sh_flags & SHF_EXECINSTR)) + continue; + + count_max_entries(relas, num_rela, &num_plts, &num_gots); + } + + mod->arch.plt.shdr->sh_type = SHT_NOBITS; + mod->arch.plt.shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.plt.shdr->sh_addralign = L1_CACHE_BYTES; + mod->arch.plt.shdr->sh_size = (num_plts + 1) * sizeof(struct plt_entry); + mod->arch.plt.num_entries = 0; + mod->arch.plt.max_entries = num_plts; + + mod->arch.got.shdr->sh_type = SHT_NOBITS; + mod->arch.got.shdr->sh_flags = SHF_ALLOC; + mod->arch.got.shdr->sh_addralign = L1_CACHE_BYTES; + mod->arch.got.shdr->sh_size = (num_gots + 1) * sizeof(struct got_entry); + mod->arch.got.num_entries = 0; + mod->arch.got.max_entries = num_gots; + + mod->arch.got_plt.shdr->sh_type = SHT_NOBITS; + mod->arch.got_plt.shdr->sh_flags = SHF_ALLOC; + mod->arch.got_plt.shdr->sh_addralign = L1_CACHE_BYTES; + mod->arch.got_plt.shdr->sh_size = (num_plts + 1) * sizeof(struct got_entry); + mod->arch.got_plt.num_entries = 0; + mod->arch.got_plt.max_entries = num_plts; + return 0; +} diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c new file mode 100644 index 000000000..a331001e3 --- /dev/null +++ b/arch/riscv/kernel/module.c @@ -0,0 +1,459 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * + * Copyright (C) 2017 Zihao Yu + */ + +#include <linux/elf.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/moduleloader.h> +#include <linux/vmalloc.h> +#include <linux/sizes.h> +#include <linux/pgtable.h> +#include <asm/alternative.h> +#include <asm/sections.h> + +/* + * The auipc+jalr instruction pair can reach any PC-relative offset + * in the range [-2^31 - 2^11, 2^31 - 2^11) + */ +static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) +{ +#ifdef CONFIG_32BIT + return true; +#else + return (-(1L << 31) - (1L << 11)) <= val && val < ((1L << 31) - (1L << 11)); +#endif +} + +static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) +{ + if (v != (u32)v) { + pr_err("%s: value %016llx out of range for 32-bit field\n", + me->name, (long long)v); + return -EINVAL; + } + *location = v; + return 0; +} + +static int apply_r_riscv_64_rela(struct module *me, u32 *location, Elf_Addr v) +{ + *(u64 *)location = v; + return 0; +} + +static int apply_r_riscv_branch_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + ptrdiff_t offset = (void *)v - (void *)location; + u32 imm12 = (offset & 0x1000) << (31 - 12); + u32 imm11 = (offset & 0x800) >> (11 - 7); + u32 imm10_5 = (offset & 0x7e0) << (30 - 10); + u32 imm4_1 = (offset & 0x1e) << (11 - 4); + + *location = (*location & 0x1fff07f) | imm12 | imm11 | imm10_5 | imm4_1; + return 0; +} + +static int apply_r_riscv_jal_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + ptrdiff_t offset = (void *)v - (void *)location; + u32 imm20 = (offset & 0x100000) << (31 - 20); + u32 imm19_12 = (offset & 0xff000); + u32 imm11 = (offset & 0x800) << (20 - 11); + u32 imm10_1 = (offset & 0x7fe) << (30 - 10); + + *location = (*location & 0xfff) | imm20 | imm19_12 | imm11 | imm10_1; + return 0; +} + +static int apply_r_riscv_rvc_branch_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + ptrdiff_t offset = (void *)v - (void *)location; + u16 imm8 = (offset & 0x100) << (12 - 8); + u16 imm7_6 = (offset & 0xc0) >> (6 - 5); + u16 imm5 = (offset & 0x20) >> (5 - 2); + u16 imm4_3 = (offset & 0x18) << (12 - 5); + u16 imm2_1 = (offset & 0x6) << (12 - 10); + + *(u16 *)location = (*(u16 *)location & 0xe383) | + imm8 | imm7_6 | imm5 | imm4_3 | imm2_1; + return 0; +} + +static int apply_r_riscv_rvc_jump_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + ptrdiff_t offset = (void *)v - (void *)location; + u16 imm11 = (offset & 0x800) << (12 - 11); + u16 imm10 = (offset & 0x400) >> (10 - 8); + u16 imm9_8 = (offset & 0x300) << (12 - 11); + u16 imm7 = (offset & 0x80) >> (7 - 6); + u16 imm6 = (offset & 0x40) << (12 - 11); + u16 imm5 = (offset & 0x20) >> (5 - 2); + u16 imm4 = (offset & 0x10) << (12 - 5); + u16 imm3_1 = (offset & 0xe) << (12 - 10); + + *(u16 *)location = (*(u16 *)location & 0xe003) | + imm11 | imm10 | imm9_8 | imm7 | imm6 | imm5 | imm4 | imm3_1; + return 0; +} + +static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + ptrdiff_t offset = (void *)v - (void *)location; + s32 hi20; + + if (!riscv_insn_valid_32bit_offset(offset)) { + pr_err( + "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", + me->name, (long long)v, location); + return -EINVAL; + } + + hi20 = (offset + 0x800) & 0xfffff000; + *location = (*location & 0xfff) | hi20; + return 0; +} + +static int apply_r_riscv_pcrel_lo12_i_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + /* + * v is the lo12 value to fill. It is calculated before calling this + * handler. + */ + *location = (*location & 0xfffff) | ((v & 0xfff) << 20); + return 0; +} + +static int apply_r_riscv_pcrel_lo12_s_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + /* + * v is the lo12 value to fill. It is calculated before calling this + * handler. + */ + u32 imm11_5 = (v & 0xfe0) << (31 - 11); + u32 imm4_0 = (v & 0x1f) << (11 - 4); + + *location = (*location & 0x1fff07f) | imm11_5 | imm4_0; + return 0; +} + +static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + s32 hi20; + + if (IS_ENABLED(CONFIG_CMODEL_MEDLOW)) { + pr_err( + "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", + me->name, (long long)v, location); + return -EINVAL; + } + + hi20 = ((s32)v + 0x800) & 0xfffff000; + *location = (*location & 0xfff) | hi20; + return 0; +} + +static int apply_r_riscv_lo12_i_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + /* Skip medlow checking because of filtering by HI20 already */ + s32 hi20 = ((s32)v + 0x800) & 0xfffff000; + s32 lo12 = ((s32)v - hi20); + *location = (*location & 0xfffff) | ((lo12 & 0xfff) << 20); + return 0; +} + +static int apply_r_riscv_lo12_s_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + /* Skip medlow checking because of filtering by HI20 already */ + s32 hi20 = ((s32)v + 0x800) & 0xfffff000; + s32 lo12 = ((s32)v - hi20); + u32 imm11_5 = (lo12 & 0xfe0) << (31 - 11); + u32 imm4_0 = (lo12 & 0x1f) << (11 - 4); + *location = (*location & 0x1fff07f) | imm11_5 | imm4_0; + return 0; +} + +static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + ptrdiff_t offset = (void *)v - (void *)location; + s32 hi20; + + /* Always emit the got entry */ + if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { + offset = module_emit_got_entry(me, v); + offset = (void *)offset - (void *)location; + } else { + pr_err( + "%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n", + me->name, (long long)v, location); + return -EINVAL; + } + + hi20 = (offset + 0x800) & 0xfffff000; + *location = (*location & 0xfff) | hi20; + return 0; +} + +static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + ptrdiff_t offset = (void *)v - (void *)location; + u32 hi20, lo12; + + if (!riscv_insn_valid_32bit_offset(offset)) { + /* Only emit the plt entry if offset over 32-bit range */ + if (IS_ENABLED(CONFIG_MODULE_SECTIONS)) { + offset = module_emit_plt_entry(me, v); + offset = (void *)offset - (void *)location; + } else { + pr_err( + "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", + me->name, (long long)v, location); + return -EINVAL; + } + } + + hi20 = (offset + 0x800) & 0xfffff000; + lo12 = (offset - hi20) & 0xfff; + *location = (*location & 0xfff) | hi20; + *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20); + return 0; +} + +static int apply_r_riscv_call_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + ptrdiff_t offset = (void *)v - (void *)location; + u32 hi20, lo12; + + if (!riscv_insn_valid_32bit_offset(offset)) { + pr_err( + "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", + me->name, (long long)v, location); + return -EINVAL; + } + + hi20 = (offset + 0x800) & 0xfffff000; + lo12 = (offset - hi20) & 0xfff; + *location = (*location & 0xfff) | hi20; + *(location + 1) = (*(location + 1) & 0xfffff) | (lo12 << 20); + return 0; +} + +static int apply_r_riscv_relax_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + return 0; +} + +static int apply_r_riscv_align_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + pr_err( + "%s: The unexpected relocation type 'R_RISCV_ALIGN' from PC = %p\n", + me->name, location); + return -EINVAL; +} + +static int apply_r_riscv_add32_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + *(u32 *)location += (u32)v; + return 0; +} + +static int apply_r_riscv_add64_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + *(u64 *)location += (u64)v; + return 0; +} + +static int apply_r_riscv_sub32_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + *(u32 *)location -= (u32)v; + return 0; +} + +static int apply_r_riscv_sub64_rela(struct module *me, u32 *location, + Elf_Addr v) +{ + *(u64 *)location -= (u64)v; + return 0; +} + +static int (*reloc_handlers_rela[]) (struct module *me, u32 *location, + Elf_Addr v) = { + [R_RISCV_32] = apply_r_riscv_32_rela, + [R_RISCV_64] = apply_r_riscv_64_rela, + [R_RISCV_BRANCH] = apply_r_riscv_branch_rela, + [R_RISCV_JAL] = apply_r_riscv_jal_rela, + [R_RISCV_RVC_BRANCH] = apply_r_riscv_rvc_branch_rela, + [R_RISCV_RVC_JUMP] = apply_r_riscv_rvc_jump_rela, + [R_RISCV_PCREL_HI20] = apply_r_riscv_pcrel_hi20_rela, + [R_RISCV_PCREL_LO12_I] = apply_r_riscv_pcrel_lo12_i_rela, + [R_RISCV_PCREL_LO12_S] = apply_r_riscv_pcrel_lo12_s_rela, + [R_RISCV_HI20] = apply_r_riscv_hi20_rela, + [R_RISCV_LO12_I] = apply_r_riscv_lo12_i_rela, + [R_RISCV_LO12_S] = apply_r_riscv_lo12_s_rela, + [R_RISCV_GOT_HI20] = apply_r_riscv_got_hi20_rela, + [R_RISCV_CALL_PLT] = apply_r_riscv_call_plt_rela, + [R_RISCV_CALL] = apply_r_riscv_call_rela, + [R_RISCV_RELAX] = apply_r_riscv_relax_rela, + [R_RISCV_ALIGN] = apply_r_riscv_align_rela, + [R_RISCV_ADD32] = apply_r_riscv_add32_rela, + [R_RISCV_ADD64] = apply_r_riscv_add64_rela, + [R_RISCV_SUB32] = apply_r_riscv_sub32_rela, + [R_RISCV_SUB64] = apply_r_riscv_sub64_rela, +}; + +int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, + unsigned int symindex, unsigned int relsec, + struct module *me) +{ + Elf_Rela *rel = (void *) sechdrs[relsec].sh_addr; + int (*handler)(struct module *me, u32 *location, Elf_Addr v); + Elf_Sym *sym; + u32 *location; + unsigned int i, type; + Elf_Addr v; + int res; + + pr_debug("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); + + for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { + /* This is where to make the change */ + location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr + + rel[i].r_offset; + /* This is the symbol it is referring to */ + sym = (Elf_Sym *)sechdrs[symindex].sh_addr + + ELF_RISCV_R_SYM(rel[i].r_info); + if (IS_ERR_VALUE(sym->st_value)) { + /* Ignore unresolved weak symbol */ + if (ELF_ST_BIND(sym->st_info) == STB_WEAK) + continue; + pr_warn("%s: Unknown symbol %s\n", + me->name, strtab + sym->st_name); + return -ENOENT; + } + + type = ELF_RISCV_R_TYPE(rel[i].r_info); + + if (type < ARRAY_SIZE(reloc_handlers_rela)) + handler = reloc_handlers_rela[type]; + else + handler = NULL; + + if (!handler) { + pr_err("%s: Unknown relocation type %u\n", + me->name, type); + return -EINVAL; + } + + v = sym->st_value + rel[i].r_addend; + + if (type == R_RISCV_PCREL_LO12_I || type == R_RISCV_PCREL_LO12_S) { + unsigned int j; + + for (j = 0; j < sechdrs[relsec].sh_size / sizeof(*rel); j++) { + unsigned long hi20_loc = + sechdrs[sechdrs[relsec].sh_info].sh_addr + + rel[j].r_offset; + u32 hi20_type = ELF_RISCV_R_TYPE(rel[j].r_info); + + /* Find the corresponding HI20 relocation entry */ + if (hi20_loc == sym->st_value + && (hi20_type == R_RISCV_PCREL_HI20 + || hi20_type == R_RISCV_GOT_HI20)) { + s32 hi20, lo12; + Elf_Sym *hi20_sym = + (Elf_Sym *)sechdrs[symindex].sh_addr + + ELF_RISCV_R_SYM(rel[j].r_info); + unsigned long hi20_sym_val = + hi20_sym->st_value + + rel[j].r_addend; + + /* Calculate lo12 */ + size_t offset = hi20_sym_val - hi20_loc; + if (IS_ENABLED(CONFIG_MODULE_SECTIONS) + && hi20_type == R_RISCV_GOT_HI20) { + offset = module_emit_got_entry( + me, hi20_sym_val); + offset = offset - hi20_loc; + } + hi20 = (offset + 0x800) & 0xfffff000; + lo12 = offset - hi20; + v = lo12; + + break; + } + } + if (j == sechdrs[relsec].sh_size / sizeof(*rel)) { + pr_err( + "%s: Can not find HI20 relocation information\n", + me->name); + return -EINVAL; + } + } + + res = handler(me, location, v); + if (res) + return res; + } + + return 0; +} + +#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) +void *module_alloc(unsigned long size) +{ + return __vmalloc_node_range(size, 1, MODULES_VADDR, + MODULES_END, GFP_KERNEL, + PAGE_KERNEL, VM_FLUSH_RESET_PERMS, + NUMA_NO_NODE, + __builtin_return_address(0)); +} +#endif + +static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *name) +{ + const Elf_Shdr *s, *se; + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + if (strcmp(name, secstrs + s->sh_name) == 0) + return s; + } + + return NULL; +} + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + const Elf_Shdr *s; + + s = find_section(hdr, sechdrs, ".alternative"); + if (s) + apply_module_alternatives((void *)s->sh_addr, s->sh_size); + + return 0; +} diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c new file mode 100644 index 000000000..160e5c1ca --- /dev/null +++ b/arch/riscv/kernel/patch.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 SiFive + */ + +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/memory.h> +#include <linux/uaccess.h> +#include <linux/stop_machine.h> +#include <asm/kprobes.h> +#include <asm/cacheflush.h> +#include <asm/fixmap.h> +#include <asm/ftrace.h> +#include <asm/patch.h> +#include <asm/sections.h> + +struct patch_insn { + void *addr; + u32 insn; + atomic_t cpu_count; +}; + +int riscv_patch_in_stop_machine = false; + +#ifdef CONFIG_MMU + +static inline bool is_kernel_exittext(uintptr_t addr) +{ + return system_state < SYSTEM_RUNNING && + addr >= (uintptr_t)__exittext_begin && + addr < (uintptr_t)__exittext_end; +} + +/* + * The fix_to_virt(, idx) needs a const value (not a dynamic variable of + * reg-a0) or BUILD_BUG_ON failed with "idx >= __end_of_fixed_addresses". + * So use '__always_inline' and 'const unsigned int fixmap' here. + */ +static __always_inline void *patch_map(void *addr, const unsigned int fixmap) +{ + uintptr_t uintaddr = (uintptr_t) addr; + struct page *page; + + if (core_kernel_text(uintaddr) || is_kernel_exittext(uintaddr)) + page = phys_to_page(__pa_symbol(addr)); + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + page = vmalloc_to_page(addr); + else + return addr; + + BUG_ON(!page); + + return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + + (uintaddr & ~PAGE_MASK)); +} + +static void patch_unmap(int fixmap) +{ + clear_fixmap(fixmap); +} +NOKPROBE_SYMBOL(patch_unmap); + +static int patch_insn_write(void *addr, const void *insn, size_t len) +{ + void *waddr = addr; + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; + int ret; + + /* + * Before reaching here, it was expected to lock the text_mutex + * already, so we don't need to give another lock here and could + * ensure that it was safe between each cores. + * + * We're currently using stop_machine() for ftrace & kprobes, and while + * that ensures text_mutex is held before installing the mappings it + * does not ensure text_mutex is held by the calling thread. That's + * safe but triggers a lockdep failure, so just elide it for that + * specific case. + */ + if (!riscv_patch_in_stop_machine) + lockdep_assert_held(&text_mutex); + + if (across_pages) + patch_map(addr + len, FIX_TEXT_POKE1); + + waddr = patch_map(addr, FIX_TEXT_POKE0); + + ret = copy_to_kernel_nofault(waddr, insn, len); + + patch_unmap(FIX_TEXT_POKE0); + + if (across_pages) + patch_unmap(FIX_TEXT_POKE1); + + return ret; +} +NOKPROBE_SYMBOL(patch_insn_write); +#else +static int patch_insn_write(void *addr, const void *insn, size_t len) +{ + return copy_to_kernel_nofault(addr, insn, len); +} +NOKPROBE_SYMBOL(patch_insn_write); +#endif /* CONFIG_MMU */ + +int patch_text_nosync(void *addr, const void *insns, size_t len) +{ + u32 *tp = addr; + int ret; + + ret = patch_insn_write(tp, insns, len); + + if (!ret) + flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len); + + return ret; +} +NOKPROBE_SYMBOL(patch_text_nosync); + +static int patch_text_cb(void *data) +{ + struct patch_insn *patch = data; + int ret = 0; + + if (atomic_inc_return(&patch->cpu_count) == num_online_cpus()) { + ret = + patch_text_nosync(patch->addr, &patch->insn, + GET_INSN_LENGTH(patch->insn)); + atomic_inc(&patch->cpu_count); + } else { + while (atomic_read(&patch->cpu_count) <= num_online_cpus()) + cpu_relax(); + smp_mb(); + } + + return ret; +} +NOKPROBE_SYMBOL(patch_text_cb); + +int patch_text(void *addr, u32 insn) +{ + int ret; + struct patch_insn patch = { + .addr = addr, + .insn = insn, + .cpu_count = ATOMIC_INIT(0), + }; + + /* + * kprobes takes text_mutex, before calling patch_text(), but as we call + * calls stop_machine(), the lockdep assertion in patch_insn_write() + * gets confused by the context in which the lock is taken. + * Instead, ensure the lock is held before calling stop_machine(), and + * set riscv_patch_in_stop_machine to skip the check in + * patch_insn_write(). + */ + lockdep_assert_held(&text_mutex); + riscv_patch_in_stop_machine = true; + ret = stop_machine_cpuslocked(patch_text_cb, &patch, cpu_online_mask); + riscv_patch_in_stop_machine = false; + return ret; +} +NOKPROBE_SYMBOL(patch_text); diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c new file mode 100644 index 000000000..3348a61de --- /dev/null +++ b/arch/riscv/kernel/perf_callchain.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. */ + +#include <linux/perf_event.h> +#include <linux/uaccess.h> + +#include <asm/stacktrace.h> + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry, + unsigned long fp, unsigned long reg_ra) +{ + struct stackframe buftail; + unsigned long ra = 0; + unsigned long __user *user_frame_tail = + (unsigned long __user *)(fp - sizeof(struct stackframe)); + + /* Check accessibility of one struct frame_tail beyond */ + if (!access_ok(user_frame_tail, sizeof(buftail))) + return 0; + if (__copy_from_user_inatomic(&buftail, user_frame_tail, + sizeof(buftail))) + return 0; + + if (reg_ra != 0) + ra = reg_ra; + else + ra = buftail.ra; + + fp = buftail.fp; + if (ra != 0) + perf_callchain_store(entry, ra); + else + return 0; + + return fp; +} + +/* + * This will be called when the target is in user mode + * This function will only be called when we use + * "PERF_SAMPLE_CALLCHAIN" in + * kernel/events/core.c:perf_prepare_sample() + * + * How to trigger perf_callchain_[user/kernel] : + * $ perf record -e cpu-clock --call-graph fp ./program + * $ perf report --call-graph + * + * On RISC-V platform, the program being sampled and the C library + * need to be compiled with -fno-omit-frame-pointer, otherwise + * the user stack will not contain function frame. + */ +void perf_callchain_user(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + unsigned long fp = 0; + + fp = regs->s0; + perf_callchain_store(entry, regs->epc); + + fp = user_backtrace(entry, fp, regs->ra); + while (fp && !(fp & 0x3) && entry->nr < entry->max_stack) + fp = user_backtrace(entry, fp, 0); +} + +static bool fill_callchain(void *entry, unsigned long pc) +{ + return perf_callchain_store(entry, pc) == 0; +} + +void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + walk_stackframe(NULL, regs, fill_callchain, entry); +} diff --git a/arch/riscv/kernel/perf_regs.c b/arch/riscv/kernel/perf_regs.c new file mode 100644 index 000000000..fd304a248 --- /dev/null +++ b/arch/riscv/kernel/perf_regs.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. */ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/bug.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_RISCV_MAX)) + return 0; + + return ((unsigned long *)regs)[idx]; +} + +#define REG_RESERVED (~((1ULL << PERF_REG_RISCV_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ +#if __riscv_xlen == 64 + return PERF_SAMPLE_REGS_ABI_64; +#else + return PERF_SAMPLE_REGS_ABI_32; +#endif +} + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs) +{ + regs_user->regs = task_pt_regs(current); + regs_user->abi = perf_reg_abi(current); +} diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile new file mode 100644 index 000000000..90dea3abd --- /dev/null +++ b/arch/riscv/kernel/probes/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o +obj-$(CONFIG_KPROBES) += kprobes_trampoline.o +obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o +obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o +CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_rethook.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_rethook_trampoline.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/kernel/probes/decode-insn.c b/arch/riscv/kernel/probes/decode-insn.c new file mode 100644 index 000000000..64f6183b4 --- /dev/null +++ b/arch/riscv/kernel/probes/decode-insn.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <asm/sections.h> + +#include "decode-insn.h" +#include "simulate-insn.h" + +/* Return: + * INSN_REJECTED If instruction is one not allowed to kprobe, + * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot. + */ +enum probe_insn __kprobes +riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *api) +{ + probe_opcode_t insn = *addr; + + /* + * Reject instructions list: + */ + RISCV_INSN_REJECTED(system, insn); + RISCV_INSN_REJECTED(fence, insn); + + /* + * Simulate instructions list: + * TODO: the REJECTED ones below need to be implemented + */ +#ifdef CONFIG_RISCV_ISA_C + RISCV_INSN_REJECTED(c_j, insn); + RISCV_INSN_REJECTED(c_jr, insn); + RISCV_INSN_REJECTED(c_jal, insn); + RISCV_INSN_REJECTED(c_jalr, insn); + RISCV_INSN_REJECTED(c_beqz, insn); + RISCV_INSN_REJECTED(c_bnez, insn); + RISCV_INSN_REJECTED(c_ebreak, insn); +#endif + + RISCV_INSN_SET_SIMULATE(jal, insn); + RISCV_INSN_SET_SIMULATE(jalr, insn); + RISCV_INSN_SET_SIMULATE(auipc, insn); + RISCV_INSN_SET_SIMULATE(branch, insn); + + return INSN_GOOD; +} diff --git a/arch/riscv/kernel/probes/decode-insn.h b/arch/riscv/kernel/probes/decode-insn.h new file mode 100644 index 000000000..42269a7d6 --- /dev/null +++ b/arch/riscv/kernel/probes/decode-insn.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _RISCV_KERNEL_KPROBES_DECODE_INSN_H +#define _RISCV_KERNEL_KPROBES_DECODE_INSN_H + +#include <asm/sections.h> +#include <asm/kprobes.h> + +enum probe_insn { + INSN_REJECTED, + INSN_GOOD_NO_SLOT, + INSN_GOOD, +}; + +enum probe_insn __kprobes +riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *asi); + +#endif /* _RISCV_KERNEL_KPROBES_DECODE_INSN_H */ diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c new file mode 100644 index 000000000..7142ec42e --- /dev/null +++ b/arch/riscv/kernel/probes/ftrace.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/kprobes.h> + +/* Ftrace callback handler for kprobes -- called under preepmt disabled */ +void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *ops, struct ftrace_regs *fregs) +{ + struct kprobe *p; + struct pt_regs *regs; + struct kprobe_ctlblk *kcb; + int bit; + + bit = ftrace_test_recursion_trylock(ip, parent_ip); + if (bit < 0) + return; + + p = get_kprobe((kprobe_opcode_t *)ip); + if (unlikely(!p) || kprobe_disabled(p)) + goto out; + + regs = ftrace_get_regs(fregs); + kcb = get_kprobe_ctlblk(); + if (kprobe_running()) { + kprobes_inc_nmissed_count(p); + } else { + unsigned long orig_ip = instruction_pointer(regs); + + instruction_pointer_set(regs, ip); + + __this_cpu_write(current_kprobe, p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + if (!p->pre_handler || !p->pre_handler(p, regs)) { + /* + * Emulate singlestep (and also recover regs->pc) + * as if there is a nop + */ + instruction_pointer_set(regs, + (unsigned long)p->addr + MCOUNT_INSN_SIZE); + if (unlikely(p->post_handler)) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + p->post_handler(p, regs, 0); + } + instruction_pointer_set(regs, orig_ip); + } + + /* + * If pre_handler returns !0, it changes regs->pc. We have to + * skip emulating post_handler. + */ + __this_cpu_write(current_kprobe, NULL); + } +out: + ftrace_test_recursion_unlock(bit); +} +NOKPROBE_SYMBOL(kprobe_ftrace_handler); + +int arch_prepare_kprobe_ftrace(struct kprobe *p) +{ + p->ainsn.api.insn = NULL; + return 0; +} diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c new file mode 100644 index 000000000..cca2b3a21 --- /dev/null +++ b/arch/riscv/kernel/probes/kprobes.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#define pr_fmt(fmt) "kprobes: " fmt + +#include <linux/kprobes.h> +#include <linux/extable.h> +#include <linux/slab.h> +#include <linux/stop_machine.h> +#include <asm/ptrace.h> +#include <linux/uaccess.h> +#include <asm/sections.h> +#include <asm/cacheflush.h> +#include <asm/bug.h> +#include <asm/patch.h> + +#include "decode-insn.h" + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +static void __kprobes +post_kprobe_handler(struct kprobe *, struct kprobe_ctlblk *, struct pt_regs *); + +static void __kprobes arch_prepare_ss_slot(struct kprobe *p) +{ + unsigned long offset = GET_INSN_LENGTH(p->opcode); + + p->ainsn.api.restore = (unsigned long)p->addr + offset; + + patch_text(p->ainsn.api.insn, p->opcode); + patch_text((void *)((unsigned long)(p->ainsn.api.insn) + offset), + __BUG_INSN_32); +} + +static void __kprobes arch_prepare_simulate(struct kprobe *p) +{ + p->ainsn.api.restore = 0; +} + +static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (p->ainsn.api.handler) + p->ainsn.api.handler((u32)p->opcode, + (unsigned long)p->addr, regs); + + post_kprobe_handler(p, kcb, regs); +} + +static bool __kprobes arch_check_kprobe(struct kprobe *p) +{ + unsigned long tmp = (unsigned long)p->addr - p->offset; + unsigned long addr = (unsigned long)p->addr; + + while (tmp <= addr) { + if (tmp == addr) + return true; + + tmp += GET_INSN_LENGTH(*(u16 *)tmp); + } + + return false; +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + u16 *insn = (u16 *)p->addr; + + if ((unsigned long)insn & 0x1) + return -EILSEQ; + + if (!arch_check_kprobe(p)) + return -EILSEQ; + + /* copy instruction */ + p->opcode = (kprobe_opcode_t)(*insn++); + if (GET_INSN_LENGTH(p->opcode) == 4) + p->opcode |= (kprobe_opcode_t)(*insn) << 16; + + /* decode instruction */ + switch (riscv_probe_decode_insn(p->addr, &p->ainsn.api)) { + case INSN_REJECTED: /* insn not supported */ + return -EINVAL; + + case INSN_GOOD_NO_SLOT: /* insn need simulation */ + p->ainsn.api.insn = NULL; + break; + + case INSN_GOOD: /* instruction uses slot */ + p->ainsn.api.insn = get_insn_slot(); + if (!p->ainsn.api.insn) + return -ENOMEM; + break; + } + + /* prepare the instruction */ + if (p->ainsn.api.insn) + arch_prepare_ss_slot(p); + else + arch_prepare_simulate(p); + + return 0; +} + +#ifdef CONFIG_MMU +void *alloc_insn_page(void) +{ + return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, + GFP_KERNEL, PAGE_KERNEL_READ_EXEC, + VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, + __builtin_return_address(0)); +} +#endif + +/* install breakpoint in text */ +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + if ((p->opcode & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) + patch_text(p->addr, __BUG_INSN_32); + else + patch_text(p->addr, __BUG_INSN_16); +} + +/* remove breakpoint from text */ +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + patch_text(p->addr, p->opcode); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); + kcb->kprobe_status = kcb->prev_kprobe.status; +} + +static void __kprobes set_current_kprobe(struct kprobe *p) +{ + __this_cpu_write(current_kprobe, p); +} + +/* + * Interrupts need to be disabled before single-step mode is set, and not + * reenabled until after single-step mode ends. + * Without disabling interrupt on local CPU, there is a chance of + * interrupt occurrence in the period of exception return and start of + * out-of-line single-step, that result in wrongly single stepping + * into the interrupt handler. + */ +static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb, + struct pt_regs *regs) +{ + kcb->saved_status = regs->status; + regs->status &= ~SR_SPIE; +} + +static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb, + struct pt_regs *regs) +{ + regs->status = kcb->saved_status; +} + +static void __kprobes setup_singlestep(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb, int reenter) +{ + unsigned long slot; + + if (reenter) { + save_previous_kprobe(kcb); + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_REENTER; + } else { + kcb->kprobe_status = KPROBE_HIT_SS; + } + + if (p->ainsn.api.insn) { + /* prepare for single stepping */ + slot = (unsigned long)p->ainsn.api.insn; + + /* IRQs and single stepping do not mix well. */ + kprobes_save_local_irqflag(kcb, regs); + + instruction_pointer_set(regs, slot); + } else { + /* insn simulation */ + arch_simulate_insn(p, regs); + } +} + +static int __kprobes reenter_kprobe(struct kprobe *p, + struct pt_regs *regs, + struct kprobe_ctlblk *kcb) +{ + switch (kcb->kprobe_status) { + case KPROBE_HIT_SSDONE: + case KPROBE_HIT_ACTIVE: + kprobes_inc_nmissed_count(p); + setup_singlestep(p, regs, kcb, 1); + break; + case KPROBE_HIT_SS: + case KPROBE_REENTER: + pr_warn("Failed to recover from reentered kprobes.\n"); + dump_kprobe(p); + BUG(); + break; + default: + WARN_ON(1); + return 0; + } + + return 1; +} + +static void __kprobes +post_kprobe_handler(struct kprobe *cur, struct kprobe_ctlblk *kcb, struct pt_regs *regs) +{ + /* return addr restore if non-branching insn */ + if (cur->ainsn.api.restore != 0) + regs->epc = cur->ainsn.api.restore; + + /* restore back original saved kprobe variables and continue */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + return; + } + + /* call post handler */ + kcb->kprobe_status = KPROBE_HIT_SSDONE; + if (cur->post_handler) { + /* post_handler can hit breakpoint and single step + * again, so we enable D-flag for recursive exception. + */ + cur->post_handler(cur, regs, 0); + } + + reset_current_kprobe(); +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + switch (kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the ip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + regs->epc = (unsigned long) cur->addr; + BUG_ON(!instruction_pointer(regs)); + + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else { + kprobes_restore_local_irqflag(kcb, regs); + reset_current_kprobe(); + } + + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (fixup_exception(regs)) + return 1; + } + return 0; +} + +bool __kprobes +kprobe_breakpoint_handler(struct pt_regs *regs) +{ + struct kprobe *p, *cur_kprobe; + struct kprobe_ctlblk *kcb; + unsigned long addr = instruction_pointer(regs); + + kcb = get_kprobe_ctlblk(); + cur_kprobe = kprobe_running(); + + p = get_kprobe((kprobe_opcode_t *) addr); + + if (p) { + if (cur_kprobe) { + if (reenter_kprobe(p, regs, kcb)) + return true; + } else { + /* Probe hit */ + set_current_kprobe(p); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + /* + * If we have no pre-handler or it returned 0, we + * continue with normal processing. If we have a + * pre-handler and it returned non-zero, it will + * modify the execution path and no need to single + * stepping. Let's just reset current kprobe and exit. + * + * pre_handler can hit a breakpoint and can step thru + * before return. + */ + if (!p->pre_handler || !p->pre_handler(p, regs)) + setup_singlestep(p, regs, kcb, 0); + else + reset_current_kprobe(); + } + return true; + } + + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + * Return back to original instruction, and continue. + */ + return false; +} + +bool __kprobes +kprobe_single_step_handler(struct pt_regs *regs) +{ + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + unsigned long addr = instruction_pointer(regs); + struct kprobe *cur = kprobe_running(); + + if (cur && (kcb->kprobe_status & (KPROBE_HIT_SS | KPROBE_REENTER)) && + ((unsigned long)&cur->ainsn.api.insn[0] + GET_INSN_LENGTH(cur->opcode) == addr)) { + kprobes_restore_local_irqflag(kcb, regs); + post_kprobe_handler(cur, kcb, regs); + return true; + } + /* not ours, kprobes should ignore it */ + return false; +} + +/* + * Provide a blacklist of symbols identifying ranges which cannot be kprobed. + * This blacklist is exposed to userspace via debugfs (kprobes/blacklist). + */ +int __init arch_populate_kprobe_blacklist(void) +{ + int ret; + + ret = kprobe_add_area_blacklist((unsigned long)__irqentry_text_start, + (unsigned long)__irqentry_text_end); + return ret; +} + +void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) +{ + return (void *)kretprobe_trampoline_handler(regs, NULL); +} + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *)regs->ra; + ri->fp = NULL; + regs->ra = (unsigned long) &__kretprobe_trampoline; +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + return 0; +} + +int __init arch_init_kprobes(void) +{ + return 0; +} diff --git a/arch/riscv/kernel/probes/kprobes_trampoline.S b/arch/riscv/kernel/probes/kprobes_trampoline.S new file mode 100644 index 000000000..7bdb09ded --- /dev/null +++ b/arch/riscv/kernel/probes/kprobes_trampoline.S @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Author: Patrick Stählin <me@packi.ch> + */ +#include <linux/linkage.h> + +#include <asm/asm.h> +#include <asm/asm-offsets.h> + + .text + .altmacro + + .macro save_all_base_regs + REG_S x1, PT_RA(sp) + REG_S x3, PT_GP(sp) + REG_S x4, PT_TP(sp) + REG_S x5, PT_T0(sp) + REG_S x6, PT_T1(sp) + REG_S x7, PT_T2(sp) + REG_S x8, PT_S0(sp) + REG_S x9, PT_S1(sp) + REG_S x10, PT_A0(sp) + REG_S x11, PT_A1(sp) + REG_S x12, PT_A2(sp) + REG_S x13, PT_A3(sp) + REG_S x14, PT_A4(sp) + REG_S x15, PT_A5(sp) + REG_S x16, PT_A6(sp) + REG_S x17, PT_A7(sp) + REG_S x18, PT_S2(sp) + REG_S x19, PT_S3(sp) + REG_S x20, PT_S4(sp) + REG_S x21, PT_S5(sp) + REG_S x22, PT_S6(sp) + REG_S x23, PT_S7(sp) + REG_S x24, PT_S8(sp) + REG_S x25, PT_S9(sp) + REG_S x26, PT_S10(sp) + REG_S x27, PT_S11(sp) + REG_S x28, PT_T3(sp) + REG_S x29, PT_T4(sp) + REG_S x30, PT_T5(sp) + REG_S x31, PT_T6(sp) + .endm + + .macro restore_all_base_regs + REG_L x3, PT_GP(sp) + REG_L x4, PT_TP(sp) + REG_L x5, PT_T0(sp) + REG_L x6, PT_T1(sp) + REG_L x7, PT_T2(sp) + REG_L x8, PT_S0(sp) + REG_L x9, PT_S1(sp) + REG_L x10, PT_A0(sp) + REG_L x11, PT_A1(sp) + REG_L x12, PT_A2(sp) + REG_L x13, PT_A3(sp) + REG_L x14, PT_A4(sp) + REG_L x15, PT_A5(sp) + REG_L x16, PT_A6(sp) + REG_L x17, PT_A7(sp) + REG_L x18, PT_S2(sp) + REG_L x19, PT_S3(sp) + REG_L x20, PT_S4(sp) + REG_L x21, PT_S5(sp) + REG_L x22, PT_S6(sp) + REG_L x23, PT_S7(sp) + REG_L x24, PT_S8(sp) + REG_L x25, PT_S9(sp) + REG_L x26, PT_S10(sp) + REG_L x27, PT_S11(sp) + REG_L x28, PT_T3(sp) + REG_L x29, PT_T4(sp) + REG_L x30, PT_T5(sp) + REG_L x31, PT_T6(sp) + .endm + +ENTRY(__kretprobe_trampoline) + addi sp, sp, -(PT_SIZE_ON_STACK) + save_all_base_regs + + move a0, sp /* pt_regs */ + + call trampoline_probe_handler + + /* use the result as the return-address */ + move ra, a0 + + restore_all_base_regs + addi sp, sp, PT_SIZE_ON_STACK + + ret +ENDPROC(__kretprobe_trampoline) diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c new file mode 100644 index 000000000..41bf1eb01 --- /dev/null +++ b/arch/riscv/kernel/probes/simulate-insn.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/bitops.h> +#include <linux/kernel.h> +#include <linux/kprobes.h> + +#include "decode-insn.h" +#include "simulate-insn.h" + +static inline bool rv_insn_reg_get_val(struct pt_regs *regs, u32 index, + unsigned long *ptr) +{ + if (index == 0) + *ptr = 0; + else if (index <= 31) + *ptr = *((unsigned long *)regs + index); + else + return false; + + return true; +} + +static inline bool rv_insn_reg_set_val(struct pt_regs *regs, u32 index, + unsigned long val) +{ + if (index == 0) + return true; + else if (index <= 31) + *((unsigned long *)regs + index) = val; + else + return false; + + return true; +} + +bool __kprobes simulate_jal(u32 opcode, unsigned long addr, struct pt_regs *regs) +{ + /* + * 31 30 21 20 19 12 11 7 6 0 + * imm [20] | imm[10:1] | imm[11] | imm[19:12] | rd | opcode + * 1 10 1 8 5 JAL/J + */ + bool ret; + u32 imm; + u32 index = (opcode >> 7) & 0x1f; + + ret = rv_insn_reg_set_val(regs, index, addr + 4); + if (!ret) + return ret; + + imm = ((opcode >> 21) & 0x3ff) << 1; + imm |= ((opcode >> 20) & 0x1) << 11; + imm |= ((opcode >> 12) & 0xff) << 12; + imm |= ((opcode >> 31) & 0x1) << 20; + + instruction_pointer_set(regs, addr + sign_extend32((imm), 20)); + + return ret; +} + +bool __kprobes simulate_jalr(u32 opcode, unsigned long addr, struct pt_regs *regs) +{ + /* + * 31 20 19 15 14 12 11 7 6 0 + * offset[11:0] | rs1 | 010 | rd | opcode + * 12 5 3 5 JALR/JR + */ + bool ret; + unsigned long base_addr; + u32 imm = (opcode >> 20) & 0xfff; + u32 rd_index = (opcode >> 7) & 0x1f; + u32 rs1_index = (opcode >> 15) & 0x1f; + + ret = rv_insn_reg_get_val(regs, rs1_index, &base_addr); + if (!ret) + return ret; + + ret = rv_insn_reg_set_val(regs, rd_index, addr + 4); + if (!ret) + return ret; + + instruction_pointer_set(regs, (base_addr + sign_extend32((imm), 11))&~1); + + return ret; +} + +#define auipc_rd_idx(opcode) \ + ((opcode >> 7) & 0x1f) + +#define auipc_imm(opcode) \ + ((((opcode) >> 12) & 0xfffff) << 12) + +#if __riscv_xlen == 64 +#define auipc_offset(opcode) sign_extend64(auipc_imm(opcode), 31) +#elif __riscv_xlen == 32 +#define auipc_offset(opcode) auipc_imm(opcode) +#else +#error "Unexpected __riscv_xlen" +#endif + +bool __kprobes simulate_auipc(u32 opcode, unsigned long addr, struct pt_regs *regs) +{ + /* + * auipc instruction: + * 31 12 11 7 6 0 + * | imm[31:12] | rd | opcode | + * 20 5 7 + */ + + u32 rd_idx = auipc_rd_idx(opcode); + unsigned long rd_val = addr + auipc_offset(opcode); + + if (!rv_insn_reg_set_val(regs, rd_idx, rd_val)) + return false; + + instruction_pointer_set(regs, addr + 4); + + return true; +} + +#define branch_rs1_idx(opcode) \ + (((opcode) >> 15) & 0x1f) + +#define branch_rs2_idx(opcode) \ + (((opcode) >> 20) & 0x1f) + +#define branch_funct3(opcode) \ + (((opcode) >> 12) & 0x7) + +#define branch_imm(opcode) \ + (((((opcode) >> 8) & 0xf ) << 1) | \ + ((((opcode) >> 25) & 0x3f) << 5) | \ + ((((opcode) >> 7) & 0x1 ) << 11) | \ + ((((opcode) >> 31) & 0x1 ) << 12)) + +#define branch_offset(opcode) \ + sign_extend32((branch_imm(opcode)), 12) + +#define BRANCH_BEQ 0x0 +#define BRANCH_BNE 0x1 +#define BRANCH_BLT 0x4 +#define BRANCH_BGE 0x5 +#define BRANCH_BLTU 0x6 +#define BRANCH_BGEU 0x7 + +bool __kprobes simulate_branch(u32 opcode, unsigned long addr, struct pt_regs *regs) +{ + /* + * branch instructions: + * 31 30 25 24 20 19 15 14 12 11 8 7 6 0 + * | imm[12] | imm[10:5] | rs2 | rs1 | funct3 | imm[4:1] | imm[11] | opcode | + * 1 6 5 5 3 4 1 7 + * imm[12|10:5] rs2 rs1 000 imm[4:1|11] 1100011 BEQ + * imm[12|10:5] rs2 rs1 001 imm[4:1|11] 1100011 BNE + * imm[12|10:5] rs2 rs1 100 imm[4:1|11] 1100011 BLT + * imm[12|10:5] rs2 rs1 101 imm[4:1|11] 1100011 BGE + * imm[12|10:5] rs2 rs1 110 imm[4:1|11] 1100011 BLTU + * imm[12|10:5] rs2 rs1 111 imm[4:1|11] 1100011 BGEU + */ + + s32 offset; + s32 offset_tmp; + unsigned long rs1_val; + unsigned long rs2_val; + + if (!rv_insn_reg_get_val(regs, branch_rs1_idx(opcode), &rs1_val) || + !rv_insn_reg_get_val(regs, branch_rs2_idx(opcode), &rs2_val)) + return false; + + offset_tmp = branch_offset(opcode); + switch (branch_funct3(opcode)) { + case BRANCH_BEQ: + offset = (rs1_val == rs2_val) ? offset_tmp : 4; + break; + case BRANCH_BNE: + offset = (rs1_val != rs2_val) ? offset_tmp : 4; + break; + case BRANCH_BLT: + offset = ((long)rs1_val < (long)rs2_val) ? offset_tmp : 4; + break; + case BRANCH_BGE: + offset = ((long)rs1_val >= (long)rs2_val) ? offset_tmp : 4; + break; + case BRANCH_BLTU: + offset = (rs1_val < rs2_val) ? offset_tmp : 4; + break; + case BRANCH_BGEU: + offset = (rs1_val >= rs2_val) ? offset_tmp : 4; + break; + default: + return false; + } + + instruction_pointer_set(regs, addr + offset); + + return true; +} diff --git a/arch/riscv/kernel/probes/simulate-insn.h b/arch/riscv/kernel/probes/simulate-insn.h new file mode 100644 index 000000000..de8474146 --- /dev/null +++ b/arch/riscv/kernel/probes/simulate-insn.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef _RISCV_KERNEL_PROBES_SIMULATE_INSN_H +#define _RISCV_KERNEL_PROBES_SIMULATE_INSN_H + +#define __RISCV_INSN_FUNCS(name, mask, val) \ +static __always_inline bool riscv_insn_is_##name(probe_opcode_t code) \ +{ \ + BUILD_BUG_ON(~(mask) & (val)); \ + return (code & (mask)) == (val); \ +} \ +bool simulate_##name(u32 opcode, unsigned long addr, \ + struct pt_regs *regs) + +#define RISCV_INSN_REJECTED(name, code) \ + do { \ + if (riscv_insn_is_##name(code)) { \ + return INSN_REJECTED; \ + } \ + } while (0) + +__RISCV_INSN_FUNCS(system, 0x7f, 0x73); +__RISCV_INSN_FUNCS(fence, 0x7f, 0x0f); + +#define RISCV_INSN_SET_SIMULATE(name, code) \ + do { \ + if (riscv_insn_is_##name(code)) { \ + api->handler = simulate_##name; \ + return INSN_GOOD_NO_SLOT; \ + } \ + } while (0) + +__RISCV_INSN_FUNCS(c_j, 0xe003, 0xa001); +__RISCV_INSN_FUNCS(c_jr, 0xf07f, 0x8002); +__RISCV_INSN_FUNCS(c_jal, 0xe003, 0x2001); +__RISCV_INSN_FUNCS(c_jalr, 0xf07f, 0x9002); +__RISCV_INSN_FUNCS(c_beqz, 0xe003, 0xc001); +__RISCV_INSN_FUNCS(c_bnez, 0xe003, 0xe001); +__RISCV_INSN_FUNCS(c_ebreak, 0xffff, 0x9002); + +__RISCV_INSN_FUNCS(auipc, 0x7f, 0x17); +__RISCV_INSN_FUNCS(branch, 0x7f, 0x63); + +__RISCV_INSN_FUNCS(jal, 0x7f, 0x6f); +__RISCV_INSN_FUNCS(jalr, 0x707f, 0x67); + +#endif /* _RISCV_KERNEL_PROBES_SIMULATE_INSN_H */ diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c new file mode 100644 index 000000000..194f166b2 --- /dev/null +++ b/arch/riscv/kernel/probes/uprobes.c @@ -0,0 +1,182 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/highmem.h> +#include <linux/ptrace.h> +#include <linux/uprobes.h> + +#include "decode-insn.h" + +#define UPROBE_TRAP_NR UINT_MAX + +bool is_swbp_insn(uprobe_opcode_t *insn) +{ +#ifdef CONFIG_RISCV_ISA_C + return (*insn & 0xffff) == UPROBE_SWBP_INSN; +#else + return *insn == UPROBE_SWBP_INSN; +#endif +} + +unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) +{ + return instruction_pointer(regs); +} + +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, + unsigned long addr) +{ + probe_opcode_t opcode; + + opcode = *(probe_opcode_t *)(&auprobe->insn[0]); + + auprobe->insn_size = GET_INSN_LENGTH(opcode); + + switch (riscv_probe_decode_insn(&opcode, &auprobe->api)) { + case INSN_REJECTED: + return -EINVAL; + + case INSN_GOOD_NO_SLOT: + auprobe->simulate = true; + break; + + case INSN_GOOD: + auprobe->simulate = false; + break; + + default: + return -EINVAL; + } + + return 0; +} + +int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + utask->autask.saved_cause = current->thread.bad_cause; + current->thread.bad_cause = UPROBE_TRAP_NR; + + instruction_pointer_set(regs, utask->xol_vaddr); + + return 0; +} + +int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + WARN_ON_ONCE(current->thread.bad_cause != UPROBE_TRAP_NR); + current->thread.bad_cause = utask->autask.saved_cause; + + instruction_pointer_set(regs, utask->vaddr + auprobe->insn_size); + + return 0; +} + +bool arch_uprobe_xol_was_trapped(struct task_struct *t) +{ + if (t->thread.bad_cause != UPROBE_TRAP_NR) + return true; + + return false; +} + +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + probe_opcode_t insn; + unsigned long addr; + + if (!auprobe->simulate) + return false; + + insn = *(probe_opcode_t *)(&auprobe->insn[0]); + addr = instruction_pointer(regs); + + if (auprobe->api.handler) + auprobe->api.handler(insn, addr, regs); + + return true; +} + +void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + current->thread.bad_cause = utask->autask.saved_cause; + /* + * Task has received a fatal signal, so reset back to probbed + * address. + */ + instruction_pointer_set(regs, utask->vaddr); +} + +bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, + struct pt_regs *regs) +{ + if (ctx == RP_CHECK_CHAIN_CALL) + return regs->sp <= ret->stack; + else + return regs->sp < ret->stack; +} + +unsigned long +arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, + struct pt_regs *regs) +{ + unsigned long ra; + + ra = regs->ra; + + regs->ra = trampoline_vaddr; + + return ra; +} + +int arch_uprobe_exception_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return NOTIFY_DONE; +} + +bool uprobe_breakpoint_handler(struct pt_regs *regs) +{ + if (uprobe_pre_sstep_notifier(regs)) + return true; + + return false; +} + +bool uprobe_single_step_handler(struct pt_regs *regs) +{ + if (uprobe_post_sstep_notifier(regs)) + return true; + + return false; +} + +void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, + void *src, unsigned long len) +{ + /* Initialize the slot */ + void *kaddr = kmap_atomic(page); + void *dst = kaddr + (vaddr & ~PAGE_MASK); + + memcpy(dst, src, len); + + /* Add ebreak behind opcode to simulate singlestep */ + if (vaddr) { + dst += GET_INSN_LENGTH(*(probe_opcode_t *)src); + *(uprobe_opcode_t *)dst = __BUG_INSN_32; + } + + kunmap_atomic(kaddr); + + /* + * We probably need flush_icache_user_page() but it needs vma. + * This should work on most of architectures by default. If + * architecture needs to do something different it can define + * its own version of the function. + */ + flush_dcache_page(page); +} diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c new file mode 100644 index 000000000..8955f2432 --- /dev/null +++ b/arch/riscv/kernel/process.c @@ -0,0 +1,191 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2009 Sunplus Core Technology Co., Ltd. + * Chen Liqin <liqin.chen@sunplusct.com> + * Lennox Wu <lennox.wu@sunplusct.com> + * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive + */ + +#include <linux/cpu.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/sched/task_stack.h> +#include <linux/tick.h> +#include <linux/ptrace.h> +#include <linux/uaccess.h> + +#include <asm/unistd.h> +#include <asm/processor.h> +#include <asm/csr.h> +#include <asm/stacktrace.h> +#include <asm/string.h> +#include <asm/switch_to.h> +#include <asm/thread_info.h> +#include <asm/cpuidle.h> + +register unsigned long gp_in_global __asm__("gp"); + +#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) +#include <linux/stackprotector.h> +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif + +extern asmlinkage void ret_from_fork(void); +extern asmlinkage void ret_from_kernel_thread(void); + +void arch_cpu_idle(void) +{ + cpu_do_idle(); + raw_local_irq_enable(); +} + +void __show_regs(struct pt_regs *regs) +{ + show_regs_print_info(KERN_DEFAULT); + + if (!user_mode(regs)) { + pr_cont("epc : %pS\n", (void *)regs->epc); + pr_cont(" ra : %pS\n", (void *)regs->ra); + } + + pr_cont("epc : " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n", + regs->epc, regs->ra, regs->sp); + pr_cont(" gp : " REG_FMT " tp : " REG_FMT " t0 : " REG_FMT "\n", + regs->gp, regs->tp, regs->t0); + pr_cont(" t1 : " REG_FMT " t2 : " REG_FMT " s0 : " REG_FMT "\n", + regs->t1, regs->t2, regs->s0); + pr_cont(" s1 : " REG_FMT " a0 : " REG_FMT " a1 : " REG_FMT "\n", + regs->s1, regs->a0, regs->a1); + pr_cont(" a2 : " REG_FMT " a3 : " REG_FMT " a4 : " REG_FMT "\n", + regs->a2, regs->a3, regs->a4); + pr_cont(" a5 : " REG_FMT " a6 : " REG_FMT " a7 : " REG_FMT "\n", + regs->a5, regs->a6, regs->a7); + pr_cont(" s2 : " REG_FMT " s3 : " REG_FMT " s4 : " REG_FMT "\n", + regs->s2, regs->s3, regs->s4); + pr_cont(" s5 : " REG_FMT " s6 : " REG_FMT " s7 : " REG_FMT "\n", + regs->s5, regs->s6, regs->s7); + pr_cont(" s8 : " REG_FMT " s9 : " REG_FMT " s10: " REG_FMT "\n", + regs->s8, regs->s9, regs->s10); + pr_cont(" s11: " REG_FMT " t3 : " REG_FMT " t4 : " REG_FMT "\n", + regs->s11, regs->t3, regs->t4); + pr_cont(" t5 : " REG_FMT " t6 : " REG_FMT "\n", + regs->t5, regs->t6); + + pr_cont("status: " REG_FMT " badaddr: " REG_FMT " cause: " REG_FMT "\n", + regs->status, regs->badaddr, regs->cause); +} +void show_regs(struct pt_regs *regs) +{ + __show_regs(regs); + if (!user_mode(regs)) + dump_backtrace(regs, NULL, KERN_DEFAULT); +} + +#ifdef CONFIG_COMPAT +static bool compat_mode_supported __read_mostly; + +bool compat_elf_check_arch(Elf32_Ehdr *hdr) +{ + return compat_mode_supported && + hdr->e_machine == EM_RISCV && + hdr->e_ident[EI_CLASS] == ELFCLASS32; +} + +static int __init compat_mode_detect(void) +{ + unsigned long tmp = csr_read(CSR_STATUS); + + csr_write(CSR_STATUS, (tmp & ~SR_UXL) | SR_UXL_32); + compat_mode_supported = + (csr_read(CSR_STATUS) & SR_UXL) == SR_UXL_32; + + csr_write(CSR_STATUS, tmp); + + pr_info("riscv: ELF compat mode %s", + compat_mode_supported ? "supported" : "unsupported"); + + return 0; +} +early_initcall(compat_mode_detect); +#endif + +void start_thread(struct pt_regs *regs, unsigned long pc, + unsigned long sp) +{ + regs->status = SR_PIE; + if (has_fpu()) { + regs->status |= SR_FS_INITIAL; + /* + * Restore the initial value to the FP register + * before starting the user program. + */ + fstate_restore(current, regs); + } + regs->epc = pc; + regs->sp = sp; + +#ifdef CONFIG_64BIT + regs->status &= ~SR_UXL; + + if (is_compat_task()) + regs->status |= SR_UXL_32; + else + regs->status |= SR_UXL_64; +#endif +} + +void flush_thread(void) +{ +#ifdef CONFIG_FPU + /* + * Reset FPU state and context + * frm: round to nearest, ties to even (IEEE default) + * fflags: accrued exceptions cleared + */ + fstate_off(current, task_pt_regs(current)); + memset(¤t->thread.fstate, 0, sizeof(current->thread.fstate)); +#endif +} + +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ + fstate_save(src, task_pt_regs(src)); + *dst = *src; + return 0; +} + +int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) +{ + unsigned long clone_flags = args->flags; + unsigned long usp = args->stack; + unsigned long tls = args->tls; + struct pt_regs *childregs = task_pt_regs(p); + + memset(&p->thread.s, 0, sizeof(p->thread.s)); + + /* p->thread holds context to be restored by __switch_to() */ + if (unlikely(args->fn)) { + /* Kernel thread */ + memset(childregs, 0, sizeof(struct pt_regs)); + childregs->gp = gp_in_global; + /* Supervisor/Machine, irqs on: */ + childregs->status = SR_PP | SR_PIE; + + p->thread.ra = (unsigned long)ret_from_kernel_thread; + p->thread.s[0] = (unsigned long)args->fn; + p->thread.s[1] = (unsigned long)args->fn_arg; + } else { + *childregs = *(current_pt_regs()); + if (usp) /* User fork */ + childregs->sp = usp; + if (clone_flags & CLONE_SETTLS) + childregs->tp = tls; + childregs->a0 = 0; /* Return value of fork() */ + p->thread.ra = (unsigned long)ret_from_fork; + } + p->thread.sp = (unsigned long)childregs; /* kernel sp */ + return 0; +} diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c new file mode 100644 index 000000000..2ae8280ae --- /dev/null +++ b/arch/riscv/kernel/ptrace.c @@ -0,0 +1,351 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2010 Tilera Corporation. All Rights Reserved. + * Copyright 2015 Regents of the University of California + * Copyright 2017 SiFive + * + * Copied from arch/tile/kernel/ptrace.c + */ + +#include <asm/ptrace.h> +#include <asm/syscall.h> +#include <asm/thread_info.h> +#include <asm/switch_to.h> +#include <linux/audit.h> +#include <linux/compat.h> +#include <linux/ptrace.h> +#include <linux/elf.h> +#include <linux/regset.h> +#include <linux/sched.h> +#include <linux/sched/task_stack.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + +enum riscv_regset { + REGSET_X, +#ifdef CONFIG_FPU + REGSET_F, +#endif +}; + +static int riscv_gpr_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + return membuf_write(&to, task_pt_regs(target), + sizeof(struct user_regs_struct)); +} + +static int riscv_gpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct pt_regs *regs; + + regs = task_pt_regs(target); + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, regs, 0, -1); +} + +#ifdef CONFIG_FPU +static int riscv_fpr_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct __riscv_d_ext_state *fstate = &target->thread.fstate; + + if (target == current) + fstate_save(current, task_pt_regs(current)); + + membuf_write(&to, fstate, offsetof(struct __riscv_d_ext_state, fcsr)); + membuf_store(&to, fstate->fcsr); + return membuf_zero(&to, 4); // explicitly pad +} + +static int riscv_fpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct __riscv_d_ext_state *fstate = &target->thread.fstate; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, fstate, 0, + offsetof(struct __riscv_d_ext_state, fcsr)); + if (!ret) { + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, fstate, 0, + offsetof(struct __riscv_d_ext_state, fcsr) + + sizeof(fstate->fcsr)); + } + + return ret; +} +#endif + +static const struct user_regset riscv_user_regset[] = { + [REGSET_X] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(elf_greg_t), + .align = sizeof(elf_greg_t), + .regset_get = riscv_gpr_get, + .set = riscv_gpr_set, + }, +#ifdef CONFIG_FPU + [REGSET_F] = { + .core_note_type = NT_PRFPREG, + .n = ELF_NFPREG, + .size = sizeof(elf_fpreg_t), + .align = sizeof(elf_fpreg_t), + .regset_get = riscv_fpr_get, + .set = riscv_fpr_set, + }, +#endif +}; + +static const struct user_regset_view riscv_user_native_view = { + .name = "riscv", + .e_machine = EM_RISCV, + .regsets = riscv_user_regset, + .n = ARRAY_SIZE(riscv_user_regset), +}; + +struct pt_regs_offset { + const char *name; + int offset; +}; + +#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)} +#define REG_OFFSET_END {.name = NULL, .offset = 0} + +static const struct pt_regs_offset regoffset_table[] = { + REG_OFFSET_NAME(epc), + REG_OFFSET_NAME(ra), + REG_OFFSET_NAME(sp), + REG_OFFSET_NAME(gp), + REG_OFFSET_NAME(tp), + REG_OFFSET_NAME(t0), + REG_OFFSET_NAME(t1), + REG_OFFSET_NAME(t2), + REG_OFFSET_NAME(s0), + REG_OFFSET_NAME(s1), + REG_OFFSET_NAME(a0), + REG_OFFSET_NAME(a1), + REG_OFFSET_NAME(a2), + REG_OFFSET_NAME(a3), + REG_OFFSET_NAME(a4), + REG_OFFSET_NAME(a5), + REG_OFFSET_NAME(a6), + REG_OFFSET_NAME(a7), + REG_OFFSET_NAME(s2), + REG_OFFSET_NAME(s3), + REG_OFFSET_NAME(s4), + REG_OFFSET_NAME(s5), + REG_OFFSET_NAME(s6), + REG_OFFSET_NAME(s7), + REG_OFFSET_NAME(s8), + REG_OFFSET_NAME(s9), + REG_OFFSET_NAME(s10), + REG_OFFSET_NAME(s11), + REG_OFFSET_NAME(t3), + REG_OFFSET_NAME(t4), + REG_OFFSET_NAME(t5), + REG_OFFSET_NAME(t6), + REG_OFFSET_NAME(status), + REG_OFFSET_NAME(badaddr), + REG_OFFSET_NAME(cause), + REG_OFFSET_NAME(orig_a0), + REG_OFFSET_END, +}; + +/** + * regs_query_register_offset() - query register offset from its name + * @name: the name of a register + * + * regs_query_register_offset() returns the offset of a register in struct + * pt_regs from its name. If the name is invalid, this returns -EINVAL; + */ +int regs_query_register_offset(const char *name) +{ + const struct pt_regs_offset *roff; + + for (roff = regoffset_table; roff->name != NULL; roff++) + if (!strcmp(roff->name, name)) + return roff->offset; + return -EINVAL; +} + +/** + * regs_within_kernel_stack() - check the address in the stack + * @regs: pt_regs which contains kernel stack pointer. + * @addr: address which is checked. + * + * regs_within_kernel_stack() checks @addr is within the kernel stack page(s). + * If @addr is within the kernel stack, it returns true. If not, returns false. + */ +static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr) +{ + return (addr & ~(THREAD_SIZE - 1)) == + (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)); +} + +/** + * regs_get_kernel_stack_nth() - get Nth entry of the stack + * @regs: pt_regs which contains kernel stack pointer. + * @n: stack entry number. + * + * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which + * is specified by @regs. If the @n th entry is NOT in the kernel stack, + * this returns 0. + */ +unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) +{ + unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs); + + addr += n; + if (regs_within_kernel_stack(regs, (unsigned long)addr)) + return *addr; + else + return 0; +} + +void ptrace_disable(struct task_struct *child) +{ + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); +} + +long arch_ptrace(struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + long ret = -EIO; + + switch (request) { + default: + ret = ptrace_request(child, request, addr, data); + break; + } + + return ret; +} + +/* + * Allows PTRACE_SYSCALL to work. These are called from entry.S in + * {handle,ret_from}_syscall. + */ +__visible int do_syscall_trace_enter(struct pt_regs *regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (ptrace_report_syscall_entry(regs)) + return -1; + + /* + * Do the secure computing after ptrace; failures should be fast. + * If this fails we might have return value in a0 from seccomp + * (via SECCOMP_RET_ERRNO/TRACE). + */ + if (secure_computing() == -1) + return -1; + +#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_enter(regs, syscall_get_nr(current, regs)); +#endif + + audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3); + return 0; +} + +__visible void do_syscall_trace_exit(struct pt_regs *regs) +{ + audit_syscall_exit(regs); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + ptrace_report_syscall_exit(regs, 0); + +#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_exit(regs, regs_return_value(regs)); +#endif +} + +#ifdef CONFIG_COMPAT +static int compat_riscv_gpr_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct compat_user_regs_struct cregs; + + regs_to_cregs(&cregs, task_pt_regs(target)); + + return membuf_write(&to, &cregs, + sizeof(struct compat_user_regs_struct)); +} + +static int compat_riscv_gpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct compat_user_regs_struct cregs; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &cregs, 0, -1); + + cregs_to_regs(&cregs, task_pt_regs(target)); + + return ret; +} + +static const struct user_regset compat_riscv_user_regset[] = { + [REGSET_X] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(compat_elf_greg_t), + .align = sizeof(compat_elf_greg_t), + .regset_get = compat_riscv_gpr_get, + .set = compat_riscv_gpr_set, + }, +#ifdef CONFIG_FPU + [REGSET_F] = { + .core_note_type = NT_PRFPREG, + .n = ELF_NFPREG, + .size = sizeof(elf_fpreg_t), + .align = sizeof(elf_fpreg_t), + .regset_get = riscv_fpr_get, + .set = riscv_fpr_set, + }, +#endif +}; + +static const struct user_regset_view compat_riscv_user_native_view = { + .name = "riscv", + .e_machine = EM_RISCV, + .regsets = compat_riscv_user_regset, + .n = ARRAY_SIZE(compat_riscv_user_regset), +}; + +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) +{ + long ret = -EIO; + + switch (request) { + default: + ret = compat_ptrace_request(child, request, caddr, cdata); + break; + } + + return ret; +} +#endif /* CONFIG_COMPAT */ + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ +#ifdef CONFIG_COMPAT + if (test_tsk_thread_flag(task, TIF_32BIT)) + return &compat_riscv_user_native_view; + else +#endif + return &riscv_user_native_view; +} diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c new file mode 100644 index 000000000..912288572 --- /dev/null +++ b/arch/riscv/kernel/reset.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Regents of the University of California + */ + +#include <linux/reboot.h> +#include <linux/pm.h> + +static void default_power_off(void) +{ + while (1) + wait_for_interrupt(); +} + +void (*pm_power_off)(void) = NULL; +EXPORT_SYMBOL(pm_power_off); + +void machine_restart(char *cmd) +{ + do_kernel_restart(cmd); + while (1); +} + +void machine_halt(void) +{ + do_kernel_power_off(); + default_power_off(); +} + +void machine_power_off(void) +{ + do_kernel_power_off(); + default_power_off(); +} diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c new file mode 100644 index 000000000..5ab1c7e1a --- /dev/null +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017 Zihao Yu + */ + +#include <linux/export.h> +#include <linux/uaccess.h> + +/* + * Assembly functions that may be used (directly or indirectly) by modules + */ +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(__memset); +EXPORT_SYMBOL(__memcpy); +EXPORT_SYMBOL(__memmove); diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c new file mode 100644 index 000000000..5238026f7 --- /dev/null +++ b/arch/riscv/kernel/sbi.c @@ -0,0 +1,697 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * SBI initialilization and all extension implementation. + * + * Copyright (c) 2020 Western Digital Corporation or its affiliates. + */ + +#include <linux/bits.h> +#include <linux/init.h> +#include <linux/pm.h> +#include <linux/reboot.h> +#include <asm/sbi.h> +#include <asm/smp.h> + +/* default SBI version is 0.1 */ +unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT; +EXPORT_SYMBOL(sbi_spec_version); + +static void (*__sbi_set_timer)(uint64_t stime) __ro_after_init; +static int (*__sbi_send_ipi)(const struct cpumask *cpu_mask) __ro_after_init; +static int (*__sbi_rfence)(int fid, const struct cpumask *cpu_mask, + unsigned long start, unsigned long size, + unsigned long arg4, unsigned long arg5) __ro_after_init; + +struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, + unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5) +{ + struct sbiret ret; + + register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); + register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); + register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); + register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); + register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); + register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); + register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); + register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); + asm volatile ("ecall" + : "+r" (a0), "+r" (a1) + : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) + : "memory"); + ret.error = a0; + ret.value = a1; + + return ret; +} +EXPORT_SYMBOL(sbi_ecall); + +int sbi_err_map_linux_errno(int err) +{ + switch (err) { + case SBI_SUCCESS: + return 0; + case SBI_ERR_DENIED: + return -EPERM; + case SBI_ERR_INVALID_PARAM: + return -EINVAL; + case SBI_ERR_INVALID_ADDRESS: + return -EFAULT; + case SBI_ERR_NOT_SUPPORTED: + case SBI_ERR_FAILURE: + default: + return -ENOTSUPP; + }; +} +EXPORT_SYMBOL(sbi_err_map_linux_errno); + +#ifdef CONFIG_RISCV_SBI_V01 +static unsigned long __sbi_v01_cpumask_to_hartmask(const struct cpumask *cpu_mask) +{ + unsigned long cpuid, hartid; + unsigned long hmask = 0; + + /* + * There is no maximum hartid concept in RISC-V and NR_CPUS must not be + * associated with hartid. As SBI v0.1 is only kept for backward compatibility + * and will be removed in the future, there is no point in supporting hartid + * greater than BITS_PER_LONG (32 for RV32 and 64 for RV64). Ideally, SBI v0.2 + * should be used for platforms with hartid greater than BITS_PER_LONG. + */ + for_each_cpu(cpuid, cpu_mask) { + hartid = cpuid_to_hartid_map(cpuid); + if (hartid >= BITS_PER_LONG) { + pr_warn("Unable to send any request to hartid > BITS_PER_LONG for SBI v0.1\n"); + break; + } + hmask |= BIT(hartid); + } + + return hmask; +} + +/** + * sbi_console_putchar() - Writes given character to the console device. + * @ch: The data to be written to the console. + * + * Return: None + */ +void sbi_console_putchar(int ch) +{ + sbi_ecall(SBI_EXT_0_1_CONSOLE_PUTCHAR, 0, ch, 0, 0, 0, 0, 0); +} +EXPORT_SYMBOL(sbi_console_putchar); + +/** + * sbi_console_getchar() - Reads a byte from console device. + * + * Returns the value read from console. + */ +int sbi_console_getchar(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_0_1_CONSOLE_GETCHAR, 0, 0, 0, 0, 0, 0, 0); + + return ret.error; +} +EXPORT_SYMBOL(sbi_console_getchar); + +/** + * sbi_shutdown() - Remove all the harts from executing supervisor code. + * + * Return: None + */ +void sbi_shutdown(void) +{ + sbi_ecall(SBI_EXT_0_1_SHUTDOWN, 0, 0, 0, 0, 0, 0, 0); +} +EXPORT_SYMBOL(sbi_shutdown); + +/** + * sbi_clear_ipi() - Clear any pending IPIs for the calling hart. + * + * Return: None + */ +void sbi_clear_ipi(void) +{ + sbi_ecall(SBI_EXT_0_1_CLEAR_IPI, 0, 0, 0, 0, 0, 0, 0); +} +EXPORT_SYMBOL(sbi_clear_ipi); + +/** + * __sbi_set_timer_v01() - Program the timer for next timer event. + * @stime_value: The value after which next timer event should fire. + * + * Return: None + */ +static void __sbi_set_timer_v01(uint64_t stime_value) +{ +#if __riscv_xlen == 32 + sbi_ecall(SBI_EXT_0_1_SET_TIMER, 0, stime_value, + stime_value >> 32, 0, 0, 0, 0); +#else + sbi_ecall(SBI_EXT_0_1_SET_TIMER, 0, stime_value, 0, 0, 0, 0, 0); +#endif +} + +static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask) +{ + unsigned long hart_mask; + + if (!cpu_mask || cpumask_empty(cpu_mask)) + cpu_mask = cpu_online_mask; + hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); + + sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)(&hart_mask), + 0, 0, 0, 0, 0); + return 0; +} + +static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, + unsigned long start, unsigned long size, + unsigned long arg4, unsigned long arg5) +{ + int result = 0; + unsigned long hart_mask; + + if (!cpu_mask || cpumask_empty(cpu_mask)) + cpu_mask = cpu_online_mask; + hart_mask = __sbi_v01_cpumask_to_hartmask(cpu_mask); + + /* v0.2 function IDs are equivalent to v0.1 extension IDs */ + switch (fid) { + case SBI_EXT_RFENCE_REMOTE_FENCE_I: + sbi_ecall(SBI_EXT_0_1_REMOTE_FENCE_I, 0, + (unsigned long)&hart_mask, 0, 0, 0, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: + sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA, 0, + (unsigned long)&hart_mask, start, size, + 0, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: + sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, 0, + (unsigned long)&hart_mask, start, size, + arg4, 0, 0); + break; + default: + pr_err("SBI call [%d]not supported in SBI v0.1\n", fid); + result = -EINVAL; + } + + return result; +} + +static void sbi_set_power_off(void) +{ + pm_power_off = sbi_shutdown; +} +#else +static void __sbi_set_timer_v01(uint64_t stime_value) +{ + pr_warn("Timer extension is not available in SBI v%lu.%lu\n", + sbi_major_version(), sbi_minor_version()); +} + +static int __sbi_send_ipi_v01(const struct cpumask *cpu_mask) +{ + pr_warn("IPI extension is not available in SBI v%lu.%lu\n", + sbi_major_version(), sbi_minor_version()); + + return 0; +} + +static int __sbi_rfence_v01(int fid, const struct cpumask *cpu_mask, + unsigned long start, unsigned long size, + unsigned long arg4, unsigned long arg5) +{ + pr_warn("remote fence extension is not available in SBI v%lu.%lu\n", + sbi_major_version(), sbi_minor_version()); + + return 0; +} + +static void sbi_set_power_off(void) {} +#endif /* CONFIG_RISCV_SBI_V01 */ + +static void __sbi_set_timer_v02(uint64_t stime_value) +{ +#if __riscv_xlen == 32 + sbi_ecall(SBI_EXT_TIME, SBI_EXT_TIME_SET_TIMER, stime_value, + stime_value >> 32, 0, 0, 0, 0); +#else + sbi_ecall(SBI_EXT_TIME, SBI_EXT_TIME_SET_TIMER, stime_value, 0, + 0, 0, 0, 0); +#endif +} + +static int __sbi_send_ipi_v02(const struct cpumask *cpu_mask) +{ + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; + struct sbiret ret = {0}; + int result; + + if (!cpu_mask || cpumask_empty(cpu_mask)) + cpu_mask = cpu_online_mask; + + for_each_cpu(cpuid, cpu_mask) { + hartid = cpuid_to_hartid_map(cpuid); + if (hmask) { + if (hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid) { + ret = sbi_ecall(SBI_EXT_IPI, + SBI_EXT_IPI_SEND_IPI, hmask, + hbase, 0, 0, 0, 0); + if (ret.error) + goto ecall_failed; + hmask = 0; + } else if (hartid < hbase) { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } + } + if (!hmask) { + hbase = hartid; + htop = hartid; + } else if (hartid > htop) { + htop = hartid; + } + hmask |= BIT(hartid - hbase); + } + + if (hmask) { + ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, + hmask, hbase, 0, 0, 0, 0); + if (ret.error) + goto ecall_failed; + } + + return 0; + +ecall_failed: + result = sbi_err_map_linux_errno(ret.error); + pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n", + __func__, hbase, hmask, result); + return result; +} + +static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask, + unsigned long hbase, unsigned long start, + unsigned long size, unsigned long arg4, + unsigned long arg5) +{ + struct sbiret ret = {0}; + int ext = SBI_EXT_RFENCE; + int result = 0; + + switch (fid) { + case SBI_EXT_RFENCE_REMOTE_FENCE_I: + ret = sbi_ecall(ext, fid, hmask, hbase, 0, 0, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: + ret = sbi_ecall(ext, fid, hmask, hbase, start, + size, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: + ret = sbi_ecall(ext, fid, hmask, hbase, start, + size, arg4, 0); + break; + + case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA: + ret = sbi_ecall(ext, fid, hmask, hbase, start, + size, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID: + ret = sbi_ecall(ext, fid, hmask, hbase, start, + size, arg4, 0); + break; + case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA: + ret = sbi_ecall(ext, fid, hmask, hbase, start, + size, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID: + ret = sbi_ecall(ext, fid, hmask, hbase, start, + size, arg4, 0); + break; + default: + pr_err("unknown function ID [%lu] for SBI extension [%d]\n", + fid, ext); + result = -EINVAL; + } + + if (ret.error) { + result = sbi_err_map_linux_errno(ret.error); + pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n", + __func__, hbase, hmask, result); + } + + return result; +} + +static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, + unsigned long start, unsigned long size, + unsigned long arg4, unsigned long arg5) +{ + unsigned long hartid, cpuid, hmask = 0, hbase = 0, htop = 0; + int result; + + if (!cpu_mask || cpumask_empty(cpu_mask)) + cpu_mask = cpu_online_mask; + + for_each_cpu(cpuid, cpu_mask) { + hartid = cpuid_to_hartid_map(cpuid); + if (hmask) { + if (hartid + BITS_PER_LONG <= htop || + hbase + BITS_PER_LONG <= hartid) { + result = __sbi_rfence_v02_call(fid, hmask, + hbase, start, size, arg4, arg5); + if (result) + return result; + hmask = 0; + } else if (hartid < hbase) { + /* shift the mask to fit lower hartid */ + hmask <<= hbase - hartid; + hbase = hartid; + } + } + if (!hmask) { + hbase = hartid; + htop = hartid; + } else if (hartid > htop) { + htop = hartid; + } + hmask |= BIT(hartid - hbase); + } + + if (hmask) { + result = __sbi_rfence_v02_call(fid, hmask, hbase, + start, size, arg4, arg5); + if (result) + return result; + } + + return 0; +} + +/** + * sbi_set_timer() - Program the timer for next timer event. + * @stime_value: The value after which next timer event should fire. + * + * Return: None. + */ +void sbi_set_timer(uint64_t stime_value) +{ + __sbi_set_timer(stime_value); +} + +/** + * sbi_send_ipi() - Send an IPI to any hart. + * @cpu_mask: A cpu mask containing all the target harts. + * + * Return: 0 on success, appropriate linux error code otherwise. + */ +int sbi_send_ipi(const struct cpumask *cpu_mask) +{ + return __sbi_send_ipi(cpu_mask); +} +EXPORT_SYMBOL(sbi_send_ipi); + +/** + * sbi_remote_fence_i() - Execute FENCE.I instruction on given remote harts. + * @cpu_mask: A cpu mask containing all the target harts. + * + * Return: 0 on success, appropriate linux error code otherwise. + */ +int sbi_remote_fence_i(const struct cpumask *cpu_mask) +{ + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I, + cpu_mask, 0, 0, 0, 0); +} +EXPORT_SYMBOL(sbi_remote_fence_i); + +/** + * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote + * harts for the specified virtual address range. + * @cpu_mask: A cpu mask containing all the target harts. + * @start: Start of the virtual address + * @size: Total size of the virtual address range. + * + * Return: 0 on success, appropriate linux error code otherwise. + */ +int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, + unsigned long start, + unsigned long size) +{ + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, + cpu_mask, start, size, 0, 0); +} +EXPORT_SYMBOL(sbi_remote_sfence_vma); + +/** + * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given + * remote harts for a virtual address range belonging to a specific ASID. + * + * @cpu_mask: A cpu mask containing all the target harts. + * @start: Start of the virtual address + * @size: Total size of the virtual address range. + * @asid: The value of address space identifier (ASID). + * + * Return: 0 on success, appropriate linux error code otherwise. + */ +int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, + unsigned long start, + unsigned long size, + unsigned long asid) +{ + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, + cpu_mask, start, size, asid, 0); +} +EXPORT_SYMBOL(sbi_remote_sfence_vma_asid); + +/** + * sbi_remote_hfence_gvma() - Execute HFENCE.GVMA instructions on given remote + * harts for the specified guest physical address range. + * @cpu_mask: A cpu mask containing all the target harts. + * @start: Start of the guest physical address + * @size: Total size of the guest physical address range. + * + * Return: None + */ +int sbi_remote_hfence_gvma(const struct cpumask *cpu_mask, + unsigned long start, + unsigned long size) +{ + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA, + cpu_mask, start, size, 0, 0); +} +EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma); + +/** + * sbi_remote_hfence_gvma_vmid() - Execute HFENCE.GVMA instructions on given + * remote harts for a guest physical address range belonging to a specific VMID. + * + * @cpu_mask: A cpu mask containing all the target harts. + * @start: Start of the guest physical address + * @size: Total size of the guest physical address range. + * @vmid: The value of guest ID (VMID). + * + * Return: 0 if success, Error otherwise. + */ +int sbi_remote_hfence_gvma_vmid(const struct cpumask *cpu_mask, + unsigned long start, + unsigned long size, + unsigned long vmid) +{ + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID, + cpu_mask, start, size, vmid, 0); +} +EXPORT_SYMBOL(sbi_remote_hfence_gvma_vmid); + +/** + * sbi_remote_hfence_vvma() - Execute HFENCE.VVMA instructions on given remote + * harts for the current guest virtual address range. + * @cpu_mask: A cpu mask containing all the target harts. + * @start: Start of the current guest virtual address + * @size: Total size of the current guest virtual address range. + * + * Return: None + */ +int sbi_remote_hfence_vvma(const struct cpumask *cpu_mask, + unsigned long start, + unsigned long size) +{ + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA, + cpu_mask, start, size, 0, 0); +} +EXPORT_SYMBOL(sbi_remote_hfence_vvma); + +/** + * sbi_remote_hfence_vvma_asid() - Execute HFENCE.VVMA instructions on given + * remote harts for current guest virtual address range belonging to a specific + * ASID. + * + * @cpu_mask: A cpu mask containing all the target harts. + * @start: Start of the current guest virtual address + * @size: Total size of the current guest virtual address range. + * @asid: The value of address space identifier (ASID). + * + * Return: None + */ +int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask, + unsigned long start, + unsigned long size, + unsigned long asid) +{ + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID, + cpu_mask, start, size, asid, 0); +} +EXPORT_SYMBOL(sbi_remote_hfence_vvma_asid); + +static void sbi_srst_reset(unsigned long type, unsigned long reason) +{ + sbi_ecall(SBI_EXT_SRST, SBI_EXT_SRST_RESET, type, reason, + 0, 0, 0, 0); + pr_warn("%s: type=0x%lx reason=0x%lx failed\n", + __func__, type, reason); +} + +static int sbi_srst_reboot(struct notifier_block *this, + unsigned long mode, void *cmd) +{ + sbi_srst_reset((mode == REBOOT_WARM || mode == REBOOT_SOFT) ? + SBI_SRST_RESET_TYPE_WARM_REBOOT : + SBI_SRST_RESET_TYPE_COLD_REBOOT, + SBI_SRST_RESET_REASON_NONE); + return NOTIFY_DONE; +} + +static struct notifier_block sbi_srst_reboot_nb; + +static void sbi_srst_power_off(void) +{ + sbi_srst_reset(SBI_SRST_RESET_TYPE_SHUTDOWN, + SBI_SRST_RESET_REASON_NONE); +} + +/** + * sbi_probe_extension() - Check if an SBI extension ID is supported or not. + * @extid: The extension ID to be probed. + * + * Return: 1 or an extension specific nonzero value if yes, 0 otherwise. + */ +long sbi_probe_extension(int extid) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid, + 0, 0, 0, 0, 0); + if (!ret.error) + return ret.value; + + return 0; +} +EXPORT_SYMBOL(sbi_probe_extension); + +static long __sbi_base_ecall(int fid) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0); + if (!ret.error) + return ret.value; + else + return sbi_err_map_linux_errno(ret.error); +} + +static inline long sbi_get_spec_version(void) +{ + return __sbi_base_ecall(SBI_EXT_BASE_GET_SPEC_VERSION); +} + +static inline long sbi_get_firmware_id(void) +{ + return __sbi_base_ecall(SBI_EXT_BASE_GET_IMP_ID); +} + +static inline long sbi_get_firmware_version(void) +{ + return __sbi_base_ecall(SBI_EXT_BASE_GET_IMP_VERSION); +} + +long sbi_get_mvendorid(void) +{ + return __sbi_base_ecall(SBI_EXT_BASE_GET_MVENDORID); +} + +long sbi_get_marchid(void) +{ + return __sbi_base_ecall(SBI_EXT_BASE_GET_MARCHID); +} + +long sbi_get_mimpid(void) +{ + return __sbi_base_ecall(SBI_EXT_BASE_GET_MIMPID); +} + +static void sbi_send_cpumask_ipi(const struct cpumask *target) +{ + sbi_send_ipi(target); +} + +static const struct riscv_ipi_ops sbi_ipi_ops = { + .ipi_inject = sbi_send_cpumask_ipi +}; + +void __init sbi_init(void) +{ + int ret; + + sbi_set_power_off(); + ret = sbi_get_spec_version(); + if (ret > 0) + sbi_spec_version = ret; + + pr_info("SBI specification v%lu.%lu detected\n", + sbi_major_version(), sbi_minor_version()); + + if (!sbi_spec_is_0_1()) { + pr_info("SBI implementation ID=0x%lx Version=0x%lx\n", + sbi_get_firmware_id(), sbi_get_firmware_version()); + if (sbi_probe_extension(SBI_EXT_TIME)) { + __sbi_set_timer = __sbi_set_timer_v02; + pr_info("SBI TIME extension detected\n"); + } else { + __sbi_set_timer = __sbi_set_timer_v01; + } + if (sbi_probe_extension(SBI_EXT_IPI)) { + __sbi_send_ipi = __sbi_send_ipi_v02; + pr_info("SBI IPI extension detected\n"); + } else { + __sbi_send_ipi = __sbi_send_ipi_v01; + } + if (sbi_probe_extension(SBI_EXT_RFENCE)) { + __sbi_rfence = __sbi_rfence_v02; + pr_info("SBI RFENCE extension detected\n"); + } else { + __sbi_rfence = __sbi_rfence_v01; + } + if ((sbi_spec_version >= sbi_mk_version(0, 3)) && + sbi_probe_extension(SBI_EXT_SRST)) { + pr_info("SBI SRST extension detected\n"); + pm_power_off = sbi_srst_power_off; + sbi_srst_reboot_nb.notifier_call = sbi_srst_reboot; + sbi_srst_reboot_nb.priority = 192; + register_restart_handler(&sbi_srst_reboot_nb); + } + } else { + __sbi_set_timer = __sbi_set_timer_v01; + __sbi_send_ipi = __sbi_send_ipi_v01; + __sbi_rfence = __sbi_rfence_v01; + } + + riscv_set_ipi_ops(&sbi_ipi_ops); +} diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c new file mode 100644 index 000000000..2acf51c23 --- /dev/null +++ b/arch/riscv/kernel/setup.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2009 Sunplus Core Technology Co., Ltd. + * Chen Liqin <liqin.chen@sunplusct.com> + * Lennox Wu <lennox.wu@sunplusct.com> + * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2020 FORTH-ICS/CARV + * Nick Kossifidis <mick@ics.forth.gr> + */ + +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/memblock.h> +#include <linux/sched.h> +#include <linux/console.h> +#include <linux/screen_info.h> +#include <linux/of_fdt.h> +#include <linux/of_platform.h> +#include <linux/sched/task.h> +#include <linux/smp.h> +#include <linux/efi.h> +#include <linux/crash_dump.h> + +#include <asm/alternative.h> +#include <asm/cacheflush.h> +#include <asm/cpu_ops.h> +#include <asm/early_ioremap.h> +#include <asm/pgtable.h> +#include <asm/setup.h> +#include <asm/set_memory.h> +#include <asm/sections.h> +#include <asm/sbi.h> +#include <asm/tlbflush.h> +#include <asm/thread_info.h> +#include <asm/kasan.h> +#include <asm/efi.h> + +#include "head.h" + +#if defined(CONFIG_DUMMY_CONSOLE) || defined(CONFIG_EFI) +struct screen_info screen_info __section(".data") = { + .orig_video_lines = 30, + .orig_video_cols = 80, + .orig_video_mode = 0, + .orig_video_ega_bx = 0, + .orig_video_isVGA = 1, + .orig_video_points = 8 +}; +#endif + +/* + * The lucky hart to first increment this variable will boot the other cores. + * This is used before the kernel initializes the BSS so it can't be in the + * BSS. + */ +atomic_t hart_lottery __section(".sdata") +#ifdef CONFIG_XIP_KERNEL += ATOMIC_INIT(0xC001BEEF) +#endif +; +unsigned long boot_cpu_hartid; +static DEFINE_PER_CPU(struct cpu, cpu_devices); + +/* + * Place kernel memory regions on the resource tree so that + * kexec-tools can retrieve them from /proc/iomem. While there + * also add "System RAM" regions for compatibility with other + * archs, and the rest of the known regions for completeness. + */ +static struct resource kimage_res = { .name = "Kernel image", }; +static struct resource code_res = { .name = "Kernel code", }; +static struct resource data_res = { .name = "Kernel data", }; +static struct resource rodata_res = { .name = "Kernel rodata", }; +static struct resource bss_res = { .name = "Kernel bss", }; +#ifdef CONFIG_CRASH_DUMP +static struct resource elfcorehdr_res = { .name = "ELF Core hdr", }; +#endif + +static int __init add_resource(struct resource *parent, + struct resource *res) +{ + int ret = 0; + + ret = insert_resource(parent, res); + if (ret < 0) { + pr_err("Failed to add a %s resource at %llx\n", + res->name, (unsigned long long) res->start); + return ret; + } + + return 1; +} + +static int __init add_kernel_resources(void) +{ + int ret = 0; + + /* + * The memory region of the kernel image is continuous and + * was reserved on setup_bootmem, register it here as a + * resource, with the various segments of the image as + * child nodes. + */ + + code_res.start = __pa_symbol(_text); + code_res.end = __pa_symbol(_etext) - 1; + code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + rodata_res.start = __pa_symbol(__start_rodata); + rodata_res.end = __pa_symbol(__end_rodata) - 1; + rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + data_res.start = __pa_symbol(_data); + data_res.end = __pa_symbol(_edata) - 1; + data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + bss_res.start = __pa_symbol(__bss_start); + bss_res.end = __pa_symbol(__bss_stop) - 1; + bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + kimage_res.start = code_res.start; + kimage_res.end = bss_res.end; + kimage_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + ret = add_resource(&iomem_resource, &kimage_res); + if (ret < 0) + return ret; + + ret = add_resource(&kimage_res, &code_res); + if (ret < 0) + return ret; + + ret = add_resource(&kimage_res, &rodata_res); + if (ret < 0) + return ret; + + ret = add_resource(&kimage_res, &data_res); + if (ret < 0) + return ret; + + ret = add_resource(&kimage_res, &bss_res); + + return ret; +} + +static void __init init_resources(void) +{ + struct memblock_region *region = NULL; + struct resource *res = NULL; + struct resource *mem_res = NULL; + size_t mem_res_sz = 0; + int num_resources = 0, res_idx = 0; + int ret = 0; + + /* + 1 as memblock_alloc() might increase memblock.reserved.cnt */ + num_resources = memblock.memory.cnt + memblock.reserved.cnt + 1; + res_idx = num_resources - 1; + + mem_res_sz = num_resources * sizeof(*mem_res); + mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES); + if (!mem_res) + panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz); + + /* + * Start by adding the reserved regions, if they overlap + * with /memory regions, insert_resource later on will take + * care of it. + */ + ret = add_kernel_resources(); + if (ret < 0) + goto error; + +#ifdef CONFIG_KEXEC_CORE + if (crashk_res.start != crashk_res.end) { + ret = add_resource(&iomem_resource, &crashk_res); + if (ret < 0) + goto error; + } +#endif + +#ifdef CONFIG_CRASH_DUMP + if (elfcorehdr_size > 0) { + elfcorehdr_res.start = elfcorehdr_addr; + elfcorehdr_res.end = elfcorehdr_addr + elfcorehdr_size - 1; + elfcorehdr_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + add_resource(&iomem_resource, &elfcorehdr_res); + } +#endif + + for_each_reserved_mem_region(region) { + res = &mem_res[res_idx--]; + + res->name = "Reserved"; + res->flags = IORESOURCE_MEM | IORESOURCE_EXCLUSIVE; + res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region)); + res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1; + + /* + * Ignore any other reserved regions within + * system memory. + */ + if (memblock_is_memory(res->start)) { + /* Re-use this pre-allocated resource */ + res_idx++; + continue; + } + + ret = add_resource(&iomem_resource, res); + if (ret < 0) + goto error; + } + + /* Add /memory regions to the resource tree */ + for_each_mem_region(region) { + res = &mem_res[res_idx--]; + + if (unlikely(memblock_is_nomap(region))) { + res->name = "Reserved"; + res->flags = IORESOURCE_MEM | IORESOURCE_EXCLUSIVE; + } else { + res->name = "System RAM"; + res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + } + + res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); + res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; + + ret = add_resource(&iomem_resource, res); + if (ret < 0) + goto error; + } + + /* Clean-up any unused pre-allocated resources */ + if (res_idx >= 0) + memblock_free(mem_res, (res_idx + 1) * sizeof(*mem_res)); + return; + + error: + /* Better an empty resource tree than an inconsistent one */ + release_child_resources(&iomem_resource); + memblock_free(mem_res, mem_res_sz); +} + + +static void __init parse_dtb(void) +{ + /* Early scan of device tree from init memory */ + if (early_init_dt_scan(dtb_early_va)) { + const char *name = of_flat_dt_get_machine_name(); + + if (name) { + pr_info("Machine model: %s\n", name); + dump_stack_set_arch_desc("%s (DT)", name); + } + } else { + pr_err("No DTB passed to the kernel\n"); + } + +#ifdef CONFIG_CMDLINE_FORCE + strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE); + pr_info("Forcing kernel command line to: %s\n", boot_command_line); +#endif +} + +void __init setup_arch(char **cmdline_p) +{ + parse_dtb(); + setup_initial_init_mm(_stext, _etext, _edata, _end); + + *cmdline_p = boot_command_line; + + early_ioremap_setup(); + jump_label_init(); + parse_early_param(); + + efi_init(); + paging_init(); +#if IS_ENABLED(CONFIG_BUILTIN_DTB) + unflatten_and_copy_device_tree(); +#else + unflatten_device_tree(); +#endif + misc_mem_init(); + + init_resources(); + sbi_init(); + +#ifdef CONFIG_KASAN + kasan_init(); +#endif + +#ifdef CONFIG_SMP + setup_smp(); +#endif + + riscv_init_cbom_blocksize(); + riscv_fill_hwcap(); + apply_boot_alternatives(); +} + +static int __init topology_init(void) +{ + int i, ret; + + for_each_possible_cpu(i) { + struct cpu *cpu = &per_cpu(cpu_devices, i); + + cpu->hotpluggable = cpu_has_hotplug(i); + ret = register_cpu(cpu, i); + if (unlikely(ret)) + pr_warn("Warning: %s: register_cpu %d failed (%d)\n", + __func__, i, ret); + } + + return 0; +} +subsys_initcall(topology_init); + +void free_initmem(void) +{ + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { + set_kernel_memory(lm_alias(__init_begin), lm_alias(__init_end), set_memory_rw_nx); + if (IS_ENABLED(CONFIG_64BIT)) + set_kernel_memory(__init_begin, __init_end, set_memory_nx); + } + + free_initmem_default(POISON_FREE_INITMEM); +} diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c new file mode 100644 index 000000000..dee66c929 --- /dev/null +++ b/arch/riscv/kernel/signal.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2009 Sunplus Core Technology Co., Ltd. + * Chen Liqin <liqin.chen@sunplusct.com> + * Lennox Wu <lennox.wu@sunplusct.com> + * Copyright (C) 2012 Regents of the University of California + */ + +#include <linux/compat.h> +#include <linux/signal.h> +#include <linux/uaccess.h> +#include <linux/syscalls.h> +#include <linux/resume_user_mode.h> +#include <linux/linkage.h> + +#include <asm/ucontext.h> +#include <asm/vdso.h> +#include <asm/signal.h> +#include <asm/signal32.h> +#include <asm/switch_to.h> +#include <asm/csr.h> +#include <asm/cacheflush.h> + +extern u32 __user_rt_sigreturn[2]; + +#define DEBUG_SIG 0 + +struct rt_sigframe { + struct siginfo info; + struct ucontext uc; +#ifndef CONFIG_MMU + u32 sigreturn_code[2]; +#endif +}; + +#ifdef CONFIG_FPU +static long restore_fp_state(struct pt_regs *regs, + union __riscv_fp_state __user *sc_fpregs) +{ + long err; + struct __riscv_d_ext_state __user *state = &sc_fpregs->d; + size_t i; + + err = __copy_from_user(¤t->thread.fstate, state, sizeof(*state)); + if (unlikely(err)) + return err; + + fstate_restore(current, regs); + + /* We support no other extension state at this time. */ + for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) { + u32 value; + + err = __get_user(value, &sc_fpregs->q.reserved[i]); + if (unlikely(err)) + break; + if (value != 0) + return -EINVAL; + } + + return err; +} + +static long save_fp_state(struct pt_regs *regs, + union __riscv_fp_state __user *sc_fpregs) +{ + long err; + struct __riscv_d_ext_state __user *state = &sc_fpregs->d; + size_t i; + + fstate_save(current, regs); + err = __copy_to_user(state, ¤t->thread.fstate, sizeof(*state)); + if (unlikely(err)) + return err; + + /* We support no other extension state at this time. */ + for (i = 0; i < ARRAY_SIZE(sc_fpregs->q.reserved); i++) { + err = __put_user(0, &sc_fpregs->q.reserved[i]); + if (unlikely(err)) + break; + } + + return err; +} +#else +#define save_fp_state(task, regs) (0) +#define restore_fp_state(task, regs) (0) +#endif + +static long restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *sc) +{ + long err; + /* sc_regs is structured the same as the start of pt_regs */ + err = __copy_from_user(regs, &sc->sc_regs, sizeof(sc->sc_regs)); + /* Restore the floating-point state. */ + if (has_fpu()) + err |= restore_fp_state(regs, &sc->sc_fpregs); + return err; +} + +SYSCALL_DEFINE0(rt_sigreturn) +{ + struct pt_regs *regs = current_pt_regs(); + struct rt_sigframe __user *frame; + struct task_struct *task; + sigset_t set; + + /* Always make any pending restarted system calls return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; + + frame = (struct rt_sigframe __user *)regs->sp; + + if (!access_ok(frame, sizeof(*frame))) + goto badframe; + + if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + goto badframe; + + set_current_blocked(&set); + + if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + goto badframe; + + if (restore_altstack(&frame->uc.uc_stack)) + goto badframe; + + regs->cause = -1UL; + + return regs->a0; + +badframe: + task = current; + if (show_unhandled_signals) { + pr_info_ratelimited( + "%s[%d]: bad frame in %s: frame=%p pc=%p sp=%p\n", + task->comm, task_pid_nr(task), __func__, + frame, (void *)regs->epc, (void *)regs->sp); + } + force_sig(SIGSEGV); + return 0; +} + +static long setup_sigcontext(struct rt_sigframe __user *frame, + struct pt_regs *regs) +{ + struct sigcontext __user *sc = &frame->uc.uc_mcontext; + long err; + /* sc_regs is structured the same as the start of pt_regs */ + err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs)); + /* Save the floating-point state. */ + if (has_fpu()) + err |= save_fp_state(regs, &sc->sc_fpregs); + return err; +} + +static inline void __user *get_sigframe(struct ksignal *ksig, + struct pt_regs *regs, size_t framesize) +{ + unsigned long sp; + /* Default to using normal stack */ + sp = regs->sp; + + /* + * If we are on the alternate signal stack and would overflow it, don't. + * Return an always-bogus address instead so we will die with SIGSEGV. + */ + if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize))) + return (void __user __force *)(-1UL); + + /* This is the X/Open sanctioned signal stack switching. */ + sp = sigsp(sp, ksig) - framesize; + + /* Align the stack frame. */ + sp &= ~0xfUL; + + return (void __user *)sp; +} + +static int setup_rt_frame(struct ksignal *ksig, sigset_t *set, + struct pt_regs *regs) +{ + struct rt_sigframe __user *frame; + long err = 0; + unsigned long __maybe_unused addr; + + frame = get_sigframe(ksig, regs, sizeof(*frame)); + if (!access_ok(frame, sizeof(*frame))) + return -EFAULT; + + err |= copy_siginfo_to_user(&frame->info, &ksig->info); + + /* Create the ucontext. */ + err |= __put_user(0, &frame->uc.uc_flags); + err |= __put_user(NULL, &frame->uc.uc_link); + err |= __save_altstack(&frame->uc.uc_stack, regs->sp); + err |= setup_sigcontext(frame, regs); + err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + if (err) + return -EFAULT; + + /* Set up to return from userspace. */ +#ifdef CONFIG_MMU + regs->ra = (unsigned long)VDSO_SYMBOL( + current->mm->context.vdso, rt_sigreturn); +#else + /* + * For the nommu case we don't have a VDSO. Instead we push two + * instructions to call the rt_sigreturn syscall onto the user stack. + */ + if (copy_to_user(&frame->sigreturn_code, __user_rt_sigreturn, + sizeof(frame->sigreturn_code))) + return -EFAULT; + + addr = (unsigned long)&frame->sigreturn_code; + /* Make sure the two instructions are pushed to icache. */ + flush_icache_range(addr, addr + sizeof(frame->sigreturn_code)); + + regs->ra = addr; +#endif /* CONFIG_MMU */ + + /* + * Set up registers for signal handler. + * Registers that we don't modify keep the value they had from + * user-space at the time we took the signal. + * We always pass siginfo and mcontext, regardless of SA_SIGINFO, + * since some things rely on this (e.g. glibc's debug/segfault.c). + */ + regs->epc = (unsigned long)ksig->ka.sa.sa_handler; + regs->sp = (unsigned long)frame; + regs->a0 = ksig->sig; /* a0: signal number */ + regs->a1 = (unsigned long)(&frame->info); /* a1: siginfo pointer */ + regs->a2 = (unsigned long)(&frame->uc); /* a2: ucontext pointer */ + +#if DEBUG_SIG + pr_info("SIG deliver (%s:%d): sig=%d pc=%p ra=%p sp=%p\n", + current->comm, task_pid_nr(current), ksig->sig, + (void *)regs->epc, (void *)regs->ra, frame); +#endif + + return 0; +} + +static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) +{ + sigset_t *oldset = sigmask_to_save(); + int ret; + + /* Are we from a system call? */ + if (regs->cause == EXC_SYSCALL) { + /* Avoid additional syscall restarting via ret_from_exception */ + regs->cause = -1UL; + /* If so, check system call restarting.. */ + switch (regs->a0) { + case -ERESTART_RESTARTBLOCK: + case -ERESTARTNOHAND: + regs->a0 = -EINTR; + break; + + case -ERESTARTSYS: + if (!(ksig->ka.sa.sa_flags & SA_RESTART)) { + regs->a0 = -EINTR; + break; + } + fallthrough; + case -ERESTARTNOINTR: + regs->a0 = regs->orig_a0; + regs->epc -= 0x4; + break; + } + } + + rseq_signal_deliver(ksig, regs); + + /* Set up the stack frame */ + if (is_compat_task()) + ret = compat_setup_rt_frame(ksig, oldset, regs); + else + ret = setup_rt_frame(ksig, oldset, regs); + + signal_setup_done(ret, ksig, 0); +} + +static void do_signal(struct pt_regs *regs) +{ + struct ksignal ksig; + + if (get_signal(&ksig)) { + /* Actually deliver the signal */ + handle_signal(&ksig, regs); + return; + } + + /* Did we come from a system call? */ + if (regs->cause == EXC_SYSCALL) { + /* Avoid additional syscall restarting via ret_from_exception */ + regs->cause = -1UL; + + /* Restart the system call - no handlers present */ + switch (regs->a0) { + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + regs->a0 = regs->orig_a0; + regs->epc -= 0x4; + break; + case -ERESTART_RESTARTBLOCK: + regs->a0 = regs->orig_a0; + regs->a7 = __NR_restart_syscall; + regs->epc -= 0x4; + break; + } + } + + /* + * If there is no signal to deliver, we just put the saved + * sigmask back. + */ + restore_saved_sigmask(); +} + +/* + * Handle any pending work on the resume-to-userspace path, as indicated by + * _TIF_WORK_MASK. Entered from assembly with IRQs off. + */ +asmlinkage __visible void do_work_pending(struct pt_regs *regs, + unsigned long thread_info_flags) +{ + do { + if (thread_info_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + local_irq_enable(); + if (thread_info_flags & _TIF_UPROBE) + uprobe_notify_resume(regs); + /* Handle pending signal delivery */ + if (thread_info_flags & (_TIF_SIGPENDING | + _TIF_NOTIFY_SIGNAL)) + do_signal(regs); + if (thread_info_flags & _TIF_NOTIFY_RESUME) + resume_user_mode_work(regs); + } + local_irq_disable(); + thread_info_flags = read_thread_flags(); + } while (thread_info_flags & _TIF_WORK_MASK); +} diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c new file mode 100644 index 000000000..7f534023f --- /dev/null +++ b/arch/riscv/kernel/smp.c @@ -0,0 +1,334 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * SMP initialisation and IPI support + * Based on arch/arm64/kernel/smp.c + * + * Copyright (C) 2012 ARM Ltd. + * Copyright (C) 2015 Regents of the University of California + * Copyright (C) 2017 SiFive + */ + +#include <linux/cpu.h> +#include <linux/clockchips.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/kexec.h> +#include <linux/profile.h> +#include <linux/smp.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/delay.h> +#include <linux/irq_work.h> + +#include <asm/sbi.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> +#include <asm/cpu_ops.h> + +enum ipi_message_type { + IPI_RESCHEDULE, + IPI_CALL_FUNC, + IPI_CPU_STOP, + IPI_CPU_CRASH_STOP, + IPI_IRQ_WORK, + IPI_TIMER, + IPI_MAX +}; + +unsigned long __cpuid_to_hartid_map[NR_CPUS] __ro_after_init = { + [0 ... NR_CPUS-1] = INVALID_HARTID +}; + +void __init smp_setup_processor_id(void) +{ + cpuid_to_hartid_map(0) = boot_cpu_hartid; +} + +/* A collection of single bit ipi messages. */ +static struct { + unsigned long stats[IPI_MAX] ____cacheline_aligned; + unsigned long bits ____cacheline_aligned; +} ipi_data[NR_CPUS] __cacheline_aligned; + +int riscv_hartid_to_cpuid(unsigned long hartid) +{ + int i; + + for (i = 0; i < NR_CPUS; i++) + if (cpuid_to_hartid_map(i) == hartid) + return i; + + return -ENOENT; +} + +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpuid_to_hartid_map(cpu); +} + +static void ipi_stop(void) +{ + set_cpu_online(smp_processor_id(), false); + while (1) + wait_for_interrupt(); +} + +#ifdef CONFIG_KEXEC_CORE +static atomic_t waiting_for_crash_ipi = ATOMIC_INIT(0); + +static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ + crash_save_cpu(regs, cpu); + + atomic_dec(&waiting_for_crash_ipi); + + local_irq_disable(); + +#ifdef CONFIG_HOTPLUG_CPU + if (cpu_has_hotplug(cpu)) + cpu_ops[cpu]->cpu_stop(); +#endif + + for(;;) + wait_for_interrupt(); +} +#else +static inline void ipi_cpu_crash_stop(unsigned int cpu, struct pt_regs *regs) +{ + unreachable(); +} +#endif + +static const struct riscv_ipi_ops *ipi_ops __ro_after_init; + +void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops) +{ + ipi_ops = ops; +} +EXPORT_SYMBOL_GPL(riscv_set_ipi_ops); + +void riscv_clear_ipi(void) +{ + if (ipi_ops && ipi_ops->ipi_clear) + ipi_ops->ipi_clear(); + + csr_clear(CSR_IP, IE_SIE); +} +EXPORT_SYMBOL_GPL(riscv_clear_ipi); + +static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op) +{ + int cpu; + + smp_mb__before_atomic(); + for_each_cpu(cpu, mask) + set_bit(op, &ipi_data[cpu].bits); + smp_mb__after_atomic(); + + if (ipi_ops && ipi_ops->ipi_inject) + ipi_ops->ipi_inject(mask); + else + pr_warn("SMP: IPI inject method not available\n"); +} + +static void send_ipi_single(int cpu, enum ipi_message_type op) +{ + smp_mb__before_atomic(); + set_bit(op, &ipi_data[cpu].bits); + smp_mb__after_atomic(); + + if (ipi_ops && ipi_ops->ipi_inject) + ipi_ops->ipi_inject(cpumask_of(cpu)); + else + pr_warn("SMP: IPI inject method not available\n"); +} + +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) +{ + send_ipi_single(smp_processor_id(), IPI_IRQ_WORK); +} +#endif + +void handle_IPI(struct pt_regs *regs) +{ + unsigned int cpu = smp_processor_id(); + unsigned long *pending_ipis = &ipi_data[cpu].bits; + unsigned long *stats = ipi_data[cpu].stats; + + riscv_clear_ipi(); + + while (true) { + unsigned long ops; + + /* Order bit clearing and data access. */ + mb(); + + ops = xchg(pending_ipis, 0); + if (ops == 0) + return; + + if (ops & (1 << IPI_RESCHEDULE)) { + stats[IPI_RESCHEDULE]++; + scheduler_ipi(); + } + + if (ops & (1 << IPI_CALL_FUNC)) { + stats[IPI_CALL_FUNC]++; + generic_smp_call_function_interrupt(); + } + + if (ops & (1 << IPI_CPU_STOP)) { + stats[IPI_CPU_STOP]++; + ipi_stop(); + } + + if (ops & (1 << IPI_CPU_CRASH_STOP)) { + ipi_cpu_crash_stop(cpu, get_irq_regs()); + } + + if (ops & (1 << IPI_IRQ_WORK)) { + stats[IPI_IRQ_WORK]++; + irq_work_run(); + } + +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST + if (ops & (1 << IPI_TIMER)) { + stats[IPI_TIMER]++; + tick_receive_broadcast(); + } +#endif + BUG_ON((ops >> IPI_MAX) != 0); + + /* Order data access and bit testing. */ + mb(); + } +} + +static const char * const ipi_names[] = { + [IPI_RESCHEDULE] = "Rescheduling interrupts", + [IPI_CALL_FUNC] = "Function call interrupts", + [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_CPU_CRASH_STOP] = "CPU stop (for crash dump) interrupts", + [IPI_IRQ_WORK] = "IRQ work interrupts", + [IPI_TIMER] = "Timer broadcast interrupts", +}; + +void show_ipi_stats(struct seq_file *p, int prec) +{ + unsigned int cpu, i; + + for (i = 0; i < IPI_MAX; i++) { + seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i, + prec >= 4 ? " " : ""); + for_each_online_cpu(cpu) + seq_printf(p, "%10lu ", ipi_data[cpu].stats[i]); + seq_printf(p, " %s\n", ipi_names[i]); + } +} + +void arch_send_call_function_ipi_mask(struct cpumask *mask) +{ + send_ipi_mask(mask, IPI_CALL_FUNC); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + send_ipi_single(cpu, IPI_CALL_FUNC); +} + +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +void tick_broadcast(const struct cpumask *mask) +{ + send_ipi_mask(mask, IPI_TIMER); +} +#endif + +void smp_send_stop(void) +{ + unsigned long timeout; + + if (num_online_cpus() > 1) { + cpumask_t mask; + + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + + if (system_state <= SYSTEM_RUNNING) + pr_crit("SMP: stopping secondary CPUs\n"); + send_ipi_mask(&mask, IPI_CPU_STOP); + } + + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while (num_online_cpus() > 1 && timeout--) + udelay(1); + + if (num_online_cpus() > 1) + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(cpu_online_mask)); +} + +#ifdef CONFIG_KEXEC_CORE +/* + * The number of CPUs online, not counting this CPU (which may not be + * fully online and so not counted in num_online_cpus()). + */ +static inline unsigned int num_other_online_cpus(void) +{ + unsigned int this_cpu_online = cpu_online(smp_processor_id()); + + return num_online_cpus() - this_cpu_online; +} + +void crash_smp_send_stop(void) +{ + static int cpus_stopped; + cpumask_t mask; + unsigned long timeout; + + /* + * This function can be called twice in panic path, but obviously + * we execute this only once. + */ + if (cpus_stopped) + return; + + cpus_stopped = 1; + + /* + * If this cpu is the only one alive at this point in time, online or + * not, there are no stop messages to be sent around, so just back out. + */ + if (num_other_online_cpus() == 0) + return; + + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + + atomic_set(&waiting_for_crash_ipi, num_other_online_cpus()); + + pr_crit("SMP: stopping secondary CPUs\n"); + send_ipi_mask(&mask, IPI_CPU_CRASH_STOP); + + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while ((atomic_read(&waiting_for_crash_ipi) > 0) && timeout--) + udelay(1); + + if (atomic_read(&waiting_for_crash_ipi) > 0) + pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", + cpumask_pr_args(&mask)); +} + +bool smp_crash_stop_failed(void) +{ + return (atomic_read(&waiting_for_crash_ipi) > 0); +} +#endif + +void smp_send_reschedule(int cpu) +{ + send_ipi_single(cpu, IPI_RESCHEDULE); +} +EXPORT_SYMBOL_GPL(smp_send_reschedule); diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c new file mode 100644 index 000000000..ddb2afba6 --- /dev/null +++ b/arch/riscv/kernel/smpboot.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * SMP initialisation and IPI support + * Based on arch/arm64/kernel/smp.c + * + * Copyright (C) 2012 ARM Ltd. + * Copyright (C) 2015 Regents of the University of California + * Copyright (C) 2017 SiFive + */ + +#include <linux/arch_topology.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/kernel_stat.h> +#include <linux/notifier.h> +#include <linux/cpu.h> +#include <linux/percpu.h> +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/irq.h> +#include <linux/of.h> +#include <linux/sched/task_stack.h> +#include <linux/sched/mm.h> +#include <asm/cpu_ops.h> +#include <asm/irq.h> +#include <asm/mmu_context.h> +#include <asm/numa.h> +#include <asm/tlbflush.h> +#include <asm/sections.h> +#include <asm/sbi.h> +#include <asm/smp.h> + +#include "head.h" + +static DECLARE_COMPLETION(cpu_running); + +void __init smp_prepare_boot_cpu(void) +{ +} + +void __init smp_prepare_cpus(unsigned int max_cpus) +{ + int cpuid; + int ret; + unsigned int curr_cpuid; + + init_cpu_topology(); + + curr_cpuid = smp_processor_id(); + store_cpu_topology(curr_cpuid); + numa_store_cpu_info(curr_cpuid); + numa_add_cpu(curr_cpuid); + + /* This covers non-smp usecase mandated by "nosmp" option */ + if (max_cpus == 0) + return; + + for_each_possible_cpu(cpuid) { + if (cpuid == curr_cpuid) + continue; + if (cpu_ops[cpuid]->cpu_prepare) { + ret = cpu_ops[cpuid]->cpu_prepare(cpuid); + if (ret) + continue; + } + set_cpu_present(cpuid, true); + numa_store_cpu_info(cpuid); + } +} + +void __init setup_smp(void) +{ + struct device_node *dn; + unsigned long hart; + bool found_boot_cpu = false; + int cpuid = 1; + int rc; + + cpu_set_ops(0); + + for_each_of_cpu_node(dn) { + rc = riscv_of_processor_hartid(dn, &hart); + if (rc < 0) + continue; + + if (hart == cpuid_to_hartid_map(0)) { + BUG_ON(found_boot_cpu); + found_boot_cpu = 1; + early_map_cpu_to_node(0, of_node_to_nid(dn)); + continue; + } + if (cpuid >= NR_CPUS) { + pr_warn("Invalid cpuid [%d] for hartid [%lu]\n", + cpuid, hart); + continue; + } + + cpuid_to_hartid_map(cpuid) = hart; + early_map_cpu_to_node(cpuid, of_node_to_nid(dn)); + cpuid++; + } + + BUG_ON(!found_boot_cpu); + + if (cpuid > nr_cpu_ids) + pr_warn("Total number of cpus [%d] is greater than nr_cpus option value [%d]\n", + cpuid, nr_cpu_ids); + + for (cpuid = 1; cpuid < nr_cpu_ids; cpuid++) { + if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID) { + cpu_set_ops(cpuid); + set_cpu_possible(cpuid, true); + } + } +} + +static int start_secondary_cpu(int cpu, struct task_struct *tidle) +{ + if (cpu_ops[cpu]->cpu_start) + return cpu_ops[cpu]->cpu_start(cpu, tidle); + + return -EOPNOTSUPP; +} + +int __cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + int ret = 0; + tidle->thread_info.cpu = cpu; + + ret = start_secondary_cpu(cpu, tidle); + if (!ret) { + wait_for_completion_timeout(&cpu_running, + msecs_to_jiffies(1000)); + + if (!cpu_online(cpu)) { + pr_crit("CPU%u: failed to come online\n", cpu); + ret = -EIO; + } + } else { + pr_crit("CPU%u: failed to start\n", cpu); + } + + return ret; +} + +void __init smp_cpus_done(unsigned int max_cpus) +{ +} + +/* + * C entry point for a secondary processor. + */ +asmlinkage __visible void smp_callin(void) +{ + struct mm_struct *mm = &init_mm; + unsigned int curr_cpuid = smp_processor_id(); + + riscv_clear_ipi(); + + /* All kernel threads share the same mm context. */ + mmgrab(mm); + current->active_mm = mm; + + store_cpu_topology(curr_cpuid); + notify_cpu_starting(curr_cpuid); + numa_add_cpu(curr_cpuid); + set_cpu_online(curr_cpuid, 1); + + /* + * Remote TLB flushes are ignored while the CPU is offline, so emit + * a local TLB flush right now just in case. + */ + local_flush_tlb_all(); + complete(&cpu_running); + /* + * Disable preemption before enabling interrupts, so we don't try to + * schedule a CPU that hasn't actually started yet. + */ + local_irq_enable(); + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); +} diff --git a/arch/riscv/kernel/soc.c b/arch/riscv/kernel/soc.c new file mode 100644 index 000000000..a0516172a --- /dev/null +++ b/arch/riscv/kernel/soc.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ +#include <linux/init.h> +#include <linux/libfdt.h> +#include <linux/pgtable.h> +#include <asm/soc.h> + +/* + * This is called extremly early, before parse_dtb(), to allow initializing + * SoC hardware before memory or any device driver initialization. + */ +void __init soc_early_init(void) +{ + void (*early_fn)(const void *fdt); + const struct of_device_id *s; + const void *fdt = dtb_early_va; + + for (s = (void *)&__soc_early_init_table_start; + (void *)s < (void *)&__soc_early_init_table_end; s++) { + if (!fdt_node_check_compatible(fdt, 0, s->compatible)) { + early_fn = s->data; + early_fn(fdt); + return; + } + } +} diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c new file mode 100644 index 000000000..17d7383f2 --- /dev/null +++ b/arch/riscv/kernel/stacktrace.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2008 ARM Limited + * Copyright (C) 2014 Regents of the University of California + */ + +#include <linux/export.h> +#include <linux/kallsyms.h> +#include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/sched/task_stack.h> +#include <linux/stacktrace.h> +#include <linux/ftrace.h> + +#include <asm/stacktrace.h> + +#ifdef CONFIG_FRAME_POINTER + +void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, + bool (*fn)(void *, unsigned long), void *arg) +{ + unsigned long fp, sp, pc; + int level = 0; + + if (regs) { + fp = frame_pointer(regs); + sp = user_stack_pointer(regs); + pc = instruction_pointer(regs); + } else if (task == NULL || task == current) { + fp = (unsigned long)__builtin_frame_address(0); + sp = current_stack_pointer; + pc = (unsigned long)walk_stackframe; + level = -1; + } else { + /* task blocked in __switch_to */ + fp = task->thread.s[0]; + sp = task->thread.sp; + pc = task->thread.ra; + } + + for (;;) { + unsigned long low, high; + struct stackframe *frame; + + if (unlikely(!__kernel_text_address(pc) || (level++ >= 0 && !fn(arg, pc)))) + break; + + /* Validate frame pointer */ + low = sp + sizeof(struct stackframe); + high = ALIGN(sp, THREAD_SIZE); + if (unlikely(fp < low || fp > high || fp & 0x7)) + break; + /* Unwind stack frame */ + frame = (struct stackframe *)fp - 1; + sp = fp; + if (regs && (regs->epc == pc) && (frame->fp & 0x7)) { + fp = frame->ra; + pc = regs->ra; + } else { + fp = frame->fp; + pc = ftrace_graph_ret_addr(current, NULL, frame->ra, + &frame->ra); + } + + } +} + +#else /* !CONFIG_FRAME_POINTER */ + +void notrace walk_stackframe(struct task_struct *task, + struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) +{ + unsigned long sp, pc; + unsigned long *ksp; + + if (regs) { + sp = user_stack_pointer(regs); + pc = instruction_pointer(regs); + } else if (task == NULL || task == current) { + sp = current_stack_pointer; + pc = (unsigned long)walk_stackframe; + } else { + /* task blocked in __switch_to */ + sp = task->thread.sp; + pc = task->thread.ra; + } + + if (unlikely(sp & 0x7)) + return; + + ksp = (unsigned long *)sp; + while (!kstack_end(ksp)) { + if (__kernel_text_address(pc) && unlikely(!fn(arg, pc))) + break; + pc = READ_ONCE_NOCHECK(*ksp++) - 0x4; + } +} + +#endif /* CONFIG_FRAME_POINTER */ + +static bool print_trace_address(void *arg, unsigned long pc) +{ + const char *loglvl = arg; + + print_ip_sym(loglvl, pc); + return true; +} + +noinline void dump_backtrace(struct pt_regs *regs, struct task_struct *task, + const char *loglvl) +{ + walk_stackframe(task, regs, print_trace_address, (void *)loglvl); +} + +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) +{ + pr_cont("%sCall Trace:\n", loglvl); + dump_backtrace(NULL, task, loglvl); +} + +static bool save_wchan(void *arg, unsigned long pc) +{ + if (!in_sched_functions(pc)) { + unsigned long *p = arg; + *p = pc; + return false; + } + return true; +} + +unsigned long __get_wchan(struct task_struct *task) +{ + unsigned long pc = 0; + + if (!try_get_task_stack(task)) + return 0; + walk_stackframe(task, NULL, save_wchan, &pc); + put_task_stack(task); + return pc; +} + +noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, + struct task_struct *task, struct pt_regs *regs) +{ + walk_stackframe(task, regs, consume_entry, cookie); +} diff --git a/arch/riscv/kernel/suspend.c b/arch/riscv/kernel/suspend.c new file mode 100644 index 000000000..9ba24fb8c --- /dev/null +++ b/arch/riscv/kernel/suspend.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * Copyright (c) 2022 Ventana Micro Systems Inc. + */ + +#include <linux/ftrace.h> +#include <asm/csr.h> +#include <asm/suspend.h> + +static void suspend_save_csrs(struct suspend_context *context) +{ + context->scratch = csr_read(CSR_SCRATCH); + context->tvec = csr_read(CSR_TVEC); + context->ie = csr_read(CSR_IE); + + /* + * No need to save/restore IP CSR (i.e. MIP or SIP) because: + * + * 1. For no-MMU (M-mode) kernel, the bits in MIP are set by + * external devices (such as interrupt controller, timer, etc). + * 2. For MMU (S-mode) kernel, the bits in SIP are set by + * M-mode firmware and external devices (such as interrupt + * controller, etc). + */ + +#ifdef CONFIG_MMU + context->satp = csr_read(CSR_SATP); +#endif +} + +static void suspend_restore_csrs(struct suspend_context *context) +{ + csr_write(CSR_SCRATCH, context->scratch); + csr_write(CSR_TVEC, context->tvec); + csr_write(CSR_IE, context->ie); + +#ifdef CONFIG_MMU + csr_write(CSR_SATP, context->satp); +#endif +} + +int cpu_suspend(unsigned long arg, + int (*finish)(unsigned long arg, + unsigned long entry, + unsigned long context)) +{ + int rc = 0; + struct suspend_context context = { 0 }; + + /* Finisher should be non-NULL */ + if (!finish) + return -EINVAL; + + /* Save additional CSRs*/ + suspend_save_csrs(&context); + + /* + * Function graph tracer state gets incosistent when the kernel + * calls functions that never return (aka finishers) hence disable + * graph tracing during their execution. + */ + pause_graph_tracing(); + + /* Save context on stack */ + if (__cpu_suspend_enter(&context)) { + /* Call the finisher */ + rc = finish(arg, __pa_symbol(__cpu_resume_enter), + (ulong)&context); + + /* + * Should never reach here, unless the suspend finisher + * fails. Successful cpu_suspend() should return from + * __cpu_resume_entry() + */ + if (!rc) + rc = -EOPNOTSUPP; + } + + /* Enable function graph tracer */ + unpause_graph_tracing(); + + /* Restore additional CSRs */ + suspend_restore_csrs(&context); + + return rc; +} diff --git a/arch/riscv/kernel/suspend_entry.S b/arch/riscv/kernel/suspend_entry.S new file mode 100644 index 000000000..aafcca58c --- /dev/null +++ b/arch/riscv/kernel/suspend_entry.S @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2021 Western Digital Corporation or its affiliates. + * Copyright (c) 2022 Ventana Micro Systems Inc. + */ + +#include <linux/linkage.h> +#include <asm/asm.h> +#include <asm/asm-offsets.h> +#include <asm/csr.h> +#include <asm/xip_fixup.h> + + .text + .altmacro + .option norelax + +ENTRY(__cpu_suspend_enter) + /* Save registers (except A0 and T0-T6) */ + REG_S ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0) + REG_S sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0) + REG_S gp, (SUSPEND_CONTEXT_REGS + PT_GP)(a0) + REG_S tp, (SUSPEND_CONTEXT_REGS + PT_TP)(a0) + REG_S s0, (SUSPEND_CONTEXT_REGS + PT_S0)(a0) + REG_S s1, (SUSPEND_CONTEXT_REGS + PT_S1)(a0) + REG_S a1, (SUSPEND_CONTEXT_REGS + PT_A1)(a0) + REG_S a2, (SUSPEND_CONTEXT_REGS + PT_A2)(a0) + REG_S a3, (SUSPEND_CONTEXT_REGS + PT_A3)(a0) + REG_S a4, (SUSPEND_CONTEXT_REGS + PT_A4)(a0) + REG_S a5, (SUSPEND_CONTEXT_REGS + PT_A5)(a0) + REG_S a6, (SUSPEND_CONTEXT_REGS + PT_A6)(a0) + REG_S a7, (SUSPEND_CONTEXT_REGS + PT_A7)(a0) + REG_S s2, (SUSPEND_CONTEXT_REGS + PT_S2)(a0) + REG_S s3, (SUSPEND_CONTEXT_REGS + PT_S3)(a0) + REG_S s4, (SUSPEND_CONTEXT_REGS + PT_S4)(a0) + REG_S s5, (SUSPEND_CONTEXT_REGS + PT_S5)(a0) + REG_S s6, (SUSPEND_CONTEXT_REGS + PT_S6)(a0) + REG_S s7, (SUSPEND_CONTEXT_REGS + PT_S7)(a0) + REG_S s8, (SUSPEND_CONTEXT_REGS + PT_S8)(a0) + REG_S s9, (SUSPEND_CONTEXT_REGS + PT_S9)(a0) + REG_S s10, (SUSPEND_CONTEXT_REGS + PT_S10)(a0) + REG_S s11, (SUSPEND_CONTEXT_REGS + PT_S11)(a0) + + /* Save CSRs */ + csrr t0, CSR_EPC + REG_S t0, (SUSPEND_CONTEXT_REGS + PT_EPC)(a0) + csrr t0, CSR_STATUS + REG_S t0, (SUSPEND_CONTEXT_REGS + PT_STATUS)(a0) + csrr t0, CSR_TVAL + REG_S t0, (SUSPEND_CONTEXT_REGS + PT_BADADDR)(a0) + csrr t0, CSR_CAUSE + REG_S t0, (SUSPEND_CONTEXT_REGS + PT_CAUSE)(a0) + + /* Return non-zero value */ + li a0, 1 + + /* Return to C code */ + ret +END(__cpu_suspend_enter) + +ENTRY(__cpu_resume_enter) + /* Load the global pointer */ + .option push + .option norelax + la gp, __global_pointer$ + .option pop + +#ifdef CONFIG_MMU + /* Save A0 and A1 */ + add t0, a0, zero + add t1, a1, zero + + /* Enable MMU */ + la a0, swapper_pg_dir + XIP_FIXUP_OFFSET a0 + call relocate_enable_mmu + + /* Restore A0 and A1 */ + add a0, t0, zero + add a1, t1, zero +#endif + + /* Make A0 point to suspend context */ + add a0, a1, zero + + /* Restore CSRs */ + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_EPC)(a0) + csrw CSR_EPC, t0 + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_STATUS)(a0) + csrw CSR_STATUS, t0 + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_BADADDR)(a0) + csrw CSR_TVAL, t0 + REG_L t0, (SUSPEND_CONTEXT_REGS + PT_CAUSE)(a0) + csrw CSR_CAUSE, t0 + + /* Restore registers (except A0 and T0-T6) */ + REG_L ra, (SUSPEND_CONTEXT_REGS + PT_RA)(a0) + REG_L sp, (SUSPEND_CONTEXT_REGS + PT_SP)(a0) + REG_L gp, (SUSPEND_CONTEXT_REGS + PT_GP)(a0) + REG_L tp, (SUSPEND_CONTEXT_REGS + PT_TP)(a0) + REG_L s0, (SUSPEND_CONTEXT_REGS + PT_S0)(a0) + REG_L s1, (SUSPEND_CONTEXT_REGS + PT_S1)(a0) + REG_L a1, (SUSPEND_CONTEXT_REGS + PT_A1)(a0) + REG_L a2, (SUSPEND_CONTEXT_REGS + PT_A2)(a0) + REG_L a3, (SUSPEND_CONTEXT_REGS + PT_A3)(a0) + REG_L a4, (SUSPEND_CONTEXT_REGS + PT_A4)(a0) + REG_L a5, (SUSPEND_CONTEXT_REGS + PT_A5)(a0) + REG_L a6, (SUSPEND_CONTEXT_REGS + PT_A6)(a0) + REG_L a7, (SUSPEND_CONTEXT_REGS + PT_A7)(a0) + REG_L s2, (SUSPEND_CONTEXT_REGS + PT_S2)(a0) + REG_L s3, (SUSPEND_CONTEXT_REGS + PT_S3)(a0) + REG_L s4, (SUSPEND_CONTEXT_REGS + PT_S4)(a0) + REG_L s5, (SUSPEND_CONTEXT_REGS + PT_S5)(a0) + REG_L s6, (SUSPEND_CONTEXT_REGS + PT_S6)(a0) + REG_L s7, (SUSPEND_CONTEXT_REGS + PT_S7)(a0) + REG_L s8, (SUSPEND_CONTEXT_REGS + PT_S8)(a0) + REG_L s9, (SUSPEND_CONTEXT_REGS + PT_S9)(a0) + REG_L s10, (SUSPEND_CONTEXT_REGS + PT_S10)(a0) + REG_L s11, (SUSPEND_CONTEXT_REGS + PT_S11)(a0) + + /* Return zero value */ + add a0, zero, zero + + /* Return to C code */ + ret +END(__cpu_resume_enter) diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c new file mode 100644 index 000000000..5d3f2fbeb --- /dev/null +++ b/arch/riscv/kernel/sys_riscv.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2014 Darius Rad <darius@bluespec.com> + * Copyright (C) 2017 SiFive + */ + +#include <linux/syscalls.h> +#include <asm/unistd.h> +#include <asm/cacheflush.h> +#include <asm-generic/mman-common.h> + +static long riscv_sys_mmap(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, off_t offset, + unsigned long page_shift_offset) +{ + if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) + return -EINVAL; + + return ksys_mmap_pgoff(addr, len, prot, flags, fd, + offset >> (PAGE_SHIFT - page_shift_offset)); +} + +#ifdef CONFIG_64BIT +SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, off_t, offset) +{ + return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 0); +} +#endif + +#if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT) +SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, off_t, offset) +{ + /* + * Note that the shift for mmap2 is constant (12), + * regardless of PAGE_SIZE + */ + return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12); +} +#endif + +/* + * Allows the instruction cache to be flushed from userspace. Despite RISC-V + * having a direct 'fence.i' instruction available to userspace (which we + * can't trap!), that's not actually viable when running on Linux because the + * kernel might schedule a process on another hart. There is no way for + * userspace to handle this without invoking the kernel (as it doesn't know the + * thread->hart mappings), so we've defined a RISC-V specific system call to + * flush the instruction cache. + * + * sys_riscv_flush_icache() is defined to flush the instruction cache over an + * address range, with the flush applying to either all threads or just the + * caller. We don't currently do anything with the address range, that's just + * in there for forwards compatibility. + */ +SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end, + uintptr_t, flags) +{ + /* Check the reserved flags. */ + if (unlikely(flags & ~SYS_RISCV_FLUSH_ICACHE_ALL)) + return -EINVAL; + + flush_icache_mm(current->mm, flags & SYS_RISCV_FLUSH_ICACHE_LOCAL); + + return 0; +} diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c new file mode 100644 index 000000000..44b1420a2 --- /dev/null +++ b/arch/riscv/kernel/syscall_table.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2009 Arnd Bergmann <arnd@arndb.de> + * Copyright (C) 2012 Regents of the University of California + */ + +#include <linux/linkage.h> +#include <linux/syscalls.h> +#include <asm-generic/syscalls.h> +#include <asm/syscall.h> + +#undef __SYSCALL +#define __SYSCALL(nr, call) [nr] = (call), + +void * const sys_call_table[__NR_syscalls] = { + [0 ... __NR_syscalls - 1] = sys_ni_syscall, +#include <asm/unistd.h> +}; diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c new file mode 100644 index 000000000..1cf21db4f --- /dev/null +++ b/arch/riscv/kernel/time.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive + */ + +#include <linux/of_clk.h> +#include <linux/clockchips.h> +#include <linux/clocksource.h> +#include <linux/delay.h> +#include <asm/sbi.h> +#include <asm/processor.h> +#include <asm/timex.h> + +unsigned long riscv_timebase __ro_after_init; +EXPORT_SYMBOL_GPL(riscv_timebase); + +void __init time_init(void) +{ + struct device_node *cpu; + u32 prop; + + cpu = of_find_node_by_path("/cpus"); + if (!cpu || of_property_read_u32(cpu, "timebase-frequency", &prop)) + panic(KERN_WARNING "RISC-V system with no 'timebase-frequency' in DTS\n"); + of_node_put(cpu); + riscv_timebase = prop; + + lpj_fine = riscv_timebase / HZ; + + of_clk_init(NULL); + timer_probe(); + + tick_setup_hrtimer_broadcast(); +} + +void clocksource_arch_init(struct clocksource *cs) +{ +#ifdef CONFIG_GENERIC_GETTIMEOFDAY + cs->vdso_clock_mode = VDSO_CLOCKMODE_ARCHTIMER; +#else + cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE; +#endif +} diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c new file mode 100644 index 000000000..095ac976d --- /dev/null +++ b/arch/riscv/kernel/trace_irq.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> + */ + +#include <linux/irqflags.h> +#include <linux/kprobes.h> +#include "trace_irq.h" + +/* + * trace_hardirqs_on/off require the caller to setup frame pointer properly. + * Otherwise, CALLER_ADDR1 might trigger an pagging exception in kernel. + * Here we add one extra level so they can be safely called by low + * level entry code which $fp is used for other purpose. + */ + +void __trace_hardirqs_on(void) +{ + trace_hardirqs_on(); +} +NOKPROBE_SYMBOL(__trace_hardirqs_on); + +void __trace_hardirqs_off(void) +{ + trace_hardirqs_off(); +} +NOKPROBE_SYMBOL(__trace_hardirqs_off); diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h new file mode 100644 index 000000000..99fe67377 --- /dev/null +++ b/arch/riscv/kernel/trace_irq.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> + */ +#ifndef __TRACE_IRQ_H +#define __TRACE_IRQ_H + +void __trace_hardirqs_on(void); +void __trace_hardirqs_off(void); + +#endif /* __TRACE_IRQ_H */ diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c new file mode 100644 index 000000000..5d07f6b3c --- /dev/null +++ b/arch/riscv/kernel/traps.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Regents of the University of California + */ + +#include <linux/cpu.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/sched/debug.h> +#include <linux/sched/signal.h> +#include <linux/signal.h> +#include <linux/kdebug.h> +#include <linux/uaccess.h> +#include <linux/kprobes.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/irq.h> +#include <linux/kexec.h> + +#include <asm/asm-prototypes.h> +#include <asm/bug.h> +#include <asm/csr.h> +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/thread_info.h> + +int show_unhandled_signals = 1; + +static DEFINE_SPINLOCK(die_lock); + +void die(struct pt_regs *regs, const char *str) +{ + static int die_counter; + int ret; + long cause; + unsigned long flags; + + oops_enter(); + + spin_lock_irqsave(&die_lock, flags); + console_verbose(); + bust_spinlocks(1); + + pr_emerg("%s [#%d]\n", str, ++die_counter); + print_modules(); + if (regs) + show_regs(regs); + + cause = regs ? regs->cause : -1; + ret = notify_die(DIE_OOPS, str, regs, 0, cause, SIGSEGV); + + if (kexec_should_crash(current)) + crash_kexec(regs); + + bust_spinlocks(0); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + spin_unlock_irqrestore(&die_lock, flags); + oops_exit(); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + if (panic_on_oops) + panic("Fatal exception"); + if (ret != NOTIFY_STOP) + make_task_dead(SIGSEGV); +} + +void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) +{ + struct task_struct *tsk = current; + + if (show_unhandled_signals && unhandled_signal(tsk, signo) + && printk_ratelimit()) { + pr_info("%s[%d]: unhandled signal %d code 0x%x at 0x" REG_FMT, + tsk->comm, task_pid_nr(tsk), signo, code, addr); + print_vma_addr(KERN_CONT " in ", instruction_pointer(regs)); + pr_cont("\n"); + __show_regs(regs); + } + + force_sig_fault(signo, code, (void __user *)addr); +} + +static void do_trap_error(struct pt_regs *regs, int signo, int code, + unsigned long addr, const char *str) +{ + current->thread.bad_cause = regs->cause; + + if (user_mode(regs)) { + do_trap(regs, signo, code, addr); + } else { + if (!fixup_exception(regs)) + die(regs, str); + } +} + +#if defined(CONFIG_XIP_KERNEL) && defined(CONFIG_RISCV_ALTERNATIVE) +#define __trap_section __section(".xip.traps") +#else +#define __trap_section +#endif +#define DO_ERROR_INFO(name, signo, code, str) \ +asmlinkage __visible __trap_section void name(struct pt_regs *regs) \ +{ \ + do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \ +} + +DO_ERROR_INFO(do_trap_unknown, + SIGILL, ILL_ILLTRP, "unknown exception"); +DO_ERROR_INFO(do_trap_insn_misaligned, + SIGBUS, BUS_ADRALN, "instruction address misaligned"); +DO_ERROR_INFO(do_trap_insn_fault, + SIGSEGV, SEGV_ACCERR, "instruction access fault"); +DO_ERROR_INFO(do_trap_insn_illegal, + SIGILL, ILL_ILLOPC, "illegal instruction"); +DO_ERROR_INFO(do_trap_load_fault, + SIGSEGV, SEGV_ACCERR, "load access fault"); +#ifndef CONFIG_RISCV_M_MODE +DO_ERROR_INFO(do_trap_load_misaligned, + SIGBUS, BUS_ADRALN, "Oops - load address misaligned"); +DO_ERROR_INFO(do_trap_store_misaligned, + SIGBUS, BUS_ADRALN, "Oops - store (or AMO) address misaligned"); +#else +int handle_misaligned_load(struct pt_regs *regs); +int handle_misaligned_store(struct pt_regs *regs); + +asmlinkage void __trap_section do_trap_load_misaligned(struct pt_regs *regs) +{ + if (!handle_misaligned_load(regs)) + return; + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + "Oops - load address misaligned"); +} + +asmlinkage void __trap_section do_trap_store_misaligned(struct pt_regs *regs) +{ + if (!handle_misaligned_store(regs)) + return; + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + "Oops - store (or AMO) address misaligned"); +} +#endif +DO_ERROR_INFO(do_trap_store_fault, + SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault"); +DO_ERROR_INFO(do_trap_ecall_u, + SIGILL, ILL_ILLTRP, "environment call from U-mode"); +DO_ERROR_INFO(do_trap_ecall_s, + SIGILL, ILL_ILLTRP, "environment call from S-mode"); +DO_ERROR_INFO(do_trap_ecall_m, + SIGILL, ILL_ILLTRP, "environment call from M-mode"); + +static inline unsigned long get_break_insn_length(unsigned long pc) +{ + bug_insn_t insn; + + if (get_kernel_nofault(insn, (bug_insn_t *)pc)) + return 0; + + return GET_INSN_LENGTH(insn); +} + +asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) +{ +#ifdef CONFIG_KPROBES + if (kprobe_single_step_handler(regs)) + return; + + if (kprobe_breakpoint_handler(regs)) + return; +#endif +#ifdef CONFIG_UPROBES + if (uprobe_single_step_handler(regs)) + return; + + if (uprobe_breakpoint_handler(regs)) + return; +#endif + current->thread.bad_cause = regs->cause; + + if (user_mode(regs)) + force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->epc); +#ifdef CONFIG_KGDB + else if (notify_die(DIE_TRAP, "EBREAK", regs, 0, regs->cause, SIGTRAP) + == NOTIFY_STOP) + return; +#endif + else if (report_bug(regs->epc, regs) == BUG_TRAP_TYPE_WARN) + regs->epc += get_break_insn_length(regs->epc); + else + die(regs, "Kernel BUG"); +} +NOKPROBE_SYMBOL(do_trap_break); + +#ifdef CONFIG_GENERIC_BUG +int is_valid_bugaddr(unsigned long pc) +{ + bug_insn_t insn; + + if (pc < VMALLOC_START) + return 0; + if (get_kernel_nofault(insn, (bug_insn_t *)pc)) + return 0; + if ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) + return (insn == __BUG_INSN_32); + else + return ((insn & __COMPRESSED_INSN_MASK) == __BUG_INSN_16); +} +#endif /* CONFIG_GENERIC_BUG */ + +#ifdef CONFIG_VMAP_STACK +static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], + overflow_stack)__aligned(16); +/* + * shadow stack, handled_ kernel_ stack_ overflow(in kernel/entry.S) is used + * to get per-cpu overflow stack(get_overflow_stack). + */ +long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16); +asmlinkage unsigned long get_overflow_stack(void) +{ + return (unsigned long)this_cpu_ptr(overflow_stack) + + OVERFLOW_STACK_SIZE; +} + +/* + * A pseudo spinlock to protect the shadow stack from being used by multiple + * harts concurrently. This isn't a real spinlock because the lock side must + * be taken without a valid stack and only a single register, it's only taken + * while in the process of panicing anyway so the performance and error + * checking a proper spinlock gives us doesn't matter. + */ +unsigned long spin_shadow_stack; + +asmlinkage void handle_bad_stack(struct pt_regs *regs) +{ + unsigned long tsk_stk = (unsigned long)current->stack; + unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack); + + /* + * We're done with the shadow stack by this point, as we're on the + * overflow stack. Tell any other concurrent overflowing harts that + * they can proceed with panicing by releasing the pseudo-spinlock. + * + * This pairs with an amoswap.aq in handle_kernel_stack_overflow. + */ + smp_store_release(&spin_shadow_stack, 0); + + console_verbose(); + + pr_emerg("Insufficient stack space to handle exception!\n"); + pr_emerg("Task stack: [0x%016lx..0x%016lx]\n", + tsk_stk, tsk_stk + THREAD_SIZE); + pr_emerg("Overflow stack: [0x%016lx..0x%016lx]\n", + ovf_stk, ovf_stk + OVERFLOW_STACK_SIZE); + + __show_regs(regs); + panic("Kernel stack overflow"); + + for (;;) + wait_for_interrupt(); +} +#endif diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c new file mode 100644 index 000000000..5348d842c --- /dev/null +++ b/arch/riscv/kernel/traps_misaligned.c @@ -0,0 +1,366 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/irq.h> +#include <linux/stringify.h> + +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/csr.h> + +#define INSN_MATCH_LB 0x3 +#define INSN_MASK_LB 0x707f +#define INSN_MATCH_LH 0x1003 +#define INSN_MASK_LH 0x707f +#define INSN_MATCH_LW 0x2003 +#define INSN_MASK_LW 0x707f +#define INSN_MATCH_LD 0x3003 +#define INSN_MASK_LD 0x707f +#define INSN_MATCH_LBU 0x4003 +#define INSN_MASK_LBU 0x707f +#define INSN_MATCH_LHU 0x5003 +#define INSN_MASK_LHU 0x707f +#define INSN_MATCH_LWU 0x6003 +#define INSN_MASK_LWU 0x707f +#define INSN_MATCH_SB 0x23 +#define INSN_MASK_SB 0x707f +#define INSN_MATCH_SH 0x1023 +#define INSN_MASK_SH 0x707f +#define INSN_MATCH_SW 0x2023 +#define INSN_MASK_SW 0x707f +#define INSN_MATCH_SD 0x3023 +#define INSN_MASK_SD 0x707f + +#define INSN_MATCH_FLW 0x2007 +#define INSN_MASK_FLW 0x707f +#define INSN_MATCH_FLD 0x3007 +#define INSN_MASK_FLD 0x707f +#define INSN_MATCH_FLQ 0x4007 +#define INSN_MASK_FLQ 0x707f +#define INSN_MATCH_FSW 0x2027 +#define INSN_MASK_FSW 0x707f +#define INSN_MATCH_FSD 0x3027 +#define INSN_MASK_FSD 0x707f +#define INSN_MATCH_FSQ 0x4027 +#define INSN_MASK_FSQ 0x707f + +#define INSN_MATCH_C_LD 0x6000 +#define INSN_MASK_C_LD 0xe003 +#define INSN_MATCH_C_SD 0xe000 +#define INSN_MASK_C_SD 0xe003 +#define INSN_MATCH_C_LW 0x4000 +#define INSN_MASK_C_LW 0xe003 +#define INSN_MATCH_C_SW 0xc000 +#define INSN_MASK_C_SW 0xe003 +#define INSN_MATCH_C_LDSP 0x6002 +#define INSN_MASK_C_LDSP 0xe003 +#define INSN_MATCH_C_SDSP 0xe002 +#define INSN_MASK_C_SDSP 0xe003 +#define INSN_MATCH_C_LWSP 0x4002 +#define INSN_MASK_C_LWSP 0xe003 +#define INSN_MATCH_C_SWSP 0xc002 +#define INSN_MASK_C_SWSP 0xe003 + +#define INSN_MATCH_C_FLD 0x2000 +#define INSN_MASK_C_FLD 0xe003 +#define INSN_MATCH_C_FLW 0x6000 +#define INSN_MASK_C_FLW 0xe003 +#define INSN_MATCH_C_FSD 0xa000 +#define INSN_MASK_C_FSD 0xe003 +#define INSN_MATCH_C_FSW 0xe000 +#define INSN_MASK_C_FSW 0xe003 +#define INSN_MATCH_C_FLDSP 0x2002 +#define INSN_MASK_C_FLDSP 0xe003 +#define INSN_MATCH_C_FSDSP 0xa002 +#define INSN_MASK_C_FSDSP 0xe003 +#define INSN_MATCH_C_FLWSP 0x6002 +#define INSN_MASK_C_FLWSP 0xe003 +#define INSN_MATCH_C_FSWSP 0xe002 +#define INSN_MASK_C_FSWSP 0xe003 + +#define INSN_LEN(insn) ((((insn) & 0x3) < 0x3) ? 2 : 4) + +#if defined(CONFIG_64BIT) +#define LOG_REGBYTES 3 +#define XLEN 64 +#else +#define LOG_REGBYTES 2 +#define XLEN 32 +#endif +#define REGBYTES (1 << LOG_REGBYTES) +#define XLEN_MINUS_16 ((XLEN) - 16) + +#define SH_RD 7 +#define SH_RS1 15 +#define SH_RS2 20 +#define SH_RS2C 2 + +#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) +#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | \ + (RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 5, 1) << 6)) +#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 5, 2) << 6)) +#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | \ + (RV_X(x, 12, 1) << 5) | \ + (RV_X(x, 2, 2) << 6)) +#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | \ + (RV_X(x, 12, 1) << 5) | \ + (RV_X(x, 2, 3) << 6)) +#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | \ + (RV_X(x, 7, 2) << 6)) +#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 7, 3) << 6)) +#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3)) +#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3)) +#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5) + +#define SHIFT_RIGHT(x, y) \ + ((y) < 0 ? ((x) << -(y)) : ((x) >> (y))) + +#define REG_MASK \ + ((1 << (5 + LOG_REGBYTES)) - (1 << LOG_REGBYTES)) + +#define REG_OFFSET(insn, pos) \ + (SHIFT_RIGHT((insn), (pos) - LOG_REGBYTES) & REG_MASK) + +#define REG_PTR(insn, pos, regs) \ + (ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)) + +#define GET_RM(insn) (((insn) >> 12) & 7) + +#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs)) +#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs)) +#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs)) +#define GET_RS2S(insn, regs) (*REG_PTR(RVC_RS2S(insn), 0, regs)) +#define GET_RS2C(insn, regs) (*REG_PTR(insn, SH_RS2C, regs)) +#define GET_SP(regs) (*REG_PTR(2, 0, regs)) +#define SET_RD(insn, regs, val) (*REG_PTR(insn, SH_RD, regs) = (val)) +#define IMM_I(insn) ((s32)(insn) >> 20) +#define IMM_S(insn) (((s32)(insn) >> 25 << 5) | \ + (s32)(((insn) >> 7) & 0x1f)) +#define MASK_FUNCT3 0x7000 + +#define GET_PRECISION(insn) (((insn) >> 25) & 3) +#define GET_RM(insn) (((insn) >> 12) & 7) +#define PRECISION_S 0 +#define PRECISION_D 1 + +#define DECLARE_UNPRIVILEGED_LOAD_FUNCTION(type, insn) \ +static inline type load_##type(const type *addr) \ +{ \ + type val; \ + asm (#insn " %0, %1" \ + : "=&r" (val) : "m" (*addr)); \ + return val; \ +} + +#define DECLARE_UNPRIVILEGED_STORE_FUNCTION(type, insn) \ +static inline void store_##type(type *addr, type val) \ +{ \ + asm volatile (#insn " %0, %1\n" \ + : : "r" (val), "m" (*addr)); \ +} + +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u8, lbu) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u16, lhu) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s8, lb) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s16, lh) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s32, lw) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(u8, sb) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(u16, sh) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(u32, sw) +#if defined(CONFIG_64BIT) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lwu) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u64, ld) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(u64, sd) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, ld) +#else +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lw) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, lw) + +static inline u64 load_u64(const u64 *addr) +{ + return load_u32((u32 *)addr) + + ((u64)load_u32((u32 *)addr + 1) << 32); +} + +static inline void store_u64(u64 *addr, u64 val) +{ + store_u32((u32 *)addr, val); + store_u32((u32 *)addr + 1, val >> 32); +} +#endif + +static inline ulong get_insn(ulong mepc) +{ + register ulong __mepc asm ("a2") = mepc; + ulong val, rvc_mask = 3, tmp; + + asm ("and %[tmp], %[addr], 2\n" + "bnez %[tmp], 1f\n" +#if defined(CONFIG_64BIT) + __stringify(LWU) " %[insn], (%[addr])\n" +#else + __stringify(LW) " %[insn], (%[addr])\n" +#endif + "and %[tmp], %[insn], %[rvc_mask]\n" + "beq %[tmp], %[rvc_mask], 2f\n" + "sll %[insn], %[insn], %[xlen_minus_16]\n" + "srl %[insn], %[insn], %[xlen_minus_16]\n" + "j 2f\n" + "1:\n" + "lhu %[insn], (%[addr])\n" + "and %[tmp], %[insn], %[rvc_mask]\n" + "bne %[tmp], %[rvc_mask], 2f\n" + "lhu %[tmp], 2(%[addr])\n" + "sll %[tmp], %[tmp], 16\n" + "add %[insn], %[insn], %[tmp]\n" + "2:" + : [insn] "=&r" (val), [tmp] "=&r" (tmp) + : [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask), + [xlen_minus_16] "i" (XLEN_MINUS_16)); + + return val; +} + +union reg_data { + u8 data_bytes[8]; + ulong data_ulong; + u64 data_u64; +}; + +int handle_misaligned_load(struct pt_regs *regs) +{ + union reg_data val; + unsigned long epc = regs->epc; + unsigned long insn = get_insn(epc); + unsigned long addr = csr_read(mtval); + int i, fp = 0, shift = 0, len = 0; + + regs->epc = 0; + + if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) { + len = 4; + shift = 8 * (sizeof(unsigned long) - len); +#if defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) { + len = 8; + shift = 8 * (sizeof(unsigned long) - len); + } else if ((insn & INSN_MASK_LWU) == INSN_MATCH_LWU) { + len = 4; +#endif + } else if ((insn & INSN_MASK_FLD) == INSN_MATCH_FLD) { + fp = 1; + len = 8; + } else if ((insn & INSN_MASK_FLW) == INSN_MATCH_FLW) { + fp = 1; + len = 4; + } else if ((insn & INSN_MASK_LH) == INSN_MATCH_LH) { + len = 2; + shift = 8 * (sizeof(unsigned long) - len); + } else if ((insn & INSN_MASK_LHU) == INSN_MATCH_LHU) { + len = 2; +#if defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_C_LD) == INSN_MATCH_C_LD) { + len = 8; + shift = 8 * (sizeof(unsigned long) - len); + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LDSP) == INSN_MATCH_C_LDSP && + ((insn >> SH_RD) & 0x1f)) { + len = 8; + shift = 8 * (sizeof(unsigned long) - len); +#endif + } else if ((insn & INSN_MASK_C_LW) == INSN_MATCH_C_LW) { + len = 4; + shift = 8 * (sizeof(unsigned long) - len); + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LWSP) == INSN_MATCH_C_LWSP && + ((insn >> SH_RD) & 0x1f)) { + len = 4; + shift = 8 * (sizeof(unsigned long) - len); + } else if ((insn & INSN_MASK_C_FLD) == INSN_MATCH_C_FLD) { + fp = 1; + len = 8; + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_FLDSP) == INSN_MATCH_C_FLDSP) { + fp = 1; + len = 8; +#if defined(CONFIG_32BIT) + } else if ((insn & INSN_MASK_C_FLW) == INSN_MATCH_C_FLW) { + fp = 1; + len = 4; + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_FLWSP) == INSN_MATCH_C_FLWSP) { + fp = 1; + len = 4; +#endif + } else { + regs->epc = epc; + return -1; + } + + val.data_u64 = 0; + for (i = 0; i < len; i++) + val.data_bytes[i] = load_u8((void *)(addr + i)); + + if (fp) + return -1; + SET_RD(insn, regs, val.data_ulong << shift >> shift); + + regs->epc = epc + INSN_LEN(insn); + + return 0; +} + +int handle_misaligned_store(struct pt_regs *regs) +{ + union reg_data val; + unsigned long epc = regs->epc; + unsigned long insn = get_insn(epc); + unsigned long addr = csr_read(mtval); + int i, len = 0; + + regs->epc = 0; + + val.data_ulong = GET_RS2(insn, regs); + + if ((insn & INSN_MASK_SW) == INSN_MATCH_SW) { + len = 4; +#if defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_SD) == INSN_MATCH_SD) { + len = 8; +#endif + } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) { + len = 2; +#if defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) { + len = 8; + val.data_ulong = GET_RS2S(insn, regs); + } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP) { + len = 8; + val.data_ulong = GET_RS2C(insn, regs); +#endif + } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) { + len = 4; + val.data_ulong = GET_RS2S(insn, regs); + } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP) { + len = 4; + val.data_ulong = GET_RS2C(insn, regs); + } else { + regs->epc = epc; + return -1; + } + + for (i = 0; i < len; i++) + store_u8((void *)(addr + i), val.data_bytes[i]); + + regs->epc = epc + INSN_LEN(insn); + + return 0; +} diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c new file mode 100644 index 000000000..123d05255 --- /dev/null +++ b/arch/riscv/kernel/vdso.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. + * <benh@kernel.crashing.org> + * Copyright (C) 2012 ARM Limited + * Copyright (C) 2015 Regents of the University of California + */ + +#include <linux/elf.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/binfmts.h> +#include <linux/err.h> +#include <asm/page.h> +#include <asm/vdso.h> +#include <linux/time_namespace.h> + +#ifdef CONFIG_GENERIC_TIME_VSYSCALL +#include <vdso/datapage.h> +#else +struct vdso_data { +}; +#endif + +extern char vdso_start[], vdso_end[]; +#ifdef CONFIG_COMPAT +extern char compat_vdso_start[], compat_vdso_end[]; +#endif + +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_NR_PAGES, +}; + +enum rv_vdso_map { + RV_VDSO_MAP_VVAR, + RV_VDSO_MAP_VDSO, +}; + +#define VVAR_SIZE (VVAR_NR_PAGES << PAGE_SHIFT) + +/* + * The vDSO data page. + */ +static union { + struct vdso_data data; + u8 page[PAGE_SIZE]; +} vdso_data_store __page_aligned_data; +struct vdso_data *vdso_data = &vdso_data_store.data; + +struct __vdso_info { + const char *name; + const char *vdso_code_start; + const char *vdso_code_end; + unsigned long vdso_pages; + /* Data Mapping */ + struct vm_special_mapping *dm; + /* Code Mapping */ + struct vm_special_mapping *cm; +}; + +static struct __vdso_info vdso_info; +#ifdef CONFIG_COMPAT +static struct __vdso_info compat_vdso_info; +#endif + +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *new_vma) +{ + current->mm->context.vdso = (void *)new_vma->vm_start; + + return 0; +} + +static void __init __vdso_init(struct __vdso_info *vdso_info) +{ + unsigned int i; + struct page **vdso_pagelist; + unsigned long pfn; + + if (memcmp(vdso_info->vdso_code_start, "\177ELF", 4)) + panic("vDSO is not a valid ELF object!\n"); + + vdso_info->vdso_pages = ( + vdso_info->vdso_code_end - + vdso_info->vdso_code_start) >> + PAGE_SHIFT; + + vdso_pagelist = kcalloc(vdso_info->vdso_pages, + sizeof(struct page *), + GFP_KERNEL); + if (vdso_pagelist == NULL) + panic("vDSO kcalloc failed!\n"); + + /* Grab the vDSO code pages. */ + pfn = sym_to_pfn(vdso_info->vdso_code_start); + + for (i = 0; i < vdso_info->vdso_pages; i++) + vdso_pagelist[i] = pfn_to_page(pfn + i); + + vdso_info->cm->pages = vdso_pagelist; +} + +#ifdef CONFIG_TIME_NS +struct vdso_data *arch_get_vdso_data(void *vvar_page) +{ + return (struct vdso_data *)(vvar_page); +} + +/* + * The vvar mapping contains data for a specific time namespace, so when a task + * changes namespace we must unmap its vvar data for the old namespace. + * Subsequent faults will map in data for the new namespace. + * + * For more details see timens_setup_vdso_data(). + */ +int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) +{ + struct mm_struct *mm = task->mm; + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, mm, 0); + + mmap_read_lock(mm); + + for_each_vma(vmi, vma) { + unsigned long size = vma->vm_end - vma->vm_start; + + if (vma_is_special_mapping(vma, vdso_info.dm)) + zap_page_range(vma, vma->vm_start, size); +#ifdef CONFIG_COMPAT + if (vma_is_special_mapping(vma, compat_vdso_info.dm)) + zap_page_range(vma, vma->vm_start, size); +#endif + } + + mmap_read_unlock(mm); + return 0; +} + +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + if (likely(vma->vm_mm == current->mm)) + return current->nsproxy->time_ns->vvar_page; + + /* + * VM_PFNMAP | VM_IO protect .fault() handler from being called + * through interfaces like /proc/$pid/mem or + * process_vm_{readv,writev}() as long as there's no .access() + * in special_mapping_vmops. + * For more details check_vma_flags() and __access_remote_vm() + */ + WARN(1, "vvar_page accessed remotely"); + + return NULL; +} +#else +static struct page *find_timens_vvar_page(struct vm_area_struct *vma) +{ + return NULL; +} +#endif + +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, + struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct page *timens_page = find_timens_vvar_page(vma); + unsigned long pfn; + + switch (vmf->pgoff) { + case VVAR_DATA_PAGE_OFFSET: + if (timens_page) + pfn = page_to_pfn(timens_page); + else + pfn = sym_to_pfn(vdso_data); + break; +#ifdef CONFIG_TIME_NS + case VVAR_TIMENS_PAGE_OFFSET: + /* + * If a task belongs to a time namespace then a namespace + * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and + * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET + * offset. + * See also the comment near timens_setup_vdso_data(). + */ + if (!timens_page) + return VM_FAULT_SIGBUS; + pfn = sym_to_pfn(vdso_data); + break; +#endif /* CONFIG_TIME_NS */ + default: + return VM_FAULT_SIGBUS; + } + + return vmf_insert_pfn(vma, vmf->address, pfn); +} + +static struct vm_special_mapping rv_vdso_maps[] __ro_after_init = { + [RV_VDSO_MAP_VVAR] = { + .name = "[vvar]", + .fault = vvar_fault, + }, + [RV_VDSO_MAP_VDSO] = { + .name = "[vdso]", + .mremap = vdso_mremap, + }, +}; + +static struct __vdso_info vdso_info __ro_after_init = { + .name = "vdso", + .vdso_code_start = vdso_start, + .vdso_code_end = vdso_end, + .dm = &rv_vdso_maps[RV_VDSO_MAP_VVAR], + .cm = &rv_vdso_maps[RV_VDSO_MAP_VDSO], +}; + +#ifdef CONFIG_COMPAT +static struct vm_special_mapping rv_compat_vdso_maps[] __ro_after_init = { + [RV_VDSO_MAP_VVAR] = { + .name = "[vvar]", + .fault = vvar_fault, + }, + [RV_VDSO_MAP_VDSO] = { + .name = "[vdso]", + .mremap = vdso_mremap, + }, +}; + +static struct __vdso_info compat_vdso_info __ro_after_init = { + .name = "compat_vdso", + .vdso_code_start = compat_vdso_start, + .vdso_code_end = compat_vdso_end, + .dm = &rv_compat_vdso_maps[RV_VDSO_MAP_VVAR], + .cm = &rv_compat_vdso_maps[RV_VDSO_MAP_VDSO], +}; +#endif + +static int __init vdso_init(void) +{ + __vdso_init(&vdso_info); +#ifdef CONFIG_COMPAT + __vdso_init(&compat_vdso_info); +#endif + + return 0; +} +arch_initcall(vdso_init); + +static int __setup_additional_pages(struct mm_struct *mm, + struct linux_binprm *bprm, + int uses_interp, + struct __vdso_info *vdso_info) +{ + unsigned long vdso_base, vdso_text_len, vdso_mapping_len; + void *ret; + + BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES); + + vdso_text_len = vdso_info->vdso_pages << PAGE_SHIFT; + /* Be sure to map the data page */ + vdso_mapping_len = vdso_text_len + VVAR_SIZE; + + vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); + if (IS_ERR_VALUE(vdso_base)) { + ret = ERR_PTR(vdso_base); + goto up_fail; + } + + ret = _install_special_mapping(mm, vdso_base, VVAR_SIZE, + (VM_READ | VM_MAYREAD | VM_PFNMAP), vdso_info->dm); + if (IS_ERR(ret)) + goto up_fail; + + vdso_base += VVAR_SIZE; + mm->context.vdso = (void *)vdso_base; + + ret = + _install_special_mapping(mm, vdso_base, vdso_text_len, + (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC), + vdso_info->cm); + + if (IS_ERR(ret)) + goto up_fail; + + return 0; + +up_fail: + mm->context.vdso = NULL; + return PTR_ERR(ret); +} + +#ifdef CONFIG_COMPAT +int compat_arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp) +{ + struct mm_struct *mm = current->mm; + int ret; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + ret = __setup_additional_pages(mm, bprm, uses_interp, + &compat_vdso_info); + mmap_write_unlock(mm); + + return ret; +} +#endif + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + int ret; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + + ret = __setup_additional_pages(mm, bprm, uses_interp, &vdso_info); + mmap_write_unlock(mm); + + return ret; +} diff --git a/arch/riscv/kernel/vdso/.gitignore b/arch/riscv/kernel/vdso/.gitignore new file mode 100644 index 000000000..3a19def86 --- /dev/null +++ b/arch/riscv/kernel/vdso/.gitignore @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +vdso.lds +*.tmp +vdso-syms.S diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile new file mode 100644 index 000000000..06e6b27f3 --- /dev/null +++ b/arch/riscv/kernel/vdso/Makefile @@ -0,0 +1,83 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Copied from arch/tile/kernel/vdso/Makefile + +# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before +# the inclusion of generic Makefile. +ARCH_REL_TYPE_ABS := R_RISCV_32|R_RISCV_64|R_RISCV_JUMP_SLOT +include $(srctree)/lib/vdso/Makefile +# Symbols present in the vdso +vdso-syms = rt_sigreturn +ifdef CONFIG_64BIT +vdso-syms += vgettimeofday +endif +vdso-syms += getcpu +vdso-syms += flush_icache + +# Files to link into the vdso +obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o + +ccflags-y := -fno-stack-protector +ccflags-y += -DDISABLE_BRANCH_PROFILING + +ifneq ($(c-gettimeofday-y),) + CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) +endif + +# Build rules +targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds +obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) + +obj-y += vdso.o +CPPFLAGS_vdso.lds += -P -C -U$(ARCH) +ifneq ($(filter vgettimeofday, $(vdso-syms)),) +CPPFLAGS_vdso.lds += -DHAS_VGETTIMEOFDAY +endif + +# Disable -pg to prevent insert call site +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) + +# Disable profiling and instrumentation for VDSO code +GCOV_PROFILE := n +KCOV_INSTRUMENT := n +KASAN_SANITIZE := n +UBSAN_SANITIZE := n + +# Force dependency +$(obj)/vdso.o: $(obj)/vdso.so + +# link rule for the .so file, .lds has to be first +$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE + $(call if_changed,vdsold) +LDFLAGS_vdso.so.dbg = -shared -S -soname=linux-vdso.so.1 \ + --build-id=sha1 --hash-style=both --eh-frame-hdr + +# strip rule for the .so file +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE + $(call if_changed,vdsosym) + +# actual build commands +# The DSO images are built using a special linker script +# Make sure only to export the intended __vdso_xxx symbol offsets. +quiet_cmd_vdsold = VDSOLD $@ + cmd_vdsold = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \ + $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ + rm $@.tmp + +# install commands for the unstripped file +quiet_cmd_vdso_install = INSTALL $@ + cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@ + +vdso.so: $(obj)/vdso.so.dbg + @mkdir -p $(MODLIB)/vdso + $(call cmd,vdso_install) + +vdso_install: vdso.so diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S new file mode 100644 index 000000000..82f97d67c --- /dev/null +++ b/arch/riscv/kernel/vdso/flush_icache.S @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2017 SiFive + */ + +#include <linux/linkage.h> +#include <asm/unistd.h> + + .text +/* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ +ENTRY(__vdso_flush_icache) + .cfi_startproc +#ifdef CONFIG_SMP + li a7, __NR_riscv_flush_icache + ecall +#else + fence.i + li a0, 0 +#endif + ret + .cfi_endproc +ENDPROC(__vdso_flush_icache) diff --git a/arch/riscv/kernel/vdso/gen_vdso_offsets.sh b/arch/riscv/kernel/vdso/gen_vdso_offsets.sh new file mode 100755 index 000000000..c2e5613f3 --- /dev/null +++ b/arch/riscv/kernel/vdso/gen_vdso_offsets.sh @@ -0,0 +1,5 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +LC_ALL=C +sed -n -e 's/^[0]\+\(0[0-9a-fA-F]*\) . \(__vdso_[a-zA-Z0-9_]*\)$/\#define \2_offset\t0x\1/p' diff --git a/arch/riscv/kernel/vdso/getcpu.S b/arch/riscv/kernel/vdso/getcpu.S new file mode 100644 index 000000000..bb0c05e2f --- /dev/null +++ b/arch/riscv/kernel/vdso/getcpu.S @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2017 SiFive + */ + +#include <linux/linkage.h> +#include <asm/unistd.h> + + .text +/* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */ +ENTRY(__vdso_getcpu) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_getcpu + ecall + ret + .cfi_endproc +ENDPROC(__vdso_getcpu) diff --git a/arch/riscv/kernel/vdso/note.S b/arch/riscv/kernel/vdso/note.S new file mode 100644 index 000000000..2a956c942 --- /dev/null +++ b/arch/riscv/kernel/vdso/note.S @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/elfnote.h> +#include <linux/version.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/riscv/kernel/vdso/rt_sigreturn.S b/arch/riscv/kernel/vdso/rt_sigreturn.S new file mode 100644 index 000000000..0573705ea --- /dev/null +++ b/arch/riscv/kernel/vdso/rt_sigreturn.S @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014 Regents of the University of California + */ + +#include <linux/linkage.h> +#include <asm/unistd.h> + + .text +ENTRY(__vdso_rt_sigreturn) + .cfi_startproc + .cfi_signal_frame + li a7, __NR_rt_sigreturn + scall + .cfi_endproc +ENDPROC(__vdso_rt_sigreturn) diff --git a/arch/riscv/kernel/vdso/vdso.S b/arch/riscv/kernel/vdso/vdso.S new file mode 100644 index 000000000..83f1c899e --- /dev/null +++ b/arch/riscv/kernel/vdso/vdso.S @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014 Regents of the University of California + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/page.h> + +#ifndef __VDSO_PATH +#define __VDSO_PATH "arch/riscv/kernel/vdso/vdso.so" +#endif + + __PAGE_ALIGNED_DATA + + .globl vdso_start, vdso_end + .balign PAGE_SIZE +vdso_start: + .incbin __VDSO_PATH + .balign PAGE_SIZE +vdso_end: + + .previous diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S new file mode 100644 index 000000000..150b1a572 --- /dev/null +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Regents of the University of California + */ +#include <asm/page.h> +#include <asm/vdso.h> + +OUTPUT_ARCH(riscv) + +SECTIONS +{ + PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); +#ifdef CONFIG_TIME_NS + PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); +#endif + . = SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + .dynamic : { *(.dynamic) } :text :dynamic + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + + /* + * This linker script is used both with -r and with -shared. + * For the layouts to match, we need to skip more than enough + * space for the dynamic symbol table, etc. If this amount is + * insufficient, ld -shared will error; simply increase it here. + */ + . = 0x800; + .text : { *(.text .text.*) } :text + + .data : { + *(.got.plt) *(.got) + *(.data .data.* .gnu.linkonce.d.*) + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + } +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + LINUX_4.15 { + global: + __vdso_rt_sigreturn; +#ifdef HAS_VGETTIMEOFDAY + __vdso_gettimeofday; + __vdso_clock_gettime; + __vdso_clock_getres; +#endif + __vdso_getcpu; + __vdso_flush_icache; + local: *; + }; +} diff --git a/arch/riscv/kernel/vdso/vgettimeofday.c b/arch/riscv/kernel/vdso/vgettimeofday.c new file mode 100644 index 000000000..cc0d80699 --- /dev/null +++ b/arch/riscv/kernel/vdso/vgettimeofday.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copied from arch/arm64/kernel/vdso/vgettimeofday.c + * + * Copyright (C) 2018 ARM Ltd. + * Copyright (C) 2020 SiFive + */ + +#include <linux/time.h> +#include <linux/types.h> + +extern +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); +int __vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts) +{ + return __cvdso_clock_gettime(clock, ts); +} + +extern +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) +{ + return __cvdso_gettimeofday(tv, tz); +} + +extern +int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res); +int __vdso_clock_getres(clockid_t clock_id, struct __kernel_timespec *res) +{ + return __cvdso_clock_getres(clock_id, res); +} diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S new file mode 100644 index 000000000..24a2fdd3b --- /dev/null +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive + * Copyright (C) 2020 Vitaly Wool, Konsulko AB + */ + +#include <asm/pgtable.h> +#define LOAD_OFFSET KERNEL_LINK_ADDR +/* No __ro_after_init data in the .rodata section - which will always be ro */ +#define RO_AFTER_INIT_DATA + +#include <asm/vmlinux.lds.h> +#include <asm/page.h> +#include <asm/cache.h> +#include <asm/thread_info.h> + +OUTPUT_ARCH(riscv) +ENTRY(_start) + +jiffies = jiffies_64; + +SECTIONS +{ + /* Beginning of code and text segment */ + . = LOAD_OFFSET; + _xiprom = .; + _start = .; + HEAD_TEXT_SECTION + INIT_TEXT_SECTION(PAGE_SIZE) + /* we have to discard exit text and such at runtime, not link time */ + __exittext_begin = .; + .exit.text : + { + EXIT_TEXT + } + __exittext_end = .; + + .text : { + _text = .; + _stext = .; + TEXT_TEXT + SCHED_TEXT + CPUIDLE_TEXT + LOCK_TEXT + KPROBES_TEXT + ENTRY_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT + _etext = .; + } + RO_DATA(L1_CACHE_BYTES) + .srodata : { + *(.srodata*) + } + .init.rodata : { + INIT_SETUP(16) + INIT_CALLS + CON_INITCALL + INIT_RAM_FS + } + _exiprom = .; /* End of XIP ROM area */ + + +/* + * From this point, stuff is considered writable and will be copied to RAM + */ + __data_loc = ALIGN(PAGE_SIZE); /* location in file */ + . = KERNEL_LINK_ADDR + XIP_OFFSET; /* location in memory */ + +#undef LOAD_OFFSET +#define LOAD_OFFSET (KERNEL_LINK_ADDR + XIP_OFFSET - (__data_loc & XIP_OFFSET_MASK)) + + _sdata = .; /* Start of data section */ + _data = .; + RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + _edata = .; + __start_ro_after_init = .; + .data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) { + *(.data..ro_after_init) + } + __end_ro_after_init = .; + + . = ALIGN(PAGE_SIZE); + __init_begin = .; + .init.data : { + INIT_DATA + } + .exit.data : { + EXIT_DATA + } + . = ALIGN(8); + __soc_early_init_table : { + __soc_early_init_table_start = .; + KEEP(*(__soc_early_init_table)) + __soc_early_init_table_end = .; + } + __soc_builtin_dtb_table : { + __soc_builtin_dtb_table_start = .; + KEEP(*(__soc_builtin_dtb_table)) + __soc_builtin_dtb_table_end = .; + } + + . = ALIGN(8); + .alternative : { + __alt_start = .; + *(.alternative) + __alt_end = .; + } + __init_end = .; + + . = ALIGN(16); + .xip.traps : { + __xip_traps_start = .; + *(.xip.traps) + __xip_traps_end = .; + } + + . = ALIGN(PAGE_SIZE); + .sdata : { + __global_pointer$ = . + 0x800; + *(.sdata*) + *(.sbss*) + } + + BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) + + PERCPU_SECTION(L1_CACHE_BYTES) + + .rel.dyn : AT(ADDR(.rel.dyn) - LOAD_OFFSET) { + *(.rel.dyn*) + } + + /* + * End of copied data. We need a dummy section to get its LMA. + * Also located before final ALIGN() as trailing padding is not stored + * in the resulting binary file and useless to copy. + */ + .data.endmark : AT(ADDR(.data.endmark) - LOAD_OFFSET) { } + _edata_loc = LOADADDR(.data.endmark); + + . = ALIGN(PAGE_SIZE); + _end = .; + + STABS_DEBUG + DWARF_DEBUG + + DISCARDS +} diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S new file mode 100644 index 000000000..d478e063b --- /dev/null +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive + */ + +#define RO_EXCEPTION_TABLE_ALIGN 4 + +#ifdef CONFIG_XIP_KERNEL +#include "vmlinux-xip.lds.S" +#else + +#include <asm/pgtable.h> +#define LOAD_OFFSET KERNEL_LINK_ADDR + +#include <asm/vmlinux.lds.h> +#include <asm/page.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <asm/set_memory.h> +#include "image-vars.h" + +#include <linux/sizes.h> +OUTPUT_ARCH(riscv) +ENTRY(_start) + +jiffies = jiffies_64; + +PECOFF_SECTION_ALIGNMENT = 0x1000; +PECOFF_FILE_ALIGNMENT = 0x200; + +SECTIONS +{ + /* Beginning of code and text segment */ + . = LOAD_OFFSET; + _start = .; + HEAD_TEXT_SECTION + . = ALIGN(PAGE_SIZE); + + .text : { + _text = .; + _stext = .; + TEXT_TEXT + SCHED_TEXT + CPUIDLE_TEXT + LOCK_TEXT + KPROBES_TEXT + ENTRY_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT + _etext = .; + } + + . = ALIGN(SECTION_ALIGN); + __init_begin = .; + __init_text_begin = .; + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) ALIGN(SECTION_ALIGN) { \ + _sinittext = .; \ + INIT_TEXT \ + _einittext = .; \ + } + + . = ALIGN(8); + __soc_early_init_table : { + __soc_early_init_table_start = .; + KEEP(*(__soc_early_init_table)) + __soc_early_init_table_end = .; + } + __soc_builtin_dtb_table : { + __soc_builtin_dtb_table_start = .; + KEEP(*(__soc_builtin_dtb_table)) + __soc_builtin_dtb_table_end = .; + } + /* we have to discard exit text and such at runtime, not link time */ + __exittext_begin = .; + .exit.text : + { + EXIT_TEXT + } + __exittext_end = .; + + __init_text_end = .; + . = ALIGN(SECTION_ALIGN); +#ifdef CONFIG_EFI + . = ALIGN(PECOFF_SECTION_ALIGNMENT); + __pecoff_text_end = .; +#endif + /* Start of init data section */ + __init_data_begin = .; + INIT_DATA_SECTION(16) + .exit.data : + { + EXIT_DATA + } + PERCPU_SECTION(L1_CACHE_BYTES) + + .rel.dyn : { + *(.rel.dyn*) + } + + __init_data_end = .; + + . = ALIGN(8); + .alternative : { + __alt_start = .; + *(.alternative) + __alt_end = .; + } + __init_end = .; + + /* Start of data section */ + _sdata = .; + RO_DATA(SECTION_ALIGN) + .srodata : { + *(.srodata*) + } + + . = ALIGN(SECTION_ALIGN); + _data = .; + + RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN) + .sdata : { + __global_pointer$ = . + 0x800; + *(.sdata*) + } + +#ifdef CONFIG_EFI + .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); } + __pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end); +#endif + + /* End of data section */ + _edata = .; + + BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) + +#ifdef CONFIG_EFI + . = ALIGN(PECOFF_SECTION_ALIGNMENT); + __pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end); +#endif + _end = .; + + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS + + DISCARDS +} +#endif /* CONFIG_XIP_KERNEL */ |