diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /arch/arm64/kvm/hyp/include | |
parent | Initial commit. (diff) | |
download | linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip |
Adding upstream version 6.1.76.upstream/6.1.76
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/arm64/kvm/hyp/include')
-rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/adjust_pc.h | 53 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/debug-sr.h | 168 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/fault.h | 75 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/switch.h | 533 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 219 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/early_alloc.h | 14 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 208 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/gfp.h | 34 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 73 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/memory.h | 48 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/mm.h | 39 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/spinlock.h | 117 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/include/nvhe/trap_handler.h | 20 |
13 files changed, 1601 insertions, 0 deletions
diff --git a/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h new file mode 100644 index 000000000..4fdfeabef --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/adjust_pc.h @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Guest PC manipulation helpers + * + * Copyright (C) 2012,2013 - ARM Ltd + * Copyright (C) 2020 - Google LLC + * Author: Marc Zyngier <maz@kernel.org> + */ + +#ifndef __ARM64_KVM_HYP_ADJUST_PC_H__ +#define __ARM64_KVM_HYP_ADJUST_PC_H__ + +#include <asm/kvm_emulate.h> +#include <asm/kvm_host.h> + +static inline void kvm_skip_instr(struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) { + kvm_skip_instr32(vcpu); + } else { + *vcpu_pc(vcpu) += 4; + *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK; + } + + /* advance the singlestep state machine */ + *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; +} + +/* + * Skip an instruction which has been emulated at hyp while most guest sysregs + * are live. + */ +static inline void __kvm_skip_instr(struct kvm_vcpu *vcpu) +{ + *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); + vcpu_gp_regs(vcpu)->pstate = read_sysreg_el2(SYS_SPSR); + + kvm_skip_instr(vcpu); + + write_sysreg_el2(vcpu_gp_regs(vcpu)->pstate, SYS_SPSR); + write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); +} + +/* + * Skip an instruction while host sysregs are live. + * Assumes host is always 64-bit. + */ +static inline void kvm_skip_host_instr(void) +{ + write_sysreg_el2(read_sysreg_el2(SYS_ELR) + 4, SYS_ELR); +} + +#endif diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h new file mode 100644 index 000000000..961bbef10 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#ifndef __ARM64_KVM_HYP_DEBUG_SR_H__ +#define __ARM64_KVM_HYP_DEBUG_SR_H__ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/debug-monitors.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> + +#define read_debug(r,n) read_sysreg(r##n##_el1) +#define write_debug(v,r,n) write_sysreg(v, r##n##_el1) + +#define save_debug(ptr,reg,nr) \ + switch (nr) { \ + case 15: ptr[15] = read_debug(reg, 15); \ + fallthrough; \ + case 14: ptr[14] = read_debug(reg, 14); \ + fallthrough; \ + case 13: ptr[13] = read_debug(reg, 13); \ + fallthrough; \ + case 12: ptr[12] = read_debug(reg, 12); \ + fallthrough; \ + case 11: ptr[11] = read_debug(reg, 11); \ + fallthrough; \ + case 10: ptr[10] = read_debug(reg, 10); \ + fallthrough; \ + case 9: ptr[9] = read_debug(reg, 9); \ + fallthrough; \ + case 8: ptr[8] = read_debug(reg, 8); \ + fallthrough; \ + case 7: ptr[7] = read_debug(reg, 7); \ + fallthrough; \ + case 6: ptr[6] = read_debug(reg, 6); \ + fallthrough; \ + case 5: ptr[5] = read_debug(reg, 5); \ + fallthrough; \ + case 4: ptr[4] = read_debug(reg, 4); \ + fallthrough; \ + case 3: ptr[3] = read_debug(reg, 3); \ + fallthrough; \ + case 2: ptr[2] = read_debug(reg, 2); \ + fallthrough; \ + case 1: ptr[1] = read_debug(reg, 1); \ + fallthrough; \ + default: ptr[0] = read_debug(reg, 0); \ + } + +#define restore_debug(ptr,reg,nr) \ + switch (nr) { \ + case 15: write_debug(ptr[15], reg, 15); \ + fallthrough; \ + case 14: write_debug(ptr[14], reg, 14); \ + fallthrough; \ + case 13: write_debug(ptr[13], reg, 13); \ + fallthrough; \ + case 12: write_debug(ptr[12], reg, 12); \ + fallthrough; \ + case 11: write_debug(ptr[11], reg, 11); \ + fallthrough; \ + case 10: write_debug(ptr[10], reg, 10); \ + fallthrough; \ + case 9: write_debug(ptr[9], reg, 9); \ + fallthrough; \ + case 8: write_debug(ptr[8], reg, 8); \ + fallthrough; \ + case 7: write_debug(ptr[7], reg, 7); \ + fallthrough; \ + case 6: write_debug(ptr[6], reg, 6); \ + fallthrough; \ + case 5: write_debug(ptr[5], reg, 5); \ + fallthrough; \ + case 4: write_debug(ptr[4], reg, 4); \ + fallthrough; \ + case 3: write_debug(ptr[3], reg, 3); \ + fallthrough; \ + case 2: write_debug(ptr[2], reg, 2); \ + fallthrough; \ + case 1: write_debug(ptr[1], reg, 1); \ + fallthrough; \ + default: write_debug(ptr[0], reg, 0); \ + } + +static void __debug_save_state(struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) +{ + u64 aa64dfr0; + int brps, wrps; + + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); + brps = (aa64dfr0 >> 12) & 0xf; + wrps = (aa64dfr0 >> 20) & 0xf; + + save_debug(dbg->dbg_bcr, dbgbcr, brps); + save_debug(dbg->dbg_bvr, dbgbvr, brps); + save_debug(dbg->dbg_wcr, dbgwcr, wrps); + save_debug(dbg->dbg_wvr, dbgwvr, wrps); + + ctxt_sys_reg(ctxt, MDCCINT_EL1) = read_sysreg(mdccint_el1); +} + +static void __debug_restore_state(struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) +{ + u64 aa64dfr0; + int brps, wrps; + + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); + + brps = (aa64dfr0 >> 12) & 0xf; + wrps = (aa64dfr0 >> 20) & 0xf; + + restore_debug(dbg->dbg_bcr, dbgbcr, brps); + restore_debug(dbg->dbg_bvr, dbgbvr, brps); + restore_debug(dbg->dbg_wcr, dbgwcr, wrps); + restore_debug(dbg->dbg_wvr, dbgwvr, wrps); + + write_sysreg(ctxt_sys_reg(ctxt, MDCCINT_EL1), mdccint_el1); +} + +static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + struct kvm_guest_debug_arch *host_dbg; + struct kvm_guest_debug_arch *guest_dbg; + + if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) + return; + + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + guest_ctxt = &vcpu->arch.ctxt; + host_dbg = &vcpu->arch.host_debug_state.regs; + guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); + + __debug_save_state(host_dbg, host_ctxt); + __debug_restore_state(guest_dbg, guest_ctxt); +} + +static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + struct kvm_guest_debug_arch *host_dbg; + struct kvm_guest_debug_arch *guest_dbg; + + if (!vcpu_get_flag(vcpu, DEBUG_DIRTY)) + return; + + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + guest_ctxt = &vcpu->arch.ctxt; + host_dbg = &vcpu->arch.host_debug_state.regs; + guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr); + + __debug_save_state(guest_dbg, guest_ctxt); + __debug_restore_state(host_dbg, host_ctxt); + + vcpu_clear_flag(vcpu, DEBUG_DIRTY); +} + +#endif /* __ARM64_KVM_HYP_DEBUG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h new file mode 100644 index 000000000..1b8a2dcd7 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#ifndef __ARM64_KVM_HYP_FAULT_H__ +#define __ARM64_KVM_HYP_FAULT_H__ + +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> + +static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) +{ + u64 par, tmp; + + /* + * Resolve the IPA the hard way using the guest VA. + * + * Stage-1 translation already validated the memory access + * rights. As such, we can use the EL1 translation regime, and + * don't have to distinguish between EL0 and EL1 access. + * + * We do need to save/restore PAR_EL1 though, as we haven't + * saved the guest context yet, and we may return early... + */ + par = read_sysreg_par(); + if (!__kvm_at("s1e1r", far)) + tmp = read_sysreg_par(); + else + tmp = SYS_PAR_EL1_F; /* back to the guest */ + write_sysreg(par, par_el1); + + if (unlikely(tmp & SYS_PAR_EL1_F)) + return false; /* Translation failed, back to guest */ + + /* Convert PAR to HPFAR format */ + *hpfar = PAR_TO_HPFAR(tmp); + return true; +} + +static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) +{ + u64 hpfar, far; + + far = read_sysreg_el2(SYS_FAR); + + /* + * The HPFAR can be invalid if the stage 2 fault did not + * happen during a stage 1 page table walk (the ESR_EL2.S1PTW + * bit is clear) and one of the two following cases are true: + * 1. The fault was due to a permission fault + * 2. The processor carries errata 834220 + * + * Therefore, for all non S1PTW faults where we either have a + * permission fault or the errata workaround is enabled, we + * resolve the IPA using the AT instruction. + */ + if (!(esr & ESR_ELx_S1PTW) && + (cpus_have_final_cap(ARM64_WORKAROUND_834220) || + (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + if (!__translate_far_to_hpfar(far, &hpfar)) + return false; + } else { + hpfar = read_sysreg(hpfar_el2); + } + + fault->far_el2 = far; + fault->hpfar_el2 = hpfar; + return true; +} + +#endif diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h new file mode 100644 index 000000000..081aca8f4 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -0,0 +1,533 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#ifndef __ARM64_KVM_HYP_SWITCH_H__ +#define __ARM64_KVM_HYP_SWITCH_H__ + +#include <hyp/adjust_pc.h> +#include <hyp/fault.h> + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> +#include <linux/types.h> +#include <linux/jump_label.h> +#include <uapi/linux/psci.h> + +#include <kvm/arm_psci.h> + +#include <asm/barrier.h> +#include <asm/cpufeature.h> +#include <asm/extable.h> +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/fpsimd.h> +#include <asm/debug-monitors.h> +#include <asm/processor.h> + +struct kvm_exception_table_entry { + int insn, fixup; +}; + +extern struct kvm_exception_table_entry __start___kvm_ex_table; +extern struct kvm_exception_table_entry __stop___kvm_ex_table; + +/* Check whether the FP regs are owned by the guest */ +static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED; +} + +/* Save the 32-bit only FPSIMD system register state */ +static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + __vcpu_sys_reg(vcpu, FPEXC32_EL2) = read_sysreg(fpexc32_el2); +} + +static inline void __activate_traps_fpsimd32(struct kvm_vcpu *vcpu) +{ + /* + * We are about to set CPTR_EL2.TFP to trap all floating point + * register accesses to EL2, however, the ARM ARM clearly states that + * traps are only taken to EL2 if the operation would not otherwise + * trap to EL1. Therefore, always make sure that for 32-bit guests, + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. + * If FP/ASIMD is not implemented, FPEXC is UNDEFINED and any access to + * it will cause an exception. + */ + if (vcpu_el1_is_32bit(vcpu) && system_supports_fpsimd()) { + write_sysreg(1 << 30, fpexc32_el2); + isb(); + } +} + +static inline void __activate_traps_common(struct kvm_vcpu *vcpu) +{ + /* Trap on AArch32 cp15 c15 (impdef sysregs) accesses (EL1 or EL0) */ + write_sysreg(1 << 15, hstr_el2); + + /* + * Make sure we trap PMU access from EL0 to EL2. Also sanitize + * PMSELR_EL0 to make sure it never contains the cycle + * counter, which could make a PMXEVCNTR_EL0 access UNDEF at + * EL1 instead of being trapped to EL2. + */ + if (kvm_arm_support_pmu_v3()) { + struct kvm_cpu_context *hctxt; + + write_sysreg(0, pmselr_el0); + + hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0); + write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); + } + + vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); + + if (cpus_have_final_cap(ARM64_SME)) { + sysreg_clear_set_s(SYS_HFGRTR_EL2, + HFGxTR_EL2_nSMPRI_EL1_MASK | + HFGxTR_EL2_nTPIDR2_EL0_MASK, + 0); + sysreg_clear_set_s(SYS_HFGWTR_EL2, + HFGxTR_EL2_nSMPRI_EL1_MASK | + HFGxTR_EL2_nTPIDR2_EL0_MASK, + 0); + } +} + +static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu) +{ + write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2); + + write_sysreg(0, hstr_el2); + if (kvm_arm_support_pmu_v3()) { + struct kvm_cpu_context *hctxt; + + hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0); + } + + if (cpus_have_final_cap(ARM64_SME)) { + sysreg_clear_set_s(SYS_HFGRTR_EL2, 0, + HFGxTR_EL2_nSMPRI_EL1_MASK | + HFGxTR_EL2_nTPIDR2_EL0_MASK); + sysreg_clear_set_s(SYS_HFGWTR_EL2, 0, + HFGxTR_EL2_nSMPRI_EL1_MASK | + HFGxTR_EL2_nTPIDR2_EL0_MASK); + } +} + +static inline void ___activate_traps(struct kvm_vcpu *vcpu) +{ + u64 hcr = vcpu->arch.hcr_el2; + + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM)) + hcr |= HCR_TVM; + + write_sysreg(hcr, hcr_el2); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE)) + write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2); +} + +static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) +{ + /* + * If we pended a virtual abort, preserve it until it gets + * cleared. See D1.14.3 (Virtual Interrupts) for details, but + * the crucial bit is "On taking a vSError interrupt, + * HCR_EL2.VSE is cleared to 0." + */ + if (vcpu->arch.hcr_el2 & HCR_VSE) { + vcpu->arch.hcr_el2 &= ~HCR_VSE; + vcpu->arch.hcr_el2 |= read_sysreg(hcr_el2) & HCR_VSE; + } +} + +static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) +{ + return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault); +} + +static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) +{ + sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2); + __sve_restore_state(vcpu_sve_pffr(vcpu), + &vcpu->arch.ctxt.fp_regs.fpsr); + write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR); +} + +/* + * We trap the first access to the FP/SIMD to save the host context and + * restore the guest context lazily. + * If FP/SIMD is not implemented, handle the trap and inject an undefined + * instruction exception to the guest. Similarly for trapped SVE accesses. + */ +static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + bool sve_guest; + u8 esr_ec; + u64 reg; + + if (!system_supports_fpsimd()) + return false; + + sve_guest = vcpu_has_sve(vcpu); + esr_ec = kvm_vcpu_trap_get_class(vcpu); + + /* Don't handle SVE traps for non-SVE vcpus here: */ + if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD) + return false; + + /* Valid trap. Switch the context: */ + + /* First disable enough traps to allow us to update the registers */ + if (has_vhe()) { + reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN; + if (sve_guest) + reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; + + sysreg_clear_set(cpacr_el1, 0, reg); + } else { + reg = CPTR_EL2_TFP; + if (sve_guest) + reg |= CPTR_EL2_TZ; + + sysreg_clear_set(cptr_el2, reg, 0); + } + isb(); + + /* Write out the host state if it's in the registers */ + if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED) + __fpsimd_save_state(vcpu->arch.host_fpsimd_state); + + /* Restore the guest state */ + if (sve_guest) + __hyp_sve_restore_guest(vcpu); + else + __fpsimd_restore_state(&vcpu->arch.ctxt.fp_regs); + + /* Skip restoring fpexc32 for AArch64 guests */ + if (!(read_sysreg(hcr_el2) & HCR_RW)) + write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2); + + vcpu->arch.fp_state = FP_STATE_GUEST_OWNED; + + return true; +} + +static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) +{ + u32 sysreg = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu)); + int rt = kvm_vcpu_sys_get_rt(vcpu); + u64 val = vcpu_get_reg(vcpu, rt); + + /* + * The normal sysreg handling code expects to see the traps, + * let's not do anything here. + */ + if (vcpu->arch.hcr_el2 & HCR_TVM) + return false; + + switch (sysreg) { + case SYS_SCTLR_EL1: + write_sysreg_el1(val, SYS_SCTLR); + break; + case SYS_TTBR0_EL1: + write_sysreg_el1(val, SYS_TTBR0); + break; + case SYS_TTBR1_EL1: + write_sysreg_el1(val, SYS_TTBR1); + break; + case SYS_TCR_EL1: + write_sysreg_el1(val, SYS_TCR); + break; + case SYS_ESR_EL1: + write_sysreg_el1(val, SYS_ESR); + break; + case SYS_FAR_EL1: + write_sysreg_el1(val, SYS_FAR); + break; + case SYS_AFSR0_EL1: + write_sysreg_el1(val, SYS_AFSR0); + break; + case SYS_AFSR1_EL1: + write_sysreg_el1(val, SYS_AFSR1); + break; + case SYS_MAIR_EL1: + write_sysreg_el1(val, SYS_MAIR); + break; + case SYS_AMAIR_EL1: + write_sysreg_el1(val, SYS_AMAIR); + break; + case SYS_CONTEXTIDR_EL1: + write_sysreg_el1(val, SYS_CONTEXTIDR); + break; + default: + return false; + } + + __kvm_skip_instr(vcpu); + return true; +} + +static inline bool esr_is_ptrauth_trap(u64 esr) +{ + switch (esr_sys64_to_sysreg(esr)) { + case SYS_APIAKEYLO_EL1: + case SYS_APIAKEYHI_EL1: + case SYS_APIBKEYLO_EL1: + case SYS_APIBKEYHI_EL1: + case SYS_APDAKEYLO_EL1: + case SYS_APDAKEYHI_EL1: + case SYS_APDBKEYLO_EL1: + case SYS_APDBKEYHI_EL1: + case SYS_APGAKEYLO_EL1: + case SYS_APGAKEYHI_EL1: + return true; + } + + return false; +} + +#define __ptrauth_save_key(ctxt, key) \ + do { \ + u64 __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ + __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ + ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \ +} while(0) + +DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); + +static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + struct kvm_cpu_context *ctxt; + u64 val; + + if (!vcpu_has_ptrauth(vcpu)) + return false; + + ctxt = this_cpu_ptr(&kvm_hyp_ctxt); + __ptrauth_save_key(ctxt, APIA); + __ptrauth_save_key(ctxt, APIB); + __ptrauth_save_key(ctxt, APDA); + __ptrauth_save_key(ctxt, APDB); + __ptrauth_save_key(ctxt, APGA); + + vcpu_ptrauth_enable(vcpu); + + val = read_sysreg(hcr_el2); + val |= (HCR_API | HCR_APK); + write_sysreg(val, hcr_el2); + + return true; +} + +static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && + handle_tx2_tvm(vcpu)) + return true; + + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + __vgic_v3_perform_cpuif_access(vcpu) == 1) + return true; + + if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) + return kvm_hyp_handle_ptrauth(vcpu, exit_code); + + return false; +} + +static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + __vgic_v3_perform_cpuif_access(vcpu) == 1) + return true; + + return false; +} + +static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (!__populate_fault_info(vcpu)) + return true; + + return false; +} +static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) + __alias(kvm_hyp_handle_memory_fault); +static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code) + __alias(kvm_hyp_handle_memory_fault); + +static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (kvm_hyp_handle_memory_fault(vcpu, exit_code)) + return true; + + if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { + bool valid; + + valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && + kvm_vcpu_dabt_isvalid(vcpu) && + !kvm_vcpu_abt_issea(vcpu) && + !kvm_vcpu_abt_iss1tw(vcpu); + + if (valid) { + int ret = __vgic_v2_perform_cpuif_access(vcpu); + + if (ret == 1) + return true; + + /* Promote an illegal access to an SError.*/ + if (ret == -1) + *exit_code = ARM_EXCEPTION_EL1_SERROR; + } + } + + return false; +} + +typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *); + +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu); + +static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code); + +/* + * Allow the hypervisor to handle the exit with an exit handler if it has one. + * + * Returns true if the hypervisor handled the exit, and control should go back + * to the guest, or false if it hasn't. + */ +static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu); + exit_handler_fn fn; + + fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; + + if (fn) + return fn(vcpu, exit_code); + + return false; +} + +static inline void synchronize_vcpu_pstate(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + /* + * Check for the conditions of Cortex-A510's #2077057. When these occur + * SPSR_EL2 can't be trusted, but isn't needed either as it is + * unchanged from the value in vcpu_gp_regs(vcpu)->pstate. + * Are we single-stepping the guest, and took a PAC exception from the + * active-not-pending state? + */ + if (cpus_have_final_cap(ARM64_WORKAROUND_2077057) && + vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP && + *vcpu_cpsr(vcpu) & DBG_SPSR_SS && + ESR_ELx_EC(read_sysreg_el2(SYS_ESR)) == ESR_ELx_EC_PAC) + write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR); + + vcpu->arch.ctxt.regs.pstate = read_sysreg_el2(SYS_SPSR); +} + +/* + * Return true when we were able to fixup the guest exit and should return to + * the guest, false when we should restore the host state and return to the + * main run loop. + */ +static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + /* + * Save PSTATE early so that we can evaluate the vcpu mode + * early on. + */ + synchronize_vcpu_pstate(vcpu, exit_code); + + /* + * Check whether we want to repaint the state one way or + * another. + */ + early_exit_filter(vcpu, exit_code); + + if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) + vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR); + + if (ARM_SERROR_PENDING(*exit_code) && + ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ) { + u8 esr_ec = kvm_vcpu_trap_get_class(vcpu); + + /* + * HVC already have an adjusted PC, which we need to + * correct in order to return to after having injected + * the SError. + * + * SMC, on the other hand, is *trapped*, meaning its + * preferred return address is the SMC itself. + */ + if (esr_ec == ESR_ELx_EC_HVC32 || esr_ec == ESR_ELx_EC_HVC64) + write_sysreg_el2(read_sysreg_el2(SYS_ELR) - 4, SYS_ELR); + } + + /* + * We're using the raw exception code in order to only process + * the trap if no SError is pending. We will come back to the + * same PC once the SError has been injected, and replay the + * trapping instruction. + */ + if (*exit_code != ARM_EXCEPTION_TRAP) + goto exit; + + /* Check if there's an exit handler and allow it to handle the exit. */ + if (kvm_hyp_handle_exit(vcpu, exit_code)) + goto guest; +exit: + /* Return to the host kernel and handle the exit */ + return false; + +guest: + /* Re-enter the guest */ + asm(ALTERNATIVE("nop", "dmb sy", ARM64_WORKAROUND_1508412)); + return true; +} + +static inline void __kvm_unexpected_el2_exception(void) +{ + extern char __guest_exit_panic[]; + unsigned long addr, fixup; + struct kvm_exception_table_entry *entry, *end; + unsigned long elr_el2 = read_sysreg(elr_el2); + + entry = &__start___kvm_ex_table; + end = &__stop___kvm_ex_table; + + while (entry < end) { + addr = (unsigned long)&entry->insn + entry->insn; + fixup = (unsigned long)&entry->fixup + entry->fixup; + + if (addr != elr_el2) { + entry++; + continue; + } + + write_sysreg(fixup, elr_el2); + return; + } + + /* Trigger a panic after restoring the hyp context. */ + write_sysreg(__guest_exit_panic, elr_el2); +} + +#endif /* __ARM64_KVM_HYP_SWITCH_H__ */ diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h new file mode 100644 index 000000000..baa5b9b3d --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#ifndef __ARM64_KVM_HYP_SYSREG_SR_H__ +#define __ARM64_KVM_HYP_SYSREG_SR_H__ + +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kprobes.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> + +static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, MDSCR_EL1) = read_sysreg(mdscr_el1); +} + +static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, TPIDR_EL0) = read_sysreg(tpidr_el0); + ctxt_sys_reg(ctxt, TPIDRRO_EL0) = read_sysreg(tpidrro_el0); +} + +static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu; + + if (!vcpu) + vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt); + + return kvm_has_mte(kern_hyp_va(vcpu->kvm)); +} + +static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt) +{ + ctxt_sys_reg(ctxt, CSSELR_EL1) = read_sysreg(csselr_el1); + ctxt_sys_reg(ctxt, SCTLR_EL1) = read_sysreg_el1(SYS_SCTLR); + ctxt_sys_reg(ctxt, CPACR_EL1) = read_sysreg_el1(SYS_CPACR); + ctxt_sys_reg(ctxt, TTBR0_EL1) = read_sysreg_el1(SYS_TTBR0); + ctxt_sys_reg(ctxt, TTBR1_EL1) = read_sysreg_el1(SYS_TTBR1); + ctxt_sys_reg(ctxt, TCR_EL1) = read_sysreg_el1(SYS_TCR); + ctxt_sys_reg(ctxt, ESR_EL1) = read_sysreg_el1(SYS_ESR); + ctxt_sys_reg(ctxt, AFSR0_EL1) = read_sysreg_el1(SYS_AFSR0); + ctxt_sys_reg(ctxt, AFSR1_EL1) = read_sysreg_el1(SYS_AFSR1); + ctxt_sys_reg(ctxt, FAR_EL1) = read_sysreg_el1(SYS_FAR); + ctxt_sys_reg(ctxt, MAIR_EL1) = read_sysreg_el1(SYS_MAIR); + ctxt_sys_reg(ctxt, VBAR_EL1) = read_sysreg_el1(SYS_VBAR); + ctxt_sys_reg(ctxt, CONTEXTIDR_EL1) = read_sysreg_el1(SYS_CONTEXTIDR); + ctxt_sys_reg(ctxt, AMAIR_EL1) = read_sysreg_el1(SYS_AMAIR); + ctxt_sys_reg(ctxt, CNTKCTL_EL1) = read_sysreg_el1(SYS_CNTKCTL); + ctxt_sys_reg(ctxt, PAR_EL1) = read_sysreg_par(); + ctxt_sys_reg(ctxt, TPIDR_EL1) = read_sysreg(tpidr_el1); + + if (ctxt_has_mte(ctxt)) { + ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR); + ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1); + } + + ctxt_sys_reg(ctxt, SP_EL1) = read_sysreg(sp_el1); + ctxt_sys_reg(ctxt, ELR_EL1) = read_sysreg_el1(SYS_ELR); + ctxt_sys_reg(ctxt, SPSR_EL1) = read_sysreg_el1(SYS_SPSR); +} + +static inline void __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt) +{ + ctxt->regs.pc = read_sysreg_el2(SYS_ELR); + /* + * Guest PSTATE gets saved at guest fixup time in all + * cases. We still need to handle the nVHE host side here. + */ + if (!has_vhe() && ctxt->__hyp_running_vcpu) + ctxt->regs.pstate = read_sysreg_el2(SYS_SPSR); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + ctxt_sys_reg(ctxt, DISR_EL1) = read_sysreg_s(SYS_VDISR_EL2); +} + +static inline void __sysreg_restore_common_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, MDSCR_EL1), mdscr_el1); +} + +static inline void __sysreg_restore_user_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL0), tpidr_el0); + write_sysreg(ctxt_sys_reg(ctxt, TPIDRRO_EL0), tpidrro_el0); +} + +static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt_sys_reg(ctxt, MPIDR_EL1), vmpidr_el2); + write_sysreg(ctxt_sys_reg(ctxt, CSSELR_EL1), csselr_el1); + + if (has_vhe() || + !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); + } else if (!ctxt->__hyp_running_vcpu) { + /* + * Must only be done for guest registers, hence the context + * test. We're coming from the host, so SCTLR.M is already + * set. Pairs with nVHE's __activate_traps(). + */ + write_sysreg_el1((ctxt_sys_reg(ctxt, TCR_EL1) | + TCR_EPD1_MASK | TCR_EPD0_MASK), + SYS_TCR); + isb(); + } + + write_sysreg_el1(ctxt_sys_reg(ctxt, CPACR_EL1), SYS_CPACR); + write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR0_EL1), SYS_TTBR0); + write_sysreg_el1(ctxt_sys_reg(ctxt, TTBR1_EL1), SYS_TTBR1); + write_sysreg_el1(ctxt_sys_reg(ctxt, ESR_EL1), SYS_ESR); + write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR0_EL1), SYS_AFSR0); + write_sysreg_el1(ctxt_sys_reg(ctxt, AFSR1_EL1), SYS_AFSR1); + write_sysreg_el1(ctxt_sys_reg(ctxt, FAR_EL1), SYS_FAR); + write_sysreg_el1(ctxt_sys_reg(ctxt, MAIR_EL1), SYS_MAIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, VBAR_EL1), SYS_VBAR); + write_sysreg_el1(ctxt_sys_reg(ctxt, CONTEXTIDR_EL1), SYS_CONTEXTIDR); + write_sysreg_el1(ctxt_sys_reg(ctxt, AMAIR_EL1), SYS_AMAIR); + write_sysreg_el1(ctxt_sys_reg(ctxt, CNTKCTL_EL1), SYS_CNTKCTL); + write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1), par_el1); + write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1), tpidr_el1); + + if (ctxt_has_mte(ctxt)) { + write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR); + write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1); + } + + if (!has_vhe() && + cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && + ctxt->__hyp_running_vcpu) { + /* + * Must only be done for host registers, hence the context + * test. Pairs with nVHE's __deactivate_traps(). + */ + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * deconfigured and disabled. We can now restore the host's + * S1 configuration: SCTLR, and only then TCR. + */ + write_sysreg_el1(ctxt_sys_reg(ctxt, SCTLR_EL1), SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt_sys_reg(ctxt, TCR_EL1), SYS_TCR); + } + + write_sysreg(ctxt_sys_reg(ctxt, SP_EL1), sp_el1); + write_sysreg_el1(ctxt_sys_reg(ctxt, ELR_EL1), SYS_ELR); + write_sysreg_el1(ctxt_sys_reg(ctxt, SPSR_EL1), SYS_SPSR); +} + +static inline void __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt) +{ + u64 pstate = ctxt->regs.pstate; + u64 mode = pstate & PSR_AA32_MODE_MASK; + + /* + * Safety check to ensure we're setting the CPU up to enter the guest + * in a less privileged mode. + * + * If we are attempting a return to EL2 or higher in AArch64 state, + * program SPSR_EL2 with M=EL2h and the IL bit set which ensures that + * we'll take an illegal exception state exception immediately after + * the ERET to the guest. Attempts to return to AArch32 Hyp will + * result in an illegal exception return because EL2's execution state + * is determined by SCR_EL3.RW. + */ + if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t) + pstate = PSR_MODE_EL2h | PSR_IL_BIT; + + write_sysreg_el2(ctxt->regs.pc, SYS_ELR); + write_sysreg_el2(pstate, SYS_SPSR); + + if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) + write_sysreg_s(ctxt_sys_reg(ctxt, DISR_EL1), SYS_VDISR_EL2); +} + +static inline void __sysreg32_save_state(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + vcpu->arch.ctxt.spsr_abt = read_sysreg(spsr_abt); + vcpu->arch.ctxt.spsr_und = read_sysreg(spsr_und); + vcpu->arch.ctxt.spsr_irq = read_sysreg(spsr_irq); + vcpu->arch.ctxt.spsr_fiq = read_sysreg(spsr_fiq); + + __vcpu_sys_reg(vcpu, DACR32_EL2) = read_sysreg(dacr32_el2); + __vcpu_sys_reg(vcpu, IFSR32_EL2) = read_sysreg(ifsr32_el2); + + if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY)) + __vcpu_sys_reg(vcpu, DBGVCR32_EL2) = read_sysreg(dbgvcr32_el2); +} + +static inline void __sysreg32_restore_state(struct kvm_vcpu *vcpu) +{ + if (!vcpu_el1_is_32bit(vcpu)) + return; + + write_sysreg(vcpu->arch.ctxt.spsr_abt, spsr_abt); + write_sysreg(vcpu->arch.ctxt.spsr_und, spsr_und); + write_sysreg(vcpu->arch.ctxt.spsr_irq, spsr_irq); + write_sysreg(vcpu->arch.ctxt.spsr_fiq, spsr_fiq); + + write_sysreg(__vcpu_sys_reg(vcpu, DACR32_EL2), dacr32_el2); + write_sysreg(__vcpu_sys_reg(vcpu, IFSR32_EL2), ifsr32_el2); + + if (has_vhe() || vcpu_get_flag(vcpu, DEBUG_DIRTY)) + write_sysreg(__vcpu_sys_reg(vcpu, DBGVCR32_EL2), dbgvcr32_el2); +} + +#endif /* __ARM64_KVM_HYP_SYSREG_SR_H__ */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h b/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h new file mode 100644 index 000000000..dc61aaa56 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/early_alloc.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __KVM_HYP_EARLY_ALLOC_H +#define __KVM_HYP_EARLY_ALLOC_H + +#include <asm/kvm_pgtable.h> + +void hyp_early_alloc_init(void *virt, unsigned long size); +unsigned long hyp_early_alloc_nr_used_pages(void); +void *hyp_early_alloc_page(void *arg); +void *hyp_early_alloc_contig(unsigned int nr_pages); + +extern struct kvm_pgtable_mm_ops hyp_early_alloc_mm_ops; + +#endif /* __KVM_HYP_EARLY_ALLOC_H */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h new file mode 100644 index 000000000..37440e1dd --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba <tabba@google.com> + */ + +#ifndef __ARM64_KVM_FIXED_CONFIG_H__ +#define __ARM64_KVM_FIXED_CONFIG_H__ + +#include <asm/sysreg.h> + +/* + * This file contains definitions for features to be allowed or restricted for + * guest virtual machines, depending on the mode KVM is running in and on the + * type of guest that is running. + * + * The ALLOW masks represent a bitmask of feature fields that are allowed + * without any restrictions as long as they are supported by the system. + * + * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for + * features that are restricted to support at most the specified feature. + * + * If a feature field is not present in either, than it is not supported. + * + * The approach taken for protected VMs is to allow features that are: + * - Needed by common Linux distributions (e.g., floating point) + * - Trivial to support, e.g., supporting the feature does not introduce or + * require tracking of additional state in KVM + * - Cannot be trapped or prevent the guest from using anyway + */ + +/* + * Allow for protected VMs: + * - Floating-point and Advanced SIMD + * - Data Independent Timing + * - Spectre/Meltdown Mitigation + */ +#define PVM_ID_AA64PFR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_FP) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AdvSIMD) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_DIT) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3) \ + ) + +/* + * Restrict to the following *unsigned* features for protected VMs: + * - AArch64 guests only (no support for AArch32 guests): + * AArch32 adds complexity in trap handling, emulation, condition codes, + * etc... + * - RAS (v1) + * Supported by KVM + */ +#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL1), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL2), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL3), ID_AA64PFR0_EL1_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_RAS), ID_AA64PFR0_EL1_RAS_IMP) \ + ) + +/* + * Allow for protected VMs: + * - Branch Target Identification + * - Speculative Store Bypassing + */ +#define PVM_ID_AA64PFR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_BT) | \ + ARM64_FEATURE_MASK(ID_AA64PFR1_EL1_SSBS) \ + ) + +/* + * Allow for protected VMs: + * - Mixed-endian + * - Distinction between Secure and Non-secure Memory + * - Mixed-endian at EL0 only + * - Non-context synchronizing exception entry and exit + */ +#define PVM_ID_AA64MMFR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGEND) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_SNSMEM) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_BIGENDEL0) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_EXS) \ + ) + +/* + * Restrict to the following *unsigned* features for protected VMs: + * - 40-bit IPA + * - 16-bit ASID + */ +#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_PARANGE), ID_AA64MMFR0_EL1_PARANGE_40) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_ASIDBITS), ID_AA64MMFR0_EL1_ASIDBITS_16) \ + ) + +/* + * Allow for protected VMs: + * - Hardware translation table updates to Access flag and Dirty state + * - Number of VMID bits from CPU + * - Hierarchical Permission Disables + * - Privileged Access Never + * - SError interrupt exceptions from speculative reads + * - Enhanced Translation Synchronization + */ +#define PVM_ID_AA64MMFR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_VMIDBits) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HPDS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_PAN) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_SpecSEI) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_ETS) \ + ) + +/* + * Allow for protected VMs: + * - Common not Private translations + * - User Access Override + * - IESB bit in the SCTLR_ELx registers + * - Unaligned single-copy atomicity and atomic functions + * - ESR_ELx.EC value on an exception by read access to feature ID space + * - TTL field in address operations. + * - Break-before-make sequences when changing translation block size + * - E0PDx mechanism + */ +#define PVM_ID_AA64MMFR2_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_CnP) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_UAO) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IESB) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_AT) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_IDS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_TTL) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_BBM) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_EL1_E0PD) \ + ) + +/* + * No support for Scalable Vectors for protected VMs: + * Requires additional support from KVM, e.g., context-switching and + * trapping at EL2 + */ +#define PVM_ID_AA64ZFR0_ALLOW (0ULL) + +/* + * No support for debug, including breakpoints, and watchpoints for protected + * VMs: + * The Arm architecture mandates support for at least the Armv8 debug + * architecture, which would include at least 2 hardware breakpoints and + * watchpoints. Providing that support to protected guests adds + * considerable state and complexity. Therefore, the reserved value of 0 is + * used for debug-related fields. + */ +#define PVM_ID_AA64DFR0_ALLOW (0ULL) +#define PVM_ID_AA64DFR1_ALLOW (0ULL) + +/* + * No support for implementation defined features. + */ +#define PVM_ID_AA64AFR0_ALLOW (0ULL) +#define PVM_ID_AA64AFR1_ALLOW (0ULL) + +/* + * No restrictions on instructions implemented in AArch64. + */ +#define PVM_ID_AA64ISAR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_AES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA1) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA2) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_CRC32) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RDM) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SHA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_SM4) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_DP) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_FHM) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_TLB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_RNDR) \ + ) + +#define PVM_ID_AA64ISAR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DPB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_APA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_API) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_JSCVT) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FCMA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_LRCPC) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_GPI) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_FRINTTS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_SPECRES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_BF16) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_DGH) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_EL1_I8MM) \ + ) + +#define PVM_ID_AA64ISAR2_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) \ + ) + +u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); +bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); +bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code); +int kvm_check_pvm_sysreg_table(void); + +#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/gfp.h b/arch/arm64/kvm/hyp/include/nvhe/gfp.h new file mode 100644 index 000000000..0a048dc06 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/gfp.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __KVM_HYP_GFP_H +#define __KVM_HYP_GFP_H + +#include <linux/list.h> + +#include <nvhe/memory.h> +#include <nvhe/spinlock.h> + +#define HYP_NO_ORDER USHRT_MAX + +struct hyp_pool { + /* + * Spinlock protecting concurrent changes to the memory pool as well as + * the struct hyp_page of the pool's pages until we have a proper atomic + * API at EL2. + */ + hyp_spinlock_t lock; + struct list_head free_area[MAX_ORDER]; + phys_addr_t range_start; + phys_addr_t range_end; + unsigned short max_order; +}; + +/* Allocation */ +void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order); +void hyp_split_page(struct hyp_page *page); +void hyp_get_page(struct hyp_pool *pool, void *addr); +void hyp_put_page(struct hyp_pool *pool, void *addr); + +/* Used pages cannot be freed */ +int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, + unsigned int reserved_pages); +#endif /* __KVM_HYP_GFP_H */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h new file mode 100644 index 000000000..80e99836e --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -0,0 +1,73 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 Google LLC + * Author: Quentin Perret <qperret@google.com> + */ + +#ifndef __KVM_NVHE_MEM_PROTECT__ +#define __KVM_NVHE_MEM_PROTECT__ +#include <linux/kvm_host.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_pgtable.h> +#include <asm/virt.h> +#include <nvhe/spinlock.h> + +/* + * SW bits 0-1 are reserved to track the memory ownership state of each page: + * 00: The page is owned exclusively by the page-table owner. + * 01: The page is owned by the page-table owner, but is shared + * with another entity. + * 10: The page is shared with, but not owned by the page-table owner. + * 11: Reserved for future use (lending). + */ +enum pkvm_page_state { + PKVM_PAGE_OWNED = 0ULL, + PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0, + PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1, + __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 | + KVM_PGTABLE_PROT_SW1, + + /* Meta-states which aren't encoded directly in the PTE's SW bits */ + PKVM_NOPAGE, +}; + +#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1) +static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot, + enum pkvm_page_state state) +{ + return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state; +} + +static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot) +{ + return prot & PKVM_PAGE_STATE_PROT_MASK; +} + +struct host_kvm { + struct kvm_arch arch; + struct kvm_pgtable pgt; + struct kvm_pgtable_mm_ops mm_ops; + hyp_spinlock_t lock; +}; +extern struct host_kvm host_kvm; + +extern const u8 pkvm_hyp_id; + +int __pkvm_prot_finalize(void); +int __pkvm_host_share_hyp(u64 pfn); +int __pkvm_host_unshare_hyp(u64 pfn); + +bool addr_is_memory(phys_addr_t phys); +int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); +int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); +int kvm_host_prepare_stage2(void *pgt_pool_base); +void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); + +static __always_inline void __load_host_stage2(void) +{ + if (static_branch_likely(&kvm_protected_mode_initialized)) + __load_stage2(&host_kvm.arch.mmu, &host_kvm.arch); + else + write_sysreg(0, vttbr_el2); +} +#endif /* __KVM_NVHE_MEM_PROTECT__ */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h new file mode 100644 index 000000000..592b7edb3 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __KVM_HYP_MEMORY_H +#define __KVM_HYP_MEMORY_H + +#include <asm/kvm_mmu.h> +#include <asm/page.h> + +#include <linux/types.h> + +struct hyp_page { + unsigned short refcount; + unsigned short order; +}; + +extern u64 __hyp_vmemmap; +#define hyp_vmemmap ((struct hyp_page *)__hyp_vmemmap) + +#define __hyp_va(phys) ((void *)((phys_addr_t)(phys) - hyp_physvirt_offset)) + +static inline void *hyp_phys_to_virt(phys_addr_t phys) +{ + return __hyp_va(phys); +} + +static inline phys_addr_t hyp_virt_to_phys(void *addr) +{ + return __hyp_pa(addr); +} + +#define hyp_phys_to_pfn(phys) ((phys) >> PAGE_SHIFT) +#define hyp_pfn_to_phys(pfn) ((phys_addr_t)((pfn) << PAGE_SHIFT)) +#define hyp_phys_to_page(phys) (&hyp_vmemmap[hyp_phys_to_pfn(phys)]) +#define hyp_virt_to_page(virt) hyp_phys_to_page(__hyp_pa(virt)) +#define hyp_virt_to_pfn(virt) hyp_phys_to_pfn(__hyp_pa(virt)) + +#define hyp_page_to_pfn(page) ((struct hyp_page *)(page) - hyp_vmemmap) +#define hyp_page_to_phys(page) hyp_pfn_to_phys((hyp_page_to_pfn(page))) +#define hyp_page_to_virt(page) __hyp_va(hyp_page_to_phys(page)) +#define hyp_page_to_pool(page) (((struct hyp_page *)page)->pool) + +static inline int hyp_page_count(void *addr) +{ + struct hyp_page *p = hyp_virt_to_page(addr); + + return p->refcount; +} + +#endif /* __KVM_HYP_MEMORY_H */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h new file mode 100644 index 000000000..42d8eb9bf --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __KVM_HYP_MM_H +#define __KVM_HYP_MM_H + +#include <asm/kvm_pgtable.h> +#include <asm/spectre.h> +#include <linux/memblock.h> +#include <linux/types.h> + +#include <nvhe/memory.h> +#include <nvhe/spinlock.h> + +extern struct kvm_pgtable pkvm_pgtable; +extern hyp_spinlock_t pkvm_pgd_lock; + +int hyp_create_idmap(u32 hyp_va_bits); +int hyp_map_vectors(void); +int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back); +int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); +int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot); +int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot); +int __pkvm_create_private_mapping(phys_addr_t phys, size_t size, + enum kvm_pgtable_prot prot, + unsigned long *haddr); +int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr); + +static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size, + unsigned long *start, unsigned long *end) +{ + unsigned long nr_pages = size >> PAGE_SHIFT; + struct hyp_page *p = hyp_phys_to_page(phys); + + *start = (unsigned long)p; + *end = *start + nr_pages * sizeof(struct hyp_page); + *start = ALIGN_DOWN(*start, PAGE_SIZE); + *end = ALIGN(*end, PAGE_SIZE); +} + +#endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h new file mode 100644 index 000000000..4652fd04b --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * A stand-alone ticket spinlock implementation for use by the non-VHE + * KVM hypervisor code running at EL2. + * + * Copyright (C) 2020 Google LLC + * Author: Will Deacon <will@kernel.org> + * + * Heavily based on the implementation removed by c11090474d70 which was: + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __ARM64_KVM_NVHE_SPINLOCK_H__ +#define __ARM64_KVM_NVHE_SPINLOCK_H__ + +#include <asm/alternative.h> +#include <asm/lse.h> +#include <asm/rwonce.h> + +typedef union hyp_spinlock { + u32 __val; + struct { +#ifdef __AARCH64EB__ + u16 next, owner; +#else + u16 owner, next; +#endif + }; +} hyp_spinlock_t; + +#define hyp_spin_lock_init(l) \ +do { \ + *(l) = (hyp_spinlock_t){ .__val = 0 }; \ +} while (0) + +static inline void hyp_spin_lock(hyp_spinlock_t *lock) +{ + u32 tmp; + hyp_spinlock_t lockval, newval; + + asm volatile( + /* Atomically increment the next ticket. */ + ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ +" prfm pstl1strm, %3\n" +"1: ldaxr %w0, %3\n" +" add %w1, %w0, #(1 << 16)\n" +" stxr %w2, %w1, %3\n" +" cbnz %w2, 1b\n", + /* LSE atomics */ +" mov %w2, #(1 << 16)\n" +" ldadda %w2, %w0, %3\n" + __nops(3)) + + /* Did we get the lock? */ +" eor %w1, %w0, %w0, ror #16\n" +" cbz %w1, 3f\n" + /* + * No: spin on the owner. Send a local event to avoid missing an + * unlock before the exclusive load. + */ +" sevl\n" +"2: wfe\n" +" ldaxrh %w2, %4\n" +" eor %w1, %w2, %w0, lsr #16\n" +" cbnz %w1, 2b\n" + /* We got the lock. Critical section starts here. */ +"3:" + : "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock) + : "Q" (lock->owner) + : "memory"); +} + +static inline void hyp_spin_unlock(hyp_spinlock_t *lock) +{ + u64 tmp; + + asm volatile( + ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " ldrh %w1, %0\n" + " add %w1, %w1, #1\n" + " stlrh %w1, %0", + /* LSE atomics */ + " mov %w1, #1\n" + " staddlh %w1, %0\n" + __nops(1)) + : "=Q" (lock->owner), "=&r" (tmp) + : + : "memory"); +} + +static inline bool hyp_spin_is_locked(hyp_spinlock_t *lock) +{ + hyp_spinlock_t lockval = READ_ONCE(*lock); + + return lockval.owner != lockval.next; +} + +#ifdef CONFIG_NVHE_EL2_DEBUG +static inline void hyp_assert_lock_held(hyp_spinlock_t *lock) +{ + /* + * The __pkvm_init() path accesses protected data-structures without + * holding locks as the other CPUs are guaranteed to not enter EL2 + * concurrently at this point in time. The point by which EL2 is + * initialized on all CPUs is reflected in the pkvm static key, so + * wait until it is set before checking the lock state. + */ + if (static_branch_likely(&kvm_protected_mode_initialized)) + BUG_ON(!hyp_spin_is_locked(lock)); +} +#else +static inline void hyp_assert_lock_held(hyp_spinlock_t *lock) { } +#endif + +#endif /* __ARM64_KVM_NVHE_SPINLOCK_H__ */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h new file mode 100644 index 000000000..45a84f0ad --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Trap handler helpers. + * + * Copyright (C) 2020 - Google LLC + * Author: Marc Zyngier <maz@kernel.org> + */ + +#ifndef __ARM64_KVM_NVHE_TRAP_HANDLER_H__ +#define __ARM64_KVM_NVHE_TRAP_HANDLER_H__ + +#include <asm/kvm_host.h> + +#define cpu_reg(ctxt, r) (ctxt)->regs.regs[r] +#define DECLARE_REG(type, name, ctxt, reg) \ + type name = (type)cpu_reg(ctxt, (reg)) + +void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */ |