summaryrefslogtreecommitdiffstats
path: root/arch/arm64/kvm/hyp
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm/hyp')
-rw-r--r--arch/arm64/kvm/hyp/Makefile2
-rw-r--r--arch/arm64/kvm/hyp/fpsimd.S6
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/debug-sr.h8
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h120
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/pkvm.h5
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile13
-rw-r--r--arch/arm64/kvm/hyp/nvhe/debug-sr.c8
-rw-r--r--arch/arm64/kvm/hyp/nvhe/ffa.c13
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c111
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c8
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c9
-rw-r--r--arch/arm64/kvm/hyp/nvhe/psci-relay.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c27
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c42
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c115
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c21
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c27
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c121
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c4
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c26
20 files changed, 463 insertions, 225 deletions
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index a38dea6186..d61e44642f 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -3,7 +3,7 @@
# Makefile for Kernel-based Virtual Machine module, HYP part
#
-incdir := $(srctree)/$(src)/include
+incdir := $(src)/include
subdir-asflags-y := -I$(incdir)
subdir-ccflags-y := -I$(incdir)
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
index 61e6f3ba7b..e950875e31 100644
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -25,3 +25,9 @@ SYM_FUNC_START(__sve_restore_state)
sve_load 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_restore_state)
+
+SYM_FUNC_START(__sve_save_state)
+ mov x2, #1
+ sve_save 0, x1, x2, 3
+ ret
+SYM_FUNC_END(__sve_save_state)
diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
index 961bbef104..d00093699a 100644
--- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
@@ -135,9 +135,9 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
return;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
- host_dbg = &vcpu->arch.host_debug_state.regs;
+ host_dbg = host_data_ptr(host_debug_state.regs);
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
__debug_save_state(host_dbg, host_ctxt);
@@ -154,9 +154,9 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
return;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
- host_dbg = &vcpu->arch.host_debug_state.regs;
+ host_dbg = host_data_ptr(host_debug_state.regs);
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
__debug_save_state(guest_dbg, guest_ctxt);
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index e3fcf8c4d5..0c4de44534 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -27,6 +27,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
+#include <asm/kvm_ptrauth.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
@@ -39,12 +40,6 @@ struct kvm_exception_table_entry {
extern struct kvm_exception_table_entry __start___kvm_ex_table;
extern struct kvm_exception_table_entry __stop___kvm_ex_table;
-/* Check whether the FP regs are owned by the guest */
-static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED;
-}
-
/* Save the 32-bit only FPSIMD system register state */
static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
{
@@ -155,7 +150,7 @@ static inline bool cpu_has_amu(void)
static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
- struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
CHECK_FGT_MASKS(HFGRTR_EL2);
@@ -191,7 +186,7 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
- struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
if (!cpus_have_final_cap(ARM64_HAS_FGT))
@@ -226,13 +221,13 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
write_sysreg(0, pmselr_el0);
- hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ hctxt = host_data_ptr(host_ctxt);
ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0);
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
}
- vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
+ *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
@@ -254,13 +249,13 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
{
- write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2);
+ write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);
write_sysreg(0, hstr_el2);
if (kvm_arm_support_pmu_v3()) {
struct kvm_cpu_context *hctxt;
- hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ hctxt = host_data_ptr(host_ctxt);
write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
}
@@ -271,10 +266,8 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
__deactivate_traps_hfgxtr(vcpu);
}
-static inline void ___activate_traps(struct kvm_vcpu *vcpu)
+static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr)
{
- u64 hcr = vcpu->arch.hcr_el2;
-
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
hcr |= HCR_TVM;
@@ -323,10 +316,24 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
{
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
__sve_restore_state(vcpu_sve_pffr(vcpu),
- &vcpu->arch.ctxt.fp_regs.fpsr);
+ &vcpu->arch.ctxt.fp_regs.fpsr,
+ true);
write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
}
+static inline void __hyp_sve_save_host(void)
+{
+ struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
+
+ sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+ __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+ &sve_state->fpsr,
+ true);
+}
+
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
+
/*
* We trap the first access to the FP/SIMD to save the host context and
* restore the guest context lazily.
@@ -337,7 +344,6 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
bool sve_guest;
u8 esr_ec;
- u64 reg;
if (!system_supports_fpsimd())
return false;
@@ -360,24 +366,15 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
/* Valid trap. Switch the context: */
/* First disable enough traps to allow us to update the registers */
- if (has_vhe() || has_hvhe()) {
- reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
- if (sve_guest)
- reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
-
- sysreg_clear_set(cpacr_el1, 0, reg);
- } else {
- reg = CPTR_EL2_TFP;
- if (sve_guest)
- reg |= CPTR_EL2_TZ;
-
- sysreg_clear_set(cptr_el2, reg, 0);
- }
+ if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
+ cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
+ else
+ cpacr_clear_set(0, CPACR_ELx_FPEN);
isb();
/* Write out the host state if it's in the registers */
- if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
- __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
+ if (host_owns_fp_regs())
+ kvm_hyp_save_fpsimd_host(vcpu);
/* Restore the guest state */
if (sve_guest)
@@ -389,7 +386,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
if (!(read_sysreg(hcr_el2) & HCR_RW))
write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
- vcpu->arch.fp_state = FP_STATE_GUEST_OWNED;
+ *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED;
return true;
}
@@ -449,60 +446,6 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
return true;
}
-static inline bool esr_is_ptrauth_trap(u64 esr)
-{
- switch (esr_sys64_to_sysreg(esr)) {
- case SYS_APIAKEYLO_EL1:
- case SYS_APIAKEYHI_EL1:
- case SYS_APIBKEYLO_EL1:
- case SYS_APIBKEYHI_EL1:
- case SYS_APDAKEYLO_EL1:
- case SYS_APDAKEYHI_EL1:
- case SYS_APDBKEYLO_EL1:
- case SYS_APDBKEYHI_EL1:
- case SYS_APGAKEYLO_EL1:
- case SYS_APGAKEYHI_EL1:
- return true;
- }
-
- return false;
-}
-
-#define __ptrauth_save_key(ctxt, key) \
- do { \
- u64 __val; \
- __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
- ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \
- __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
- ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \
-} while(0)
-
-DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
-
-static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
-{
- struct kvm_cpu_context *ctxt;
- u64 val;
-
- if (!vcpu_has_ptrauth(vcpu))
- return false;
-
- ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
- __ptrauth_save_key(ctxt, APIA);
- __ptrauth_save_key(ctxt, APIB);
- __ptrauth_save_key(ctxt, APDA);
- __ptrauth_save_key(ctxt, APDB);
- __ptrauth_save_key(ctxt, APGA);
-
- vcpu_ptrauth_enable(vcpu);
-
- val = read_sysreg(hcr_el2);
- val |= (HCR_API | HCR_APK);
- write_sysreg(val, hcr_el2);
-
- return true;
-}
-
static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *ctxt;
@@ -590,9 +533,6 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
__vgic_v3_perform_cpuif_access(vcpu) == 1)
return true;
- if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
- return kvm_hyp_handle_ptrauth(vcpu, exit_code);
-
if (kvm_hyp_handle_cntpct(vcpu))
return true;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 82b3d62538..24a9a8330d 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -53,6 +53,11 @@ pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu)
return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm);
}
+static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ return vcpu_is_protected(&hyp_vcpu->vcpu);
+}
+
void pkvm_hyp_vm_table_init(void *tbl);
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 2250253a64..50fa0ffb6b 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -97,16 +97,3 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI)
# causes a build failure. Remove profile optimization flags.
KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%, $(KBUILD_CFLAGS))
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables
-
-# KVM nVHE code is run at a different exception code with a different map, so
-# compiler instrumentation that inserts callbacks or checks into the code may
-# cause crashes. Just disable it.
-GCOV_PROFILE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-UBSAN_SANITIZE := n
-KCOV_INSTRUMENT := n
-
-# Skip objtool checking for this directory because nVHE code is compiled with
-# non-standard build rules.
-OBJECT_FILES_NON_STANDARD := y
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 7746ea507b..53efda0235 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -83,10 +83,10 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
/* Disable and flush SPE data generation */
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
- __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
/* Disable and flush Self-Hosted Trace generation */
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
- __debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1);
+ __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1));
}
void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
@@ -97,9 +97,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
- __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
- __debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1);
+ __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1));
}
void __debug_switch_to_host(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index 320f2eaa14..efb053af33 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -177,6 +177,14 @@ static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len)
res);
}
+static void ffa_rx_release(struct arm_smccc_res *res)
+{
+ arm_smccc_1_1_smc(FFA_RX_RELEASE,
+ 0, 0,
+ 0, 0, 0, 0, 0,
+ res);
+}
+
static void do_ffa_rxtx_map(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
@@ -543,16 +551,19 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
if (WARN_ON(offset > len ||
fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)) {
ret = FFA_RET_ABORTED;
+ ffa_rx_release(res);
goto out_unlock;
}
if (len > ffa_desc_buf.len) {
ret = FFA_RET_NO_MEMORY;
+ ffa_rx_release(res);
goto out_unlock;
}
buf = ffa_desc_buf.buf;
memcpy(buf, hyp_buffers.rx, fraglen);
+ ffa_rx_release(res);
for (fragoff = fraglen; fragoff < len; fragoff += fraglen) {
ffa_mem_frag_rx(res, handle_lo, handle_hi, fragoff);
@@ -563,6 +574,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
fraglen = res->a3;
memcpy((void *)buf + fragoff, hyp_buffers.rx, fraglen);
+ ffa_rx_release(res);
}
ffa_mem_reclaim(res, handle_lo, handle_hi, flags);
@@ -600,7 +612,6 @@ static bool ffa_call_supported(u64 func_id)
case FFA_MSG_POLL:
case FFA_MSG_WAIT:
/* 32-bit variants of 64-bit calls */
- case FFA_MSG_SEND_DIRECT_REQ:
case FFA_MSG_SEND_DIRECT_RESP:
case FFA_RXTX_MAP:
case FFA_MEM_DONATE:
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 2385fd03ed..f43d845f3c 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -23,26 +23,84 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
+static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
+{
+ __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
+ /*
+ * On saving/restoring guest sve state, always use the maximum VL for
+ * the guest. The layout of the data when saving the sve state depends
+ * on the VL, so use a consistent (i.e., the maximum) guest VL.
+ */
+ sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
+ __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true);
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+}
+
+static void __hyp_sve_restore_host(void)
+{
+ struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
+
+ /*
+ * On saving/restoring host sve state, always use the maximum VL for
+ * the host. The layout of the data when saving the sve state depends
+ * on the VL, so use a consistent (i.e., the maximum) host VL.
+ *
+ * Setting ZCR_EL2 to ZCR_ELx_LEN_MASK sets the effective length
+ * supported by the system (or limited at EL3).
+ */
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+ __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+ &sve_state->fpsr,
+ true);
+ write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
+}
+
+static void fpsimd_sve_flush(void)
+{
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
+static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
+{
+ if (!guest_owns_fp_regs())
+ return;
+
+ cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
+ isb();
+
+ if (vcpu_has_sve(vcpu))
+ __hyp_sve_save_guest(vcpu);
+ else
+ __fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
+
+ if (system_supports_sve())
+ __hyp_sve_restore_host();
+ else
+ __fpsimd_restore_state(*host_data_ptr(fpsimd_state));
+
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+ fpsimd_sve_flush();
+
hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state);
- hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl;
+ /* Limit guest vector length to the maximum supported by the host. */
+ hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu;
hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2;
hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
- hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2;
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
- hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state;
hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr);
- hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state;
hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2;
@@ -56,15 +114,15 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
unsigned int i;
+ fpsimd_sve_sync(&hyp_vcpu->vcpu);
+
host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;
host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2;
- host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2;
host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault;
host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags;
- host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state;
host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr;
for (i = 0; i < hyp_cpu_if->used_lrs; ++i)
@@ -82,6 +140,17 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
struct pkvm_hyp_vcpu *hyp_vcpu;
struct kvm *host_kvm;
+ /*
+ * KVM (and pKVM) doesn't support SME guests for now, and
+ * ensures that SME features aren't enabled in pstate when
+ * loading a vcpu. Therefore, if SME features enabled the host
+ * is misbehaving.
+ */
+ if (unlikely(system_supports_sme() && read_sysreg_s(SYS_SVCR))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
host_kvm = kern_hyp_va(host_vcpu->kvm);
hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle,
host_vcpu->vcpu_idx);
@@ -178,16 +247,6 @@ static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config();
}
-static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt)
-{
- cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr();
-}
-
-static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt)
-{
- __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1));
-}
-
static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
{
__vgic_v3_init_lrs();
@@ -198,18 +257,18 @@ static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2();
}
-static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt)
+static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
- __vgic_v3_save_aprs(kern_hyp_va(cpu_if));
+ __vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if));
}
-static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt)
+static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
- __vgic_v3_restore_aprs(kern_hyp_va(cpu_if));
+ __vgic_v3_restore_vmcr_aprs(kern_hyp_va(cpu_if));
}
static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt)
@@ -340,10 +399,8 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
- HANDLE_FUNC(__vgic_v3_read_vmcr),
- HANDLE_FUNC(__vgic_v3_write_vmcr),
- HANDLE_FUNC(__vgic_v3_save_aprs),
- HANDLE_FUNC(__vgic_v3_restore_aprs),
+ HANDLE_FUNC(__vgic_v3_save_vmcr_aprs),
+ HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
HANDLE_FUNC(__pkvm_vcpu_init_traps),
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
@@ -420,11 +477,7 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
handle_host_smc(host_ctxt);
break;
case ESR_ELx_EC_SVE:
- if (has_hvhe())
- sysreg_clear_set(cpacr_el1, 0, (CPACR_EL1_ZEN_EL1EN |
- CPACR_EL1_ZEN_EL0EN));
- else
- sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
+ cpacr_clear_set(0, CPACR_ELx_ZEN);
isb();
sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
break;
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 861c76021a..caba3e4bd0 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -533,7 +533,13 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
int ret = 0;
esr = read_sysreg_el2(SYS_ESR);
- BUG_ON(!__get_fault_info(esr, &fault));
+ if (!__get_fault_info(esr, &fault)) {
+ /*
+ * We've presumably raced with a page-table change which caused
+ * AT to fail, try again.
+ */
+ return;
+ }
addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
ret = host_stage2_idmap(addr);
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 26dd9a20ad..95cf185742 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -18,6 +18,8 @@ unsigned long __icache_flags;
/* Used by kvm_get_vttbr(). */
unsigned int kvm_arm_vmid_bits;
+unsigned int kvm_host_sve_max_vl;
+
/*
* Set trap register values based on features in ID_AA64PFR0.
*/
@@ -63,7 +65,7 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
/* Trap SVE */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) {
if (has_hvhe())
- cptr_clear |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
+ cptr_clear |= CPACR_ELx_ZEN;
else
cptr_set |= CPTR_EL2_TZ;
}
@@ -200,7 +202,7 @@ static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
}
/*
- * Initialize trap register values for protected VMs.
+ * Initialize trap register values in protected mode.
*/
void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
{
@@ -430,6 +432,7 @@ static void *map_donated_memory(unsigned long host_va, size_t size)
static void __unmap_donated_memory(void *va, size_t size)
{
+ kvm_flush_dcache_to_poc(va, size);
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va),
PAGE_ALIGN(size) >> PAGE_SHIFT));
}
@@ -574,6 +577,8 @@ unlock:
if (ret)
unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
+ hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
+
return ret;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index d57bcb6ab9..dfe8fe0f7e 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -205,7 +205,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
struct psci_boot_args *boot_args;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
if (is_cpu_on)
boot_args = this_cpu_ptr(&cpu_on_args);
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index bc58d1b515..f4350ba07b 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -67,6 +67,28 @@ static int divide_memory_pool(void *virt, unsigned long size)
return 0;
}
+static int pkvm_create_host_sve_mappings(void)
+{
+ void *start, *end;
+ int ret, i;
+
+ if (!system_supports_sve())
+ return 0;
+
+ for (i = 0; i < hyp_nr_cpus; i++) {
+ struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
+ struct cpu_sve_state *sve_state = host_data->sve_state;
+
+ start = kern_hyp_va(sve_state);
+ end = start + PAGE_ALIGN(pkvm_host_sve_state_size());
+ ret = pkvm_create_mappings(start, end, PAGE_HYP);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
unsigned long *per_cpu_base,
u32 hyp_va_bits)
@@ -125,6 +147,8 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
return ret;
}
+ pkvm_create_host_sve_mappings();
+
/*
* Map the host sections RO in the hypervisor, but transfer the
* ownership from the host to the hypervisor itself to make sure they
@@ -257,8 +281,7 @@ static int fix_hyp_pgtable_refcnt(void)
void __noreturn __pkvm_init_finalise(void)
{
- struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
- struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt;
+ struct kvm_cpu_context *host_ctxt = host_data_ptr(host_ctxt);
unsigned long nr_pages, reserved_pages, pfn;
int ret;
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index c50f8459e4..6af179c635 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -40,7 +40,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
- ___activate_traps(vcpu);
+ ___activate_traps(vcpu, vcpu->arch.hcr_el2);
__activate_traps_common(vcpu);
val = vcpu->arch.cptr_el2;
@@ -48,15 +48,14 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA;
if (cpus_have_final_cap(ARM64_SME)) {
if (has_hvhe())
- val &= ~(CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN);
+ val &= ~CPACR_ELx_SMEN;
else
val |= CPTR_EL2_TSM;
}
- if (!guest_owns_fp_regs(vcpu)) {
+ if (!guest_owns_fp_regs()) {
if (has_hvhe())
- val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
- CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
+ val &= ~(CPACR_ELx_FPEN | CPACR_ELx_ZEN);
else
val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
@@ -182,6 +181,25 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
kvm_handle_pvm_sysreg(vcpu, exit_code));
}
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Non-protected kvm relies on the host restoring its sve state.
+ * Protected kvm restores the host's sve state as not to reveal that
+ * fpsimd was used by a guest nor leak upper sve bits.
+ */
+ if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
+ __hyp_sve_save_host();
+
+ /* Re-enable SVE traps if not supported for the guest vcpu. */
+ if (!vcpu_has_sve(vcpu))
+ cpacr_clear_set(CPACR_ELx_ZEN, 0);
+
+ } else {
+ __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+ }
+}
+
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
@@ -191,7 +209,6 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
@@ -203,13 +220,12 @@ static const exit_handler_fn pvm_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
{
- if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm))))
+ if (unlikely(vcpu_is_protected(vcpu)))
return pvm_exit_handlers;
return hyp_exit_handlers;
@@ -228,9 +244,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
*/
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
-
- if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) {
+ if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) {
/*
* As we have caught the guest red-handed, decide that it isn't
* fit for purpose anymore by making the vcpu invalid. The VMM
@@ -264,7 +278,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
pmr_sync();
}
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
@@ -337,7 +351,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg_restore_state_nvhe(host_ctxt);
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
+ if (guest_owns_fp_regs())
__fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
@@ -367,7 +381,7 @@ asmlinkage void __noreturn hyp_panic(void)
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
vcpu = host_ctxt->__hyp_running_vcpu;
if (vcpu) {
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 2fc68da403..ca3c09df8d 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -11,13 +11,23 @@
#include <nvhe/mem_protect.h>
struct tlb_inv_context {
- u64 tcr;
+ struct kvm_s2_mmu *mmu;
+ u64 tcr;
+ u64 sctlr;
};
-static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
- struct tlb_inv_context *cxt,
- bool nsh)
+static void enter_vmid_context(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt,
+ bool nsh)
{
+ struct kvm_s2_mmu *host_s2_mmu = &host_mmu.arch.mmu;
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_vcpu *vcpu;
+
+ host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+ cxt->mmu = NULL;
+
/*
* We have two requirements:
*
@@ -40,20 +50,55 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
else
dsb(ish);
+ /*
+ * If we're already in the desired context, then there's nothing to do.
+ */
+ if (vcpu) {
+ /*
+ * We're in guest context. However, for this to work, this needs
+ * to be called from within __kvm_vcpu_run(), which ensures that
+ * __hyp_running_vcpu is set to the current guest vcpu.
+ */
+ if (mmu == vcpu->arch.hw_mmu || WARN_ON(mmu != host_s2_mmu))
+ return;
+
+ cxt->mmu = vcpu->arch.hw_mmu;
+ } else {
+ /* We're in host context. */
+ if (mmu == host_s2_mmu)
+ return;
+
+ cxt->mmu = host_s2_mmu;
+ }
+
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
u64 val;
/*
* For CPUs that are affected by ARM 1319367, we need to
- * avoid a host Stage-1 walk while we have the guest's
- * VMID set in the VTTBR in order to invalidate TLBs.
- * We're guaranteed that the S1 MMU is enabled, so we can
- * simply set the EPD bits to avoid any further TLB fill.
+ * avoid a Stage-1 walk with the old VMID while we have
+ * the new VMID set in the VTTBR in order to invalidate TLBs.
+ * We're guaranteed that the host S1 MMU is enabled, so
+ * we can simply set the EPD bits to avoid any further
+ * TLB fill. For guests, we ensure that the S1 MMU is
+ * temporarily enabled in the next context.
*/
val = cxt->tcr = read_sysreg_el1(SYS_TCR);
val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
write_sysreg_el1(val, SYS_TCR);
isb();
+
+ if (vcpu) {
+ val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
+ if (!(val & SCTLR_ELx_M)) {
+ val |= SCTLR_ELx_M;
+ write_sysreg_el1(val, SYS_SCTLR);
+ isb();
+ }
+ } else {
+ /* The host S1 MMU is always enabled. */
+ cxt->sctlr = SCTLR_ELx_M;
+ }
}
/*
@@ -62,18 +107,40 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
* ensuring that we always have an ISB, but not two ISBs back
* to back.
*/
- __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ if (vcpu)
+ __load_host_stage2();
+ else
+ __load_stage2(mmu, kern_hyp_va(mmu->arch));
+
asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
}
-static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+static void exit_vmid_context(struct tlb_inv_context *cxt)
{
- __load_host_stage2();
+ struct kvm_s2_mmu *mmu = cxt->mmu;
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_vcpu *vcpu;
+
+ host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+
+ if (!mmu)
+ return;
+
+ if (vcpu)
+ __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ else
+ __load_host_stage2();
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- /* Ensure write of the host VMID */
+ /* Ensure write of the old VMID */
isb();
- /* Restore the host's TCR_EL1 */
+
+ if (!(cxt->sctlr & SCTLR_ELx_M)) {
+ write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
+ isb();
+ }
+
write_sysreg_el1(cxt->tcr, SYS_TCR);
}
}
@@ -84,7 +151,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
/*
* We could do so much better if we had the VA as well.
@@ -105,7 +172,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
@@ -114,7 +181,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, true);
+ enter_vmid_context(mmu, &cxt, true);
/*
* We could do so much better if we had the VA as well.
@@ -135,7 +202,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
@@ -152,7 +219,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
start = round_down(start, stride);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
TLBI_TTL_UNKNOWN);
@@ -162,7 +229,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
@@ -170,13 +237,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
@@ -184,19 +251,19 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
__tlbi(vmalle1);
asm volatile("ic iallu");
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_vm_context(void)
{
- /* Same remark as in __tlb_switch_to_guest() */
+ /* Same remark as in enter_vmid_context() */
dsb(ish);
__tlbi(alle1is);
dsb(ish);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 5a59ef88b6..9e2bbee774 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -914,12 +914,12 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
{
u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
- return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
+ return kvm_pte_valid(pte) && memattr == KVM_S2_MEMATTR(pgt, NORMAL);
}
static bool stage2_pte_executable(kvm_pte_t pte)
{
- return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+ return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
}
static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
@@ -979,6 +979,21 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
if (!stage2_pte_needs_update(ctx->old, new))
return -EAGAIN;
+ /* If we're only changing software bits, then store them and go! */
+ if (!kvm_pgtable_walk_shared(ctx) &&
+ !((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) {
+ bool old_is_counted = stage2_pte_is_counted(ctx->old);
+
+ if (old_is_counted != stage2_pte_is_counted(new)) {
+ if (old_is_counted)
+ mm_ops->put_page(ctx->ptep);
+ else
+ mm_ops->get_page(ctx->ptep);
+ }
+ WARN_ON_ONCE(!stage2_try_set_pte(ctx, new));
+ return 0;
+ }
+
if (!stage2_try_break_pte(ctx, data->mmu))
return -EAGAIN;
@@ -1370,7 +1385,7 @@ static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx,
struct kvm_pgtable *pgt = ctx->arg;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
- if (!kvm_pte_valid(ctx->old) || !stage2_pte_cacheable(pgt, ctx->old))
+ if (!stage2_pte_cacheable(pgt, ctx->old))
return 0;
if (mm_ops->dcache_clean_inval_poc)
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 6cb638b184..7b397fad26 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -330,7 +330,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
write_gicreg(0, ICH_HCR_EL2);
}
-void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
+static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -363,7 +363,7 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
}
}
-void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
+static void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -455,16 +455,35 @@ u64 __vgic_v3_get_gic_config(void)
return val;
}
-u64 __vgic_v3_read_vmcr(void)
+static u64 __vgic_v3_read_vmcr(void)
{
return read_gicreg(ICH_VMCR_EL2);
}
-void __vgic_v3_write_vmcr(u32 vmcr)
+static void __vgic_v3_write_vmcr(u32 vmcr)
{
write_gicreg(vmcr, ICH_VMCR_EL2);
}
+void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
+{
+ __vgic_v3_save_aprs(cpu_if);
+ if (cpu_if->vgic_sre)
+ cpu_if->vgic_vmcr = __vgic_v3_read_vmcr();
+}
+
+void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
+{
+ /*
+ * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
+ * is dependent on ICC_SRE_EL1.SRE, and we have to perform the
+ * VMCR_EL2 save/restore in the world switch.
+ */
+ if (cpu_if->vgic_sre)
+ __vgic_v3_write_vmcr(cpu_if->vgic_vmcr);
+ __vgic_v3_restore_aprs(cpu_if);
+}
+
static int __vgic_v3_bpr_min(void)
{
/* See Pseudocode for VPriorityGroup */
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 1581df6aec..8fbb6a2e05 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -33,11 +33,43 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
+/*
+ * HCR_EL2 bits that the NV guest can freely change (no RES0/RES1
+ * semantics, irrespective of the configuration), but that cannot be
+ * applied to the actual HW as things would otherwise break badly.
+ *
+ * - TGE: we want the guest to use EL1, which is incompatible with
+ * this bit being set
+ *
+ * - API/APK: they are already accounted for by vcpu_load(), and can
+ * only take effect across a load/put cycle (such as ERET)
+ */
+#define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK)
+
+static u64 __compute_hcr(struct kvm_vcpu *vcpu)
+{
+ u64 hcr = vcpu->arch.hcr_el2;
+
+ if (!vcpu_has_nv(vcpu))
+ return hcr;
+
+ if (is_hyp_ctxt(vcpu)) {
+ hcr |= HCR_NV | HCR_NV2 | HCR_AT | HCR_TTLB;
+
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ hcr |= HCR_NV1;
+
+ write_sysreg_s(vcpu->arch.ctxt.vncr_array, SYS_VNCR_EL2);
+ }
+
+ return hcr | (__vcpu_sys_reg(vcpu, HCR_EL2) & ~NV_HCR_GUEST_EXCLUDE);
+}
+
static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
- ___activate_traps(vcpu);
+ ___activate_traps(vcpu, __compute_hcr(vcpu));
if (has_cntpoff()) {
struct timer_map map;
@@ -61,8 +93,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val = read_sysreg(cpacr_el1);
val |= CPACR_ELx_TTA;
- val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
- CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN);
+ val &= ~(CPACR_ELx_ZEN | CPACR_ELx_SMEN);
/*
* With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
@@ -75,11 +106,11 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= CPTR_EL2_TAM;
- if (guest_owns_fp_regs(vcpu)) {
+ if (guest_owns_fp_regs()) {
if (vcpu_has_sve(vcpu))
- val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
+ val |= CPACR_ELx_ZEN;
} else {
- val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
+ val &= ~CPACR_ELx_FPEN;
__activate_traps_fpsimd32(vcpu);
}
@@ -162,6 +193,8 @@ static void __vcpu_put_deactivate_traps(struct kvm_vcpu *vcpu)
void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu)
{
+ host_data_ptr(host_ctxt)->__hyp_running_vcpu = vcpu;
+
__vcpu_load_switch_sysregs(vcpu);
__vcpu_load_activate_traps(vcpu);
__load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
@@ -171,6 +204,66 @@ void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
{
__vcpu_put_deactivate_traps(vcpu);
__vcpu_put_switch_sysregs(vcpu);
+
+ host_data_ptr(host_ctxt)->__hyp_running_vcpu = NULL;
+}
+
+static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ u64 spsr, elr, mode;
+
+ /*
+ * Going through the whole put/load motions is a waste of time
+ * if this is a VHE guest hypervisor returning to its own
+ * userspace, or the hypervisor performing a local exception
+ * return. No need to save/restore registers, no need to
+ * switch S2 MMU. Just do the canonical ERET.
+ *
+ * Unless the trap has to be forwarded further down the line,
+ * of course...
+ */
+ if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV) ||
+ (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_ERET))
+ return false;
+
+ spsr = read_sysreg_el1(SYS_SPSR);
+ mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+ switch (mode) {
+ case PSR_MODE_EL0t:
+ if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
+ return false;
+ break;
+ case PSR_MODE_EL2t:
+ mode = PSR_MODE_EL1t;
+ break;
+ case PSR_MODE_EL2h:
+ mode = PSR_MODE_EL1h;
+ break;
+ default:
+ return false;
+ }
+
+ /* If ERETAx fails, take the slow path */
+ if (esr_iss_is_eretax(esr)) {
+ if (!(vcpu_has_ptrauth(vcpu) && kvm_auth_eretax(vcpu, &elr)))
+ return false;
+ } else {
+ elr = read_sysreg_el1(SYS_ELR);
+ }
+
+ spsr = (spsr & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode;
+
+ write_sysreg_el2(spsr, SYS_SPSR);
+ write_sysreg_el2(elr, SYS_ELR);
+
+ return true;
+}
+
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+ __fpsimd_save_state(*host_data_ptr(fpsimd_state));
}
static const exit_handler_fn hyp_exit_handlers[] = {
@@ -182,7 +275,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+ [ESR_ELx_EC_ERET] = kvm_hyp_handle_eret,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
@@ -197,7 +290,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
* If we were in HYP context on entry, adjust the PSTATE view
* so that the usual helpers work correctly.
*/
- if (unlikely(vcpu_get_flag(vcpu, VCPU_HYP_CONTEXT))) {
+ if (vcpu_has_nv(vcpu) && (read_sysreg(hcr_el2) & HCR_NV)) {
u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
switch (mode) {
@@ -221,8 +314,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt;
u64 exit_code;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
- host_ctxt->__hyp_running_vcpu = vcpu;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
sysreg_save_host_state_vhe(host_ctxt);
@@ -240,11 +332,6 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_guest_state_vhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
- if (is_hyp_ctxt(vcpu))
- vcpu_set_flag(vcpu, VCPU_HYP_CONTEXT);
- else
- vcpu_clear_flag(vcpu, VCPU_HYP_CONTEXT);
-
do {
/* Jump in the fire! */
exit_code = __guest_enter(vcpu);
@@ -258,7 +345,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_host_state_vhe(host_ctxt);
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
+ if (guest_owns_fp_regs())
__fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
@@ -306,7 +393,7 @@ static void __hyp_call_panic(u64 spsr, u64 elr, u64 par)
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
vcpu = host_ctxt->__hyp_running_vcpu;
__deactivate_traps(vcpu);
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index a8b9ea4967..e12bd7d6d2 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -67,7 +67,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
__sysreg_save_user_state(host_ctxt);
/*
@@ -110,7 +110,7 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
__sysreg_save_el1_state(guest_ctxt);
__sysreg_save_user_state(guest_ctxt);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 1a60b95381..5fa0359f3a 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -17,8 +17,8 @@ struct tlb_inv_context {
u64 sctlr;
};
-static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
- struct tlb_inv_context *cxt)
+static void enter_vmid_context(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt)
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
u64 val;
@@ -67,7 +67,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
isb();
}
-static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+static void exit_vmid_context(struct tlb_inv_context *cxt)
{
/*
* We're done with the TLB operation, let's restore the host's
@@ -97,7 +97,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
/*
* We could do so much better if we had the VA as well.
@@ -118,7 +118,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
@@ -129,7 +129,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nshst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
/*
* We could do so much better if we had the VA as well.
@@ -150,7 +150,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
@@ -169,7 +169,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
TLBI_TTL_UNKNOWN);
@@ -179,7 +179,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
@@ -189,13 +189,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
@@ -203,14 +203,14 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
__tlbi(vmalle1);
asm volatile("ic iallu");
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_vm_context(void)