summaryrefslogtreecommitdiffstats
path: root/arch/arm64/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--arch/arm64/kvm/Makefile5
-rw-r--r--arch/arm64/kvm/arm.c285
-rw-r--r--arch/arm64/kvm/emulate-nested.c71
-rw-r--r--arch/arm64/kvm/fpsimd.c80
-rw-r--r--arch/arm64/kvm/guest.c3
-rw-r--r--arch/arm64/kvm/handle_exit.c36
-rw-r--r--arch/arm64/kvm/hyp/Makefile2
-rw-r--r--arch/arm64/kvm/hyp/aarch32.c18
-rw-r--r--arch/arm64/kvm/hyp/fpsimd.S6
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/debug-sr.h8
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h120
-rw-r--r--arch/arm64/kvm/hyp/include/nvhe/pkvm.h5
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile13
-rw-r--r--arch/arm64/kvm/hyp/nvhe/debug-sr.c8
-rw-r--r--arch/arm64/kvm/hyp/nvhe/ffa.c13
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c111
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c8
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c9
-rw-r--r--arch/arm64/kvm/hyp/nvhe/psci-relay.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/setup.c27
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c42
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c115
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c21
-rw-r--r--arch/arm64/kvm/hyp/vgic-v3-sr.c27
-rw-r--r--arch/arm64/kvm/hyp/vhe/switch.c121
-rw-r--r--arch/arm64/kvm/hyp/vhe/sysreg-sr.c4
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c26
-rw-r--r--arch/arm64/kvm/mmio.c12
-rw-r--r--arch/arm64/kvm/mmu.c42
-rw-r--r--arch/arm64/kvm/nested.c14
-rw-r--r--arch/arm64/kvm/pauth.c206
-rw-r--r--arch/arm64/kvm/pkvm.c2
-rw-r--r--arch/arm64/kvm/pmu.c2
-rw-r--r--arch/arm64/kvm/reset.c4
-rw-r--r--arch/arm64/kvm/sys_regs.c69
-rw-r--r--arch/arm64/kvm/vgic/vgic-debug.c82
-rw-r--r--arch/arm64/kvm/vgic/vgic-init.c92
-rw-r--r--arch/arm64/kvm/vgic/vgic-its.c352
-rw-r--r--arch/arm64/kvm/vgic/vgic-mmio-v3.c17
-rw-r--r--arch/arm64/kvm/vgic/vgic-v2.c9
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c23
-rw-r--r--arch/arm64/kvm/vgic/vgic.c17
-rw-r--r--arch/arm64/kvm/vgic/vgic.h10
43 files changed, 1366 insertions, 773 deletions
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index c0c050e531..a6497228c5 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -3,7 +3,7 @@
# Makefile for Kernel-based Virtual Machine module
#
-ccflags-y += -I $(srctree)/$(src)
+ccflags-y += -I $(src)
include $(srctree)/virt/kvm/Makefile.kvm
@@ -23,6 +23,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
vgic/vgic-its.o vgic/vgic-debug.o
kvm-$(CONFIG_HW_PERF_EVENTS) += pmu-emul.o pmu.o
+kvm-$(CONFIG_ARM64_PTR_AUTH) += pauth.o
always-y := hyp_constants.h hyp-constants.s
@@ -30,7 +31,7 @@ define rule_gen_hyp_constants
$(call filechk,offsets,__HYP_CONSTANTS_H__)
endef
-CFLAGS_hyp-constants.o = -I $(srctree)/$(src)/hyp/include
+CFLAGS_hyp-constants.o = -I $(src)/hyp/include
$(obj)/hyp-constants.s: $(src)/hyp/hyp-constants.c FORCE
$(call if_changed_dep,cc_s_c)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index c4a0a35e02..59716789fe 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -35,10 +35,11 @@
#include <asm/virt.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
+#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
#include <asm/kvm_pkvm.h>
-#include <asm/kvm_emulate.h>
+#include <asm/kvm_ptrauth.h>
#include <asm/sections.h>
#include <kvm/arm_hypercalls.h>
@@ -69,15 +70,42 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
}
+/*
+ * This functions as an allow-list of protected VM capabilities.
+ * Features not explicitly allowed by this function are denied.
+ */
+static bool pkvm_ext_allowed(struct kvm *kvm, long ext)
+{
+ switch (ext) {
+ case KVM_CAP_IRQCHIP:
+ case KVM_CAP_ARM_PSCI:
+ case KVM_CAP_ARM_PSCI_0_2:
+ case KVM_CAP_NR_VCPUS:
+ case KVM_CAP_MAX_VCPUS:
+ case KVM_CAP_MAX_VCPU_ID:
+ case KVM_CAP_MSI_DEVID:
+ case KVM_CAP_ARM_VM_IPA_SIZE:
+ case KVM_CAP_ARM_PMU_V3:
+ case KVM_CAP_ARM_SVE:
+ case KVM_CAP_ARM_PTRAUTH_ADDRESS:
+ case KVM_CAP_ARM_PTRAUTH_GENERIC:
+ return true;
+ default:
+ return false;
+ }
+}
+
int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
struct kvm_enable_cap *cap)
{
- int r;
- u64 new_cap;
+ int r = -EINVAL;
if (cap->flags)
return -EINVAL;
+ if (kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, cap->cap))
+ return -EINVAL;
+
switch (cap->cap) {
case KVM_CAP_ARM_NISV_TO_USER:
r = 0;
@@ -86,9 +114,7 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
break;
case KVM_CAP_ARM_MTE:
mutex_lock(&kvm->lock);
- if (!system_supports_mte() || kvm->created_vcpus) {
- r = -EINVAL;
- } else {
+ if (system_supports_mte() && !kvm->created_vcpus) {
r = 0;
set_bit(KVM_ARCH_FLAG_MTE_ENABLED, &kvm->arch.flags);
}
@@ -99,25 +125,22 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
set_bit(KVM_ARCH_FLAG_SYSTEM_SUSPEND_ENABLED, &kvm->arch.flags);
break;
case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
- new_cap = cap->args[0];
-
mutex_lock(&kvm->slots_lock);
/*
* To keep things simple, allow changing the chunk
* size only when no memory slots have been created.
*/
- if (!kvm_are_all_memslots_empty(kvm)) {
- r = -EINVAL;
- } else if (new_cap && !kvm_is_block_size_supported(new_cap)) {
- r = -EINVAL;
- } else {
- r = 0;
- kvm->arch.mmu.split_page_chunk_size = new_cap;
+ if (kvm_are_all_memslots_empty(kvm)) {
+ u64 new_cap = cap->args[0];
+
+ if (!new_cap || kvm_is_block_size_supported(new_cap)) {
+ r = 0;
+ kvm->arch.mmu.split_page_chunk_size = new_cap;
+ }
}
mutex_unlock(&kvm->slots_lock);
break;
default:
- r = -EINVAL;
break;
}
@@ -195,6 +218,23 @@ void kvm_arch_create_vm_debugfs(struct kvm *kvm)
kvm_sys_regs_create_debugfs(kvm);
}
+static void kvm_destroy_mpidr_data(struct kvm *kvm)
+{
+ struct kvm_mpidr_data *data;
+
+ mutex_lock(&kvm->arch.config_lock);
+
+ data = rcu_dereference_protected(kvm->arch.mpidr_data,
+ lockdep_is_held(&kvm->arch.config_lock));
+ if (data) {
+ rcu_assign_pointer(kvm->arch.mpidr_data, NULL);
+ synchronize_rcu();
+ kfree(data);
+ }
+
+ mutex_unlock(&kvm->arch.config_lock);
+}
+
/**
* kvm_arch_destroy_vm - destroy the VM data structure
* @kvm: pointer to the KVM struct
@@ -209,7 +249,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
if (is_protected_kvm_enabled())
pkvm_destroy_hyp_vm(kvm);
- kfree(kvm->arch.mpidr_data);
+ kvm_destroy_mpidr_data(kvm);
+
kfree(kvm->arch.sysreg_masks);
kvm_destroy_vcpus(kvm);
@@ -218,9 +259,47 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_arm_teardown_hypercalls(kvm);
}
+static bool kvm_has_full_ptr_auth(void)
+{
+ bool apa, gpa, api, gpi, apa3, gpa3;
+ u64 isar1, isar2, val;
+
+ /*
+ * Check that:
+ *
+ * - both Address and Generic auth are implemented for a given
+ * algorithm (Q5, IMPDEF or Q3)
+ * - only a single algorithm is implemented.
+ */
+ if (!system_has_full_ptr_auth())
+ return false;
+
+ isar1 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1);
+ isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1);
+
+ apa = !!FIELD_GET(ID_AA64ISAR1_EL1_APA_MASK, isar1);
+ val = FIELD_GET(ID_AA64ISAR1_EL1_GPA_MASK, isar1);
+ gpa = (val == ID_AA64ISAR1_EL1_GPA_IMP);
+
+ api = !!FIELD_GET(ID_AA64ISAR1_EL1_API_MASK, isar1);
+ val = FIELD_GET(ID_AA64ISAR1_EL1_GPI_MASK, isar1);
+ gpi = (val == ID_AA64ISAR1_EL1_GPI_IMP);
+
+ apa3 = !!FIELD_GET(ID_AA64ISAR2_EL1_APA3_MASK, isar2);
+ val = FIELD_GET(ID_AA64ISAR2_EL1_GPA3_MASK, isar2);
+ gpa3 = (val == ID_AA64ISAR2_EL1_GPA3_IMP);
+
+ return (apa == gpa && api == gpi && apa3 == gpa3 &&
+ (apa + api + apa3) == 1);
+}
+
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
+
+ if (kvm && kvm_vm_is_protected(kvm) && !pkvm_ext_allowed(kvm, ext))
+ return 0;
+
switch (ext) {
case KVM_CAP_IRQCHIP:
r = vgic_present;
@@ -311,7 +390,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_ARM_PTRAUTH_ADDRESS:
case KVM_CAP_ARM_PTRAUTH_GENERIC:
- r = system_has_full_ptr_auth();
+ r = kvm_has_full_ptr_auth();
break;
case KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE:
if (kvm)
@@ -378,12 +457,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO;
- /*
- * Default value for the FP state, will be overloaded at load
- * time if we support FP (pretty likely)
- */
- vcpu->arch.fp_state = FP_STATE_FREE;
-
/* Set up the timer */
kvm_timer_vcpu_init(vcpu);
@@ -395,6 +468,13 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.hw_mmu = &vcpu->kvm->arch.mmu;
+ /*
+ * This vCPU may have been created after mpidr_data was initialized.
+ * Throw out the pre-computed mappings if that is the case which forces
+ * KVM to fall back to iteratively searching the vCPUs.
+ */
+ kvm_destroy_mpidr_data(vcpu->kvm);
+
err = kvm_vgic_vcpu_init(vcpu);
if (err)
return err;
@@ -428,6 +508,44 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
}
+static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu)
+{
+ if (vcpu_has_ptrauth(vcpu)) {
+ /*
+ * Either we're running running an L2 guest, and the API/APK
+ * bits come from L1's HCR_EL2, or API/APK are both set.
+ */
+ if (unlikely(vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) {
+ u64 val;
+
+ val = __vcpu_sys_reg(vcpu, HCR_EL2);
+ val &= (HCR_API | HCR_APK);
+ vcpu->arch.hcr_el2 &= ~(HCR_API | HCR_APK);
+ vcpu->arch.hcr_el2 |= val;
+ } else {
+ vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK);
+ }
+
+ /*
+ * Save the host keys if there is any chance for the guest
+ * to use pauth, as the entry code will reload the guest
+ * keys in that case.
+ * Protected mode is the exception to that rule, as the
+ * entry into the EL2 code eagerly switch back and forth
+ * between host and hyp keys (and kvm_hyp_ctxt is out of
+ * reach anyway).
+ */
+ if (is_protected_kvm_enabled())
+ return;
+
+ if (vcpu->arch.hcr_el2 & (HCR_API | HCR_APK)) {
+ struct kvm_cpu_context *ctxt;
+ ctxt = this_cpu_ptr_hyp_sym(kvm_hyp_ctxt);
+ ptrauth_save_keys(ctxt);
+ }
+ }
+}
+
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct kvm_s2_mmu *mmu;
@@ -466,8 +584,8 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
else
vcpu_set_wfx_traps(vcpu);
- if (vcpu_has_ptrauth(vcpu))
- vcpu_ptrauth_disable(vcpu);
+ vcpu_set_pauth_traps(vcpu);
+
kvm_arch_vcpu_load_debug_state_flags(vcpu);
if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
@@ -580,11 +698,6 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
}
#endif
-static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_flag(vcpu, VCPU_INITIALIZED);
-}
-
static void kvm_init_mpidr_data(struct kvm *kvm)
{
struct kvm_mpidr_data *data = NULL;
@@ -594,7 +707,8 @@ static void kvm_init_mpidr_data(struct kvm *kvm)
mutex_lock(&kvm->arch.config_lock);
- if (kvm->arch.mpidr_data || atomic_read(&kvm->online_vcpus) == 1)
+ if (rcu_access_pointer(kvm->arch.mpidr_data) ||
+ atomic_read(&kvm->online_vcpus) == 1)
goto out;
kvm_for_each_vcpu(c, vcpu, kvm) {
@@ -631,7 +745,7 @@ static void kvm_init_mpidr_data(struct kvm *kvm)
data->cmpidr_to_idx[index] = c;
}
- kvm->arch.mpidr_data = data;
+ rcu_assign_pointer(kvm->arch.mpidr_data, data);
out:
mutex_unlock(&kvm->arch.config_lock);
}
@@ -790,9 +904,8 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
* doorbells to be signalled, should an interrupt become pending.
*/
preempt_disable();
- kvm_vgic_vmcr_sync(vcpu);
vcpu_set_flag(vcpu, IN_WFI);
- vgic_v4_put(vcpu);
+ kvm_vgic_put(vcpu);
preempt_enable();
kvm_vcpu_halt(vcpu);
@@ -800,7 +913,7 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
preempt_disable();
vcpu_clear_flag(vcpu, IN_WFI);
- vgic_v4_load(vcpu);
+ kvm_vgic_load(vcpu);
preempt_enable();
}
@@ -980,7 +1093,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (run->exit_reason == KVM_EXIT_MMIO) {
ret = kvm_handle_mmio_return(vcpu);
- if (ret)
+ if (ret <= 0)
return ret;
}
@@ -1270,7 +1383,7 @@ static unsigned long system_supported_vcpu_features(void)
if (!system_supports_sve())
clear_bit(KVM_ARM_VCPU_SVE, &features);
- if (!system_has_full_ptr_auth()) {
+ if (!kvm_has_full_ptr_auth()) {
clear_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, &features);
clear_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, &features);
}
@@ -1818,6 +1931,11 @@ static unsigned long nvhe_percpu_order(void)
return size ? get_order(size) : 0;
}
+static size_t pkvm_host_sve_state_order(void)
+{
+ return get_order(pkvm_host_sve_state_size());
+}
+
/* A lookup table holding the hypervisor VA for each vector slot */
static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
@@ -1971,7 +2089,7 @@ static void cpu_set_hyp_vector(void)
static void cpu_hyp_init_context(void)
{
- kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt);
+ kvm_init_host_cpu_context(host_data_ptr(host_ctxt));
if (!is_kernel_in_hyp_mode())
cpu_init_hyp_mode();
@@ -2197,12 +2315,20 @@ static void __init teardown_subsystems(void)
static void __init teardown_hyp_mode(void)
{
+ bool free_sve = system_supports_sve() && is_protected_kvm_enabled();
int cpu;
free_hyp_pgds();
for_each_possible_cpu(cpu) {
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
+
+ if (free_sve) {
+ struct cpu_sve_state *sve_state;
+
+ sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
+ free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
+ }
}
}
@@ -2285,6 +2411,58 @@ static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
return 0;
}
+static int init_pkvm_host_sve_state(void)
+{
+ int cpu;
+
+ if (!system_supports_sve())
+ return 0;
+
+ /* Allocate pages for host sve state in protected mode. */
+ for_each_possible_cpu(cpu) {
+ struct page *page = alloc_pages(GFP_KERNEL, pkvm_host_sve_state_order());
+
+ if (!page)
+ return -ENOMEM;
+
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = page_address(page);
+ }
+
+ /*
+ * Don't map the pages in hyp since these are only used in protected
+ * mode, which will (re)create its own mapping when initialized.
+ */
+
+ return 0;
+}
+
+/*
+ * Finalizes the initialization of hyp mode, once everything else is initialized
+ * and the initialziation process cannot fail.
+ */
+static void finalize_init_hyp_mode(void)
+{
+ int cpu;
+
+ if (system_supports_sve() && is_protected_kvm_enabled()) {
+ for_each_possible_cpu(cpu) {
+ struct cpu_sve_state *sve_state;
+
+ sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
+ kern_hyp_va(sve_state);
+ }
+ } else {
+ for_each_possible_cpu(cpu) {
+ struct user_fpsimd_state *fpsimd_state;
+
+ fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs;
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state =
+ kern_hyp_va(fpsimd_state);
+ }
+ }
+}
+
static void pkvm_hyp_init_ptrauth(void)
{
struct kvm_cpu_context *hyp_ctxt;
@@ -2453,6 +2631,10 @@ static int __init init_hyp_mode(void)
goto out_err;
}
+ err = init_pkvm_host_sve_state();
+ if (err)
+ goto out_err;
+
err = kvm_hyp_init_protection(hyp_va_bits);
if (err) {
kvm_err("Failed to init hyp memory protection\n");
@@ -2470,21 +2652,27 @@ out_err:
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
{
- struct kvm_vcpu *vcpu;
+ struct kvm_vcpu *vcpu = NULL;
+ struct kvm_mpidr_data *data;
unsigned long i;
mpidr &= MPIDR_HWID_BITMASK;
- if (kvm->arch.mpidr_data) {
- u16 idx = kvm_mpidr_index(kvm->arch.mpidr_data, mpidr);
+ rcu_read_lock();
+ data = rcu_dereference(kvm->arch.mpidr_data);
- vcpu = kvm_get_vcpu(kvm,
- kvm->arch.mpidr_data->cmpidr_to_idx[idx]);
+ if (data) {
+ u16 idx = kvm_mpidr_index(data, mpidr);
+
+ vcpu = kvm_get_vcpu(kvm, data->cmpidr_to_idx[idx]);
if (mpidr != kvm_vcpu_get_mpidr_aff(vcpu))
vcpu = NULL;
+ }
+ rcu_read_unlock();
+
+ if (vcpu)
return vcpu;
- }
kvm_for_each_vcpu(i, vcpu, kvm) {
if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu))
@@ -2611,6 +2799,13 @@ static __init int kvm_arm_init(void)
if (err)
goto out_subs;
+ /*
+ * This should be called after initialization is done and failure isn't
+ * possible anymore.
+ */
+ if (!in_hyp_mode)
+ finalize_init_hyp_mode();
+
kvm_arm_initialised = true;
return 0;
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 4697ba41b3..54090967a3 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2117,6 +2117,26 @@ inject:
return true;
}
+static bool forward_traps(struct kvm_vcpu *vcpu, u64 control_bit)
+{
+ bool control_bit_set;
+
+ if (!vcpu_has_nv(vcpu))
+ return false;
+
+ control_bit_set = __vcpu_sys_reg(vcpu, HCR_EL2) & control_bit;
+ if (!is_hyp_ctxt(vcpu) && control_bit_set) {
+ kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+ return true;
+ }
+ return false;
+}
+
+bool forward_smc_trap(struct kvm_vcpu *vcpu)
+{
+ return forward_traps(vcpu, HCR_TSC);
+}
+
static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
{
u64 mode = spsr & PSR_MODE_MASK;
@@ -2152,44 +2172,47 @@ static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
{
- u64 spsr, elr, mode;
- bool direct_eret;
+ u64 spsr, elr, esr;
/*
- * Going through the whole put/load motions is a waste of time
- * if this is a VHE guest hypervisor returning to its own
- * userspace, or the hypervisor performing a local exception
- * return. No need to save/restore registers, no need to
- * switch S2 MMU. Just do the canonical ERET.
+ * Forward this trap to the virtual EL2 if the virtual
+ * HCR_EL2.NV bit is set and this is coming from !EL2.
*/
+ if (forward_traps(vcpu, HCR_NV))
+ return;
+
spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
spsr = kvm_check_illegal_exception_return(vcpu, spsr);
- mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
-
- direct_eret = (mode == PSR_MODE_EL0t &&
- vcpu_el2_e2h_is_set(vcpu) &&
- vcpu_el2_tge_is_set(vcpu));
- direct_eret |= (mode == PSR_MODE_EL2h || mode == PSR_MODE_EL2t);
-
- if (direct_eret) {
- *vcpu_pc(vcpu) = vcpu_read_sys_reg(vcpu, ELR_EL2);
- *vcpu_cpsr(vcpu) = spsr;
- trace_kvm_nested_eret(vcpu, *vcpu_pc(vcpu), spsr);
- return;
+ /* Check for an ERETAx */
+ esr = kvm_vcpu_get_esr(vcpu);
+ if (esr_iss_is_eretax(esr) && !kvm_auth_eretax(vcpu, &elr)) {
+ /*
+ * Oh no, ERETAx failed to authenticate.
+ *
+ * If we have FPACCOMBINE and we don't have a pending
+ * Illegal Execution State exception (which has priority
+ * over FPAC), deliver an exception right away.
+ *
+ * Otherwise, let the mangled ELR value trickle down the
+ * ERET handling, and the guest will have a little surprise.
+ */
+ if (kvm_has_pauth(vcpu->kvm, FPACCOMBINE) && !(spsr & PSR_IL_BIT)) {
+ esr &= ESR_ELx_ERET_ISS_ERETA;
+ esr |= FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_FPAC);
+ kvm_inject_nested_sync(vcpu, esr);
+ return;
+ }
}
preempt_disable();
kvm_arch_vcpu_put(vcpu);
- elr = __vcpu_sys_reg(vcpu, ELR_EL2);
+ if (!esr_iss_is_eretax(esr))
+ elr = __vcpu_sys_reg(vcpu, ELR_EL2);
trace_kvm_nested_eret(vcpu, elr, spsr);
- /*
- * Note that the current exception level is always the virtual EL2,
- * since we set HCR_EL2.NV bit only when entering the virtual EL2.
- */
*vcpu_pc(vcpu) = elr;
*vcpu_cpsr(vcpu) = spsr;
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 826307e19e..521b32868d 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -14,19 +14,6 @@
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>
-void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu)
-{
- struct task_struct *p = vcpu->arch.parent_task;
- struct user_fpsimd_state *fpsimd;
-
- if (!is_protected_kvm_enabled() || !p)
- return;
-
- fpsimd = &p->thread.uw.fpsimd_state;
- kvm_unshare_hyp(fpsimd, fpsimd + 1);
- put_task_struct(p);
-}
-
/*
* Called on entry to KVM_RUN unless this vcpu previously ran at least
* once and the most recent prior KVM_RUN for this vcpu was called from
@@ -38,30 +25,18 @@ void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu)
*/
int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu)
{
- int ret;
-
struct user_fpsimd_state *fpsimd = &current->thread.uw.fpsimd_state;
+ int ret;
- kvm_vcpu_unshare_task_fp(vcpu);
+ /* pKVM has its own tracking of the host fpsimd state. */
+ if (is_protected_kvm_enabled())
+ return 0;
/* Make sure the host task fpsimd state is visible to hyp: */
ret = kvm_share_hyp(fpsimd, fpsimd + 1);
if (ret)
return ret;
- vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd);
-
- /*
- * We need to keep current's task_struct pinned until its data has been
- * unshared with the hypervisor to make sure it is not re-used by the
- * kernel and donated to someone else while already shared -- see
- * kvm_vcpu_unshare_task_fp() for the matching put_task_struct().
- */
- if (is_protected_kvm_enabled()) {
- get_task_struct(current);
- vcpu->arch.parent_task = current;
- }
-
return 0;
}
@@ -86,7 +61,8 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
* guest in kvm_arch_vcpu_ctxflush_fp() and override this to
* FP_STATE_FREE if the flag set.
*/
- vcpu->arch.fp_state = FP_STATE_HOST_OWNED;
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+ *host_data_ptr(fpsimd_state) = kern_hyp_va(&current->thread.uw.fpsimd_state);
vcpu_clear_flag(vcpu, HOST_SVE_ENABLED);
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
@@ -110,10 +86,17 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
* been saved, this is very unlikely to happen.
*/
if (read_sysreg_s(SYS_SVCR) & (SVCR_SM_MASK | SVCR_ZA_MASK)) {
- vcpu->arch.fp_state = FP_STATE_FREE;
+ *host_data_ptr(fp_owner) = FP_STATE_FREE;
fpsimd_save_and_flush_cpu_state();
}
}
+
+ /*
+ * If normal guests gain SME support, maintain this behavior for pKVM
+ * guests, which don't support SME.
+ */
+ WARN_ON(is_protected_kvm_enabled() && system_supports_sme() &&
+ read_sysreg_s(SYS_SVCR));
}
/*
@@ -126,7 +109,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
{
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
- vcpu->arch.fp_state = FP_STATE_FREE;
+ *host_data_ptr(fp_owner) = FP_STATE_FREE;
}
/*
@@ -142,8 +125,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
WARN_ON_ONCE(!irqs_disabled());
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
-
+ if (guest_owns_fp_regs()) {
/*
* Currently we do not support SME guests so SVCR is
* always 0 and we just need a variable to point to.
@@ -186,9 +168,7 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
if (has_vhe() && system_supports_sme()) {
/* Also restore EL0 state seen on entry */
if (vcpu_get_flag(vcpu, HOST_SME_ENABLED))
- sysreg_clear_set(CPACR_EL1, 0,
- CPACR_EL1_SMEN_EL0EN |
- CPACR_EL1_SMEN_EL1EN);
+ sysreg_clear_set(CPACR_EL1, 0, CPACR_ELx_SMEN);
else
sysreg_clear_set(CPACR_EL1,
CPACR_EL1_SMEN_EL0EN,
@@ -196,16 +176,38 @@ void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
isb();
}
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) {
+ if (guest_owns_fp_regs()) {
if (vcpu_has_sve(vcpu)) {
__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
- /* Restore the VL that was saved when bound to the CPU */
+ /*
+ * Restore the VL that was saved when bound to the CPU,
+ * which is the maximum VL for the guest. Because the
+ * layout of the data when saving the sve state depends
+ * on the VL, we need to use a consistent (i.e., the
+ * maximum) VL.
+ * Note that this means that at guest exit ZCR_EL1 is
+ * not necessarily the same as on guest entry.
+ *
+ * Restoring the VL isn't needed in VHE mode since
+ * ZCR_EL2 (accessed via ZCR_EL1) would fulfill the same
+ * role when doing the save from EL2.
+ */
if (!has_vhe())
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1,
SYS_ZCR_EL1);
}
+ /*
+ * Flush (save and invalidate) the fpsimd/sve state so that if
+ * the host tries to use fpsimd/sve, it's not using stale data
+ * from the guest.
+ *
+ * Flushing the state sets the TIF_FOREIGN_FPSTATE bit for the
+ * context unconditionally, in both nVHE and VHE. This allows
+ * the kernel to restore the fpsimd/sve state, including ZCR_EL1
+ * when needed.
+ */
fpsimd_save_and_flush_cpu_state();
} else if (has_vhe() && system_supports_sve()) {
/*
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index e2f762d959..11098eb7eb 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -251,6 +251,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
case PSR_AA32_MODE_SVC:
case PSR_AA32_MODE_ABT:
case PSR_AA32_MODE_UND:
+ case PSR_AA32_MODE_SYS:
if (!vcpu_el1_is_32bit(vcpu))
return -EINVAL;
break;
@@ -276,7 +277,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if (*vcpu_cpsr(vcpu) & PSR_MODE32_BIT) {
int i, nr_reg;
- switch (*vcpu_cpsr(vcpu)) {
+ switch (*vcpu_cpsr(vcpu) & PSR_AA32_MODE_MASK) {
/*
* Either we are dealing with user mode, and only the
* first 15 registers (+ PC) must be narrowed to 32bit.
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 617ae6dea5..b037f0a0e2 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -56,6 +56,13 @@ static int handle_hvc(struct kvm_vcpu *vcpu)
static int handle_smc(struct kvm_vcpu *vcpu)
{
/*
+ * Forward this trapped smc instruction to the virtual EL2 if
+ * the guest has asked for it.
+ */
+ if (forward_smc_trap(vcpu))
+ return 1;
+
+ /*
* "If an SMC instruction executed at Non-secure EL1 is
* trapped to EL2 because HCR_EL2.TSC is 1, the exception is a
* Trap exception, not a Secure Monitor Call exception [...]"
@@ -207,19 +214,40 @@ static int handle_sve(struct kvm_vcpu *vcpu)
}
/*
- * Guest usage of a ptrauth instruction (which the guest EL1 did not turn into
- * a NOP). If we get here, it is that we didn't fixup ptrauth on exit, and all
- * that we can do is give the guest an UNDEF.
+ * Two possibilities to handle a trapping ptrauth instruction:
+ *
+ * - Guest usage of a ptrauth instruction (which the guest EL1 did not
+ * turn into a NOP). If we get here, it is because we didn't enable
+ * ptrauth for the guest. This results in an UNDEF, as it isn't
+ * supposed to use ptrauth without being told it could.
+ *
+ * - Running an L2 NV guest while L1 has left HCR_EL2.API==0, and for
+ * which we reinject the exception into L1.
+ *
+ * Anything else is an emulation bug (hence the WARN_ON + UNDEF).
*/
static int kvm_handle_ptrauth(struct kvm_vcpu *vcpu)
{
+ if (!vcpu_has_ptrauth(vcpu)) {
+ kvm_inject_undefined(vcpu);
+ return 1;
+ }
+
+ if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
+ kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+ return 1;
+ }
+
+ /* Really shouldn't be here! */
+ WARN_ON_ONCE(1);
kvm_inject_undefined(vcpu);
return 1;
}
static int kvm_handle_eret(struct kvm_vcpu *vcpu)
{
- if (kvm_vcpu_get_esr(vcpu) & ESR_ELx_ERET_ISS_ERET)
+ if (esr_iss_is_eretax(kvm_vcpu_get_esr(vcpu)) &&
+ !vcpu_has_ptrauth(vcpu))
return kvm_handle_ptrauth(vcpu);
/*
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index a38dea6186..d61e44642f 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -3,7 +3,7 @@
# Makefile for Kernel-based Virtual Machine module, HYP part
#
-incdir := $(srctree)/$(src)/include
+incdir := $(src)/include
subdir-asflags-y := -I$(incdir)
subdir-ccflags-y := -I$(incdir)
diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c
index 8d9670e661..449fa58cf3 100644
--- a/arch/arm64/kvm/hyp/aarch32.c
+++ b/arch/arm64/kvm/hyp/aarch32.c
@@ -50,9 +50,23 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu)
u32 cpsr_cond;
int cond;
- /* Top two bits non-zero? Unconditional. */
- if (kvm_vcpu_get_esr(vcpu) >> 30)
+ /*
+ * These are the exception classes that could fire with a
+ * conditional instruction.
+ */
+ switch (kvm_vcpu_trap_get_class(vcpu)) {
+ case ESR_ELx_EC_CP15_32:
+ case ESR_ELx_EC_CP15_64:
+ case ESR_ELx_EC_CP14_MR:
+ case ESR_ELx_EC_CP14_LS:
+ case ESR_ELx_EC_FP_ASIMD:
+ case ESR_ELx_EC_CP10_ID:
+ case ESR_ELx_EC_CP14_64:
+ case ESR_ELx_EC_SVC32:
+ break;
+ default:
return true;
+ }
/* Is condition field valid? */
cond = kvm_vcpu_get_condition(vcpu);
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
index 61e6f3ba7b..e950875e31 100644
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -25,3 +25,9 @@ SYM_FUNC_START(__sve_restore_state)
sve_load 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_restore_state)
+
+SYM_FUNC_START(__sve_save_state)
+ mov x2, #1
+ sve_save 0, x1, x2, 3
+ ret
+SYM_FUNC_END(__sve_save_state)
diff --git a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
index 961bbef104..d00093699a 100644
--- a/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/debug-sr.h
@@ -135,9 +135,9 @@ static inline void __debug_switch_to_guest_common(struct kvm_vcpu *vcpu)
if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
return;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
- host_dbg = &vcpu->arch.host_debug_state.regs;
+ host_dbg = host_data_ptr(host_debug_state.regs);
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
__debug_save_state(host_dbg, host_ctxt);
@@ -154,9 +154,9 @@ static inline void __debug_switch_to_host_common(struct kvm_vcpu *vcpu)
if (!vcpu_get_flag(vcpu, DEBUG_DIRTY))
return;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
- host_dbg = &vcpu->arch.host_debug_state.regs;
+ host_dbg = host_data_ptr(host_debug_state.regs);
guest_dbg = kern_hyp_va(vcpu->arch.debug_ptr);
__debug_save_state(guest_dbg, guest_ctxt);
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index e3fcf8c4d5..0c4de44534 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -27,6 +27,7 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>
#include <asm/kvm_nested.h>
+#include <asm/kvm_ptrauth.h>
#include <asm/fpsimd.h>
#include <asm/debug-monitors.h>
#include <asm/processor.h>
@@ -39,12 +40,6 @@ struct kvm_exception_table_entry {
extern struct kvm_exception_table_entry __start___kvm_ex_table;
extern struct kvm_exception_table_entry __stop___kvm_ex_table;
-/* Check whether the FP regs are owned by the guest */
-static inline bool guest_owns_fp_regs(struct kvm_vcpu *vcpu)
-{
- return vcpu->arch.fp_state == FP_STATE_GUEST_OWNED;
-}
-
/* Save the 32-bit only FPSIMD system register state */
static inline void __fpsimd_save_fpexc32(struct kvm_vcpu *vcpu)
{
@@ -155,7 +150,7 @@ static inline bool cpu_has_amu(void)
static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
- struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
CHECK_FGT_MASKS(HFGRTR_EL2);
@@ -191,7 +186,7 @@ static inline void __activate_traps_hfgxtr(struct kvm_vcpu *vcpu)
static inline void __deactivate_traps_hfgxtr(struct kvm_vcpu *vcpu)
{
- struct kvm_cpu_context *hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ struct kvm_cpu_context *hctxt = host_data_ptr(host_ctxt);
struct kvm *kvm = kern_hyp_va(vcpu->kvm);
if (!cpus_have_final_cap(ARM64_HAS_FGT))
@@ -226,13 +221,13 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
write_sysreg(0, pmselr_el0);
- hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ hctxt = host_data_ptr(host_ctxt);
ctxt_sys_reg(hctxt, PMUSERENR_EL0) = read_sysreg(pmuserenr_el0);
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
vcpu_set_flag(vcpu, PMUSERENR_ON_CPU);
}
- vcpu->arch.mdcr_el2_host = read_sysreg(mdcr_el2);
+ *host_data_ptr(host_debug_state.mdcr_el2) = read_sysreg(mdcr_el2);
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
if (cpus_have_final_cap(ARM64_HAS_HCX)) {
@@ -254,13 +249,13 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
{
- write_sysreg(vcpu->arch.mdcr_el2_host, mdcr_el2);
+ write_sysreg(*host_data_ptr(host_debug_state.mdcr_el2), mdcr_el2);
write_sysreg(0, hstr_el2);
if (kvm_arm_support_pmu_v3()) {
struct kvm_cpu_context *hctxt;
- hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ hctxt = host_data_ptr(host_ctxt);
write_sysreg(ctxt_sys_reg(hctxt, PMUSERENR_EL0), pmuserenr_el0);
vcpu_clear_flag(vcpu, PMUSERENR_ON_CPU);
}
@@ -271,10 +266,8 @@ static inline void __deactivate_traps_common(struct kvm_vcpu *vcpu)
__deactivate_traps_hfgxtr(vcpu);
}
-static inline void ___activate_traps(struct kvm_vcpu *vcpu)
+static inline void ___activate_traps(struct kvm_vcpu *vcpu, u64 hcr)
{
- u64 hcr = vcpu->arch.hcr_el2;
-
if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM))
hcr |= HCR_TVM;
@@ -323,10 +316,24 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
{
sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
__sve_restore_state(vcpu_sve_pffr(vcpu),
- &vcpu->arch.ctxt.fp_regs.fpsr);
+ &vcpu->arch.ctxt.fp_regs.fpsr,
+ true);
write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
}
+static inline void __hyp_sve_save_host(void)
+{
+ struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
+
+ sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+ __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+ &sve_state->fpsr,
+ true);
+}
+
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
+
/*
* We trap the first access to the FP/SIMD to save the host context and
* restore the guest context lazily.
@@ -337,7 +344,6 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
{
bool sve_guest;
u8 esr_ec;
- u64 reg;
if (!system_supports_fpsimd())
return false;
@@ -360,24 +366,15 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
/* Valid trap. Switch the context: */
/* First disable enough traps to allow us to update the registers */
- if (has_vhe() || has_hvhe()) {
- reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
- if (sve_guest)
- reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
-
- sysreg_clear_set(cpacr_el1, 0, reg);
- } else {
- reg = CPTR_EL2_TFP;
- if (sve_guest)
- reg |= CPTR_EL2_TZ;
-
- sysreg_clear_set(cptr_el2, reg, 0);
- }
+ if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
+ cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
+ else
+ cpacr_clear_set(0, CPACR_ELx_FPEN);
isb();
/* Write out the host state if it's in the registers */
- if (vcpu->arch.fp_state == FP_STATE_HOST_OWNED)
- __fpsimd_save_state(vcpu->arch.host_fpsimd_state);
+ if (host_owns_fp_regs())
+ kvm_hyp_save_fpsimd_host(vcpu);
/* Restore the guest state */
if (sve_guest)
@@ -389,7 +386,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
if (!(read_sysreg(hcr_el2) & HCR_RW))
write_sysreg(__vcpu_sys_reg(vcpu, FPEXC32_EL2), fpexc32_el2);
- vcpu->arch.fp_state = FP_STATE_GUEST_OWNED;
+ *host_data_ptr(fp_owner) = FP_STATE_GUEST_OWNED;
return true;
}
@@ -449,60 +446,6 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu)
return true;
}
-static inline bool esr_is_ptrauth_trap(u64 esr)
-{
- switch (esr_sys64_to_sysreg(esr)) {
- case SYS_APIAKEYLO_EL1:
- case SYS_APIAKEYHI_EL1:
- case SYS_APIBKEYLO_EL1:
- case SYS_APIBKEYHI_EL1:
- case SYS_APDAKEYLO_EL1:
- case SYS_APDAKEYHI_EL1:
- case SYS_APDBKEYLO_EL1:
- case SYS_APDBKEYHI_EL1:
- case SYS_APGAKEYLO_EL1:
- case SYS_APGAKEYHI_EL1:
- return true;
- }
-
- return false;
-}
-
-#define __ptrauth_save_key(ctxt, key) \
- do { \
- u64 __val; \
- __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \
- ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \
- __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \
- ctxt_sys_reg(ctxt, key ## KEYHI_EL1) = __val; \
-} while(0)
-
-DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
-
-static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code)
-{
- struct kvm_cpu_context *ctxt;
- u64 val;
-
- if (!vcpu_has_ptrauth(vcpu))
- return false;
-
- ctxt = this_cpu_ptr(&kvm_hyp_ctxt);
- __ptrauth_save_key(ctxt, APIA);
- __ptrauth_save_key(ctxt, APIB);
- __ptrauth_save_key(ctxt, APDA);
- __ptrauth_save_key(ctxt, APDB);
- __ptrauth_save_key(ctxt, APGA);
-
- vcpu_ptrauth_enable(vcpu);
-
- val = read_sysreg(hcr_el2);
- val |= (HCR_API | HCR_APK);
- write_sysreg(val, hcr_el2);
-
- return true;
-}
-
static bool kvm_hyp_handle_cntpct(struct kvm_vcpu *vcpu)
{
struct arch_timer_context *ctxt;
@@ -590,9 +533,6 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code)
__vgic_v3_perform_cpuif_access(vcpu) == 1)
return true;
- if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu)))
- return kvm_hyp_handle_ptrauth(vcpu, exit_code);
-
if (kvm_hyp_handle_cntpct(vcpu))
return true;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 82b3d62538..24a9a8330d 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -53,6 +53,11 @@ pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu)
return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm);
}
+static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+ return vcpu_is_protected(&hyp_vcpu->vcpu);
+}
+
void pkvm_hyp_vm_table_init(void *tbl);
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 2250253a64..50fa0ffb6b 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -97,16 +97,3 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI)
# causes a build failure. Remove profile optimization flags.
KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%, $(KBUILD_CFLAGS))
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables
-
-# KVM nVHE code is run at a different exception code with a different map, so
-# compiler instrumentation that inserts callbacks or checks into the code may
-# cause crashes. Just disable it.
-GCOV_PROFILE := n
-KASAN_SANITIZE := n
-KCSAN_SANITIZE := n
-UBSAN_SANITIZE := n
-KCOV_INSTRUMENT := n
-
-# Skip objtool checking for this directory because nVHE code is compiled with
-# non-standard build rules.
-OBJECT_FILES_NON_STANDARD := y
diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
index 7746ea507b..53efda0235 100644
--- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c
+++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c
@@ -83,10 +83,10 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
/* Disable and flush SPE data generation */
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
- __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1));
/* Disable and flush Self-Hosted Trace generation */
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
- __debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1);
+ __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1));
}
void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
@@ -97,9 +97,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE))
- __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+ __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1));
if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE))
- __debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1);
+ __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1));
}
void __debug_switch_to_host(struct kvm_vcpu *vcpu)
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index 320f2eaa14..efb053af33 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -177,6 +177,14 @@ static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len)
res);
}
+static void ffa_rx_release(struct arm_smccc_res *res)
+{
+ arm_smccc_1_1_smc(FFA_RX_RELEASE,
+ 0, 0,
+ 0, 0, 0, 0, 0,
+ res);
+}
+
static void do_ffa_rxtx_map(struct arm_smccc_res *res,
struct kvm_cpu_context *ctxt)
{
@@ -543,16 +551,19 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
if (WARN_ON(offset > len ||
fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)) {
ret = FFA_RET_ABORTED;
+ ffa_rx_release(res);
goto out_unlock;
}
if (len > ffa_desc_buf.len) {
ret = FFA_RET_NO_MEMORY;
+ ffa_rx_release(res);
goto out_unlock;
}
buf = ffa_desc_buf.buf;
memcpy(buf, hyp_buffers.rx, fraglen);
+ ffa_rx_release(res);
for (fragoff = fraglen; fragoff < len; fragoff += fraglen) {
ffa_mem_frag_rx(res, handle_lo, handle_hi, fragoff);
@@ -563,6 +574,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res,
fraglen = res->a3;
memcpy((void *)buf + fragoff, hyp_buffers.rx, fraglen);
+ ffa_rx_release(res);
}
ffa_mem_reclaim(res, handle_lo, handle_hi, flags);
@@ -600,7 +612,6 @@ static bool ffa_call_supported(u64 func_id)
case FFA_MSG_POLL:
case FFA_MSG_WAIT:
/* 32-bit variants of 64-bit calls */
- case FFA_MSG_SEND_DIRECT_REQ:
case FFA_MSG_SEND_DIRECT_RESP:
case FFA_RXTX_MAP:
case FFA_MEM_DONATE:
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 2385fd03ed..f43d845f3c 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -23,26 +23,84 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
+static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
+{
+ __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
+ /*
+ * On saving/restoring guest sve state, always use the maximum VL for
+ * the guest. The layout of the data when saving the sve state depends
+ * on the VL, so use a consistent (i.e., the maximum) guest VL.
+ */
+ sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
+ __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true);
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+}
+
+static void __hyp_sve_restore_host(void)
+{
+ struct cpu_sve_state *sve_state = *host_data_ptr(sve_state);
+
+ /*
+ * On saving/restoring host sve state, always use the maximum VL for
+ * the host. The layout of the data when saving the sve state depends
+ * on the VL, so use a consistent (i.e., the maximum) host VL.
+ *
+ * Setting ZCR_EL2 to ZCR_ELx_LEN_MASK sets the effective length
+ * supported by the system (or limited at EL3).
+ */
+ write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+ __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+ &sve_state->fpsr,
+ true);
+ write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
+}
+
+static void fpsimd_sve_flush(void)
+{
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
+static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
+{
+ if (!guest_owns_fp_regs())
+ return;
+
+ cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN);
+ isb();
+
+ if (vcpu_has_sve(vcpu))
+ __hyp_sve_save_guest(vcpu);
+ else
+ __fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
+
+ if (system_supports_sve())
+ __hyp_sve_restore_host();
+ else
+ __fpsimd_restore_state(*host_data_ptr(fpsimd_state));
+
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+ fpsimd_sve_flush();
+
hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state);
- hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl;
+ /* Limit guest vector length to the maximum supported by the host. */
+ hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu;
hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2;
hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
- hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2;
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
- hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state;
hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr);
- hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state;
hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2;
@@ -56,15 +114,15 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
unsigned int i;
+ fpsimd_sve_sync(&hyp_vcpu->vcpu);
+
host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;
host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2;
- host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2;
host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault;
host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags;
- host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state;
host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr;
for (i = 0; i < hyp_cpu_if->used_lrs; ++i)
@@ -82,6 +140,17 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
struct pkvm_hyp_vcpu *hyp_vcpu;
struct kvm *host_kvm;
+ /*
+ * KVM (and pKVM) doesn't support SME guests for now, and
+ * ensures that SME features aren't enabled in pstate when
+ * loading a vcpu. Therefore, if SME features enabled the host
+ * is misbehaving.
+ */
+ if (unlikely(system_supports_sme() && read_sysreg_s(SYS_SVCR))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
host_kvm = kern_hyp_va(host_vcpu->kvm);
hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle,
host_vcpu->vcpu_idx);
@@ -178,16 +247,6 @@ static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config();
}
-static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt)
-{
- cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr();
-}
-
-static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt)
-{
- __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1));
-}
-
static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt)
{
__vgic_v3_init_lrs();
@@ -198,18 +257,18 @@ static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt)
cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2();
}
-static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt)
+static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
- __vgic_v3_save_aprs(kern_hyp_va(cpu_if));
+ __vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if));
}
-static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt)
+static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1);
- __vgic_v3_restore_aprs(kern_hyp_va(cpu_if));
+ __vgic_v3_restore_vmcr_aprs(kern_hyp_va(cpu_if));
}
static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt)
@@ -340,10 +399,8 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
- HANDLE_FUNC(__vgic_v3_read_vmcr),
- HANDLE_FUNC(__vgic_v3_write_vmcr),
- HANDLE_FUNC(__vgic_v3_save_aprs),
- HANDLE_FUNC(__vgic_v3_restore_aprs),
+ HANDLE_FUNC(__vgic_v3_save_vmcr_aprs),
+ HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs),
HANDLE_FUNC(__pkvm_vcpu_init_traps),
HANDLE_FUNC(__pkvm_init_vm),
HANDLE_FUNC(__pkvm_init_vcpu),
@@ -420,11 +477,7 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
handle_host_smc(host_ctxt);
break;
case ESR_ELx_EC_SVE:
- if (has_hvhe())
- sysreg_clear_set(cpacr_el1, 0, (CPACR_EL1_ZEN_EL1EN |
- CPACR_EL1_ZEN_EL0EN));
- else
- sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
+ cpacr_clear_set(0, CPACR_ELx_ZEN);
isb();
sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
break;
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 861c76021a..caba3e4bd0 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -533,7 +533,13 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
int ret = 0;
esr = read_sysreg_el2(SYS_ESR);
- BUG_ON(!__get_fault_info(esr, &fault));
+ if (!__get_fault_info(esr, &fault)) {
+ /*
+ * We've presumably raced with a page-table change which caused
+ * AT to fail, try again.
+ */
+ return;
+ }
addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
ret = host_stage2_idmap(addr);
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 26dd9a20ad..95cf185742 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -18,6 +18,8 @@ unsigned long __icache_flags;
/* Used by kvm_get_vttbr(). */
unsigned int kvm_arm_vmid_bits;
+unsigned int kvm_host_sve_max_vl;
+
/*
* Set trap register values based on features in ID_AA64PFR0.
*/
@@ -63,7 +65,7 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu)
/* Trap SVE */
if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) {
if (has_hvhe())
- cptr_clear |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
+ cptr_clear |= CPACR_ELx_ZEN;
else
cptr_set |= CPTR_EL2_TZ;
}
@@ -200,7 +202,7 @@ static void pvm_init_trap_regs(struct kvm_vcpu *vcpu)
}
/*
- * Initialize trap register values for protected VMs.
+ * Initialize trap register values in protected mode.
*/
void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
{
@@ -430,6 +432,7 @@ static void *map_donated_memory(unsigned long host_va, size_t size)
static void __unmap_donated_memory(void *va, size_t size)
{
+ kvm_flush_dcache_to_poc(va, size);
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va),
PAGE_ALIGN(size) >> PAGE_SHIFT));
}
@@ -574,6 +577,8 @@ unlock:
if (ret)
unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
+ hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
+
return ret;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index d57bcb6ab9..dfe8fe0f7e 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -205,7 +205,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
struct psci_boot_args *boot_args;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
if (is_cpu_on)
boot_args = this_cpu_ptr(&cpu_on_args);
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index bc58d1b515..f4350ba07b 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -67,6 +67,28 @@ static int divide_memory_pool(void *virt, unsigned long size)
return 0;
}
+static int pkvm_create_host_sve_mappings(void)
+{
+ void *start, *end;
+ int ret, i;
+
+ if (!system_supports_sve())
+ return 0;
+
+ for (i = 0; i < hyp_nr_cpus; i++) {
+ struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
+ struct cpu_sve_state *sve_state = host_data->sve_state;
+
+ start = kern_hyp_va(sve_state);
+ end = start + PAGE_ALIGN(pkvm_host_sve_state_size());
+ ret = pkvm_create_mappings(start, end, PAGE_HYP);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
unsigned long *per_cpu_base,
u32 hyp_va_bits)
@@ -125,6 +147,8 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
return ret;
}
+ pkvm_create_host_sve_mappings();
+
/*
* Map the host sections RO in the hypervisor, but transfer the
* ownership from the host to the hypervisor itself to make sure they
@@ -257,8 +281,7 @@ static int fix_hyp_pgtable_refcnt(void)
void __noreturn __pkvm_init_finalise(void)
{
- struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
- struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt;
+ struct kvm_cpu_context *host_ctxt = host_data_ptr(host_ctxt);
unsigned long nr_pages, reserved_pages, pfn;
int ret;
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index c50f8459e4..6af179c635 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -40,7 +40,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
- ___activate_traps(vcpu);
+ ___activate_traps(vcpu, vcpu->arch.hcr_el2);
__activate_traps_common(vcpu);
val = vcpu->arch.cptr_el2;
@@ -48,15 +48,14 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA;
if (cpus_have_final_cap(ARM64_SME)) {
if (has_hvhe())
- val &= ~(CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN);
+ val &= ~CPACR_ELx_SMEN;
else
val |= CPTR_EL2_TSM;
}
- if (!guest_owns_fp_regs(vcpu)) {
+ if (!guest_owns_fp_regs()) {
if (has_hvhe())
- val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
- CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN);
+ val &= ~(CPACR_ELx_FPEN | CPACR_ELx_ZEN);
else
val |= CPTR_EL2_TFP | CPTR_EL2_TZ;
@@ -182,6 +181,25 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
kvm_handle_pvm_sysreg(vcpu, exit_code));
}
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Non-protected kvm relies on the host restoring its sve state.
+ * Protected kvm restores the host's sve state as not to reveal that
+ * fpsimd was used by a guest nor leak upper sve bits.
+ */
+ if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
+ __hyp_sve_save_host();
+
+ /* Re-enable SVE traps if not supported for the guest vcpu. */
+ if (!vcpu_has_sve(vcpu))
+ cpacr_clear_set(CPACR_ELx_ZEN, 0);
+
+ } else {
+ __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+ }
+}
+
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
@@ -191,7 +209,6 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
@@ -203,13 +220,12 @@ static const exit_handler_fn pvm_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
{
- if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm))))
+ if (unlikely(vcpu_is_protected(vcpu)))
return pvm_exit_handlers;
return hyp_exit_handlers;
@@ -228,9 +244,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu)
*/
static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
-
- if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) {
+ if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) {
/*
* As we have caught the guest red-handed, decide that it isn't
* fit for purpose anymore by making the vcpu invalid. The VMM
@@ -264,7 +278,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
pmr_sync();
}
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
host_ctxt->__hyp_running_vcpu = vcpu;
guest_ctxt = &vcpu->arch.ctxt;
@@ -337,7 +351,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
__sysreg_restore_state_nvhe(host_ctxt);
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
+ if (guest_owns_fp_regs())
__fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
@@ -367,7 +381,7 @@ asmlinkage void __noreturn hyp_panic(void)
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
vcpu = host_ctxt->__hyp_running_vcpu;
if (vcpu) {
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index 2fc68da403..ca3c09df8d 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -11,13 +11,23 @@
#include <nvhe/mem_protect.h>
struct tlb_inv_context {
- u64 tcr;
+ struct kvm_s2_mmu *mmu;
+ u64 tcr;
+ u64 sctlr;
};
-static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
- struct tlb_inv_context *cxt,
- bool nsh)
+static void enter_vmid_context(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt,
+ bool nsh)
{
+ struct kvm_s2_mmu *host_s2_mmu = &host_mmu.arch.mmu;
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_vcpu *vcpu;
+
+ host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+ cxt->mmu = NULL;
+
/*
* We have two requirements:
*
@@ -40,20 +50,55 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
else
dsb(ish);
+ /*
+ * If we're already in the desired context, then there's nothing to do.
+ */
+ if (vcpu) {
+ /*
+ * We're in guest context. However, for this to work, this needs
+ * to be called from within __kvm_vcpu_run(), which ensures that
+ * __hyp_running_vcpu is set to the current guest vcpu.
+ */
+ if (mmu == vcpu->arch.hw_mmu || WARN_ON(mmu != host_s2_mmu))
+ return;
+
+ cxt->mmu = vcpu->arch.hw_mmu;
+ } else {
+ /* We're in host context. */
+ if (mmu == host_s2_mmu)
+ return;
+
+ cxt->mmu = host_s2_mmu;
+ }
+
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
u64 val;
/*
* For CPUs that are affected by ARM 1319367, we need to
- * avoid a host Stage-1 walk while we have the guest's
- * VMID set in the VTTBR in order to invalidate TLBs.
- * We're guaranteed that the S1 MMU is enabled, so we can
- * simply set the EPD bits to avoid any further TLB fill.
+ * avoid a Stage-1 walk with the old VMID while we have
+ * the new VMID set in the VTTBR in order to invalidate TLBs.
+ * We're guaranteed that the host S1 MMU is enabled, so
+ * we can simply set the EPD bits to avoid any further
+ * TLB fill. For guests, we ensure that the S1 MMU is
+ * temporarily enabled in the next context.
*/
val = cxt->tcr = read_sysreg_el1(SYS_TCR);
val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
write_sysreg_el1(val, SYS_TCR);
isb();
+
+ if (vcpu) {
+ val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
+ if (!(val & SCTLR_ELx_M)) {
+ val |= SCTLR_ELx_M;
+ write_sysreg_el1(val, SYS_SCTLR);
+ isb();
+ }
+ } else {
+ /* The host S1 MMU is always enabled. */
+ cxt->sctlr = SCTLR_ELx_M;
+ }
}
/*
@@ -62,18 +107,40 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
* ensuring that we always have an ISB, but not two ISBs back
* to back.
*/
- __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ if (vcpu)
+ __load_host_stage2();
+ else
+ __load_stage2(mmu, kern_hyp_va(mmu->arch));
+
asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT));
}
-static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+static void exit_vmid_context(struct tlb_inv_context *cxt)
{
- __load_host_stage2();
+ struct kvm_s2_mmu *mmu = cxt->mmu;
+ struct kvm_cpu_context *host_ctxt;
+ struct kvm_vcpu *vcpu;
+
+ host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ vcpu = host_ctxt->__hyp_running_vcpu;
+
+ if (!mmu)
+ return;
+
+ if (vcpu)
+ __load_stage2(mmu, kern_hyp_va(mmu->arch));
+ else
+ __load_host_stage2();
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
- /* Ensure write of the host VMID */
+ /* Ensure write of the old VMID */
isb();
- /* Restore the host's TCR_EL1 */
+
+ if (!(cxt->sctlr & SCTLR_ELx_M)) {
+ write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
+ isb();
+ }
+
write_sysreg_el1(cxt->tcr, SYS_TCR);
}
}
@@ -84,7 +151,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
/*
* We could do so much better if we had the VA as well.
@@ -105,7 +172,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
@@ -114,7 +181,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, true);
+ enter_vmid_context(mmu, &cxt, true);
/*
* We could do so much better if we had the VA as well.
@@ -135,7 +202,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
@@ -152,7 +219,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
start = round_down(start, stride);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
TLBI_TTL_UNKNOWN);
@@ -162,7 +229,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
@@ -170,13 +237,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
@@ -184,19 +251,19 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt, false);
+ enter_vmid_context(mmu, &cxt, false);
__tlbi(vmalle1);
asm volatile("ic iallu");
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_vm_context(void)
{
- /* Same remark as in __tlb_switch_to_guest() */
+ /* Same remark as in enter_vmid_context() */
dsb(ish);
__tlbi(alle1is);
dsb(ish);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 5a59ef88b6..9e2bbee774 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -914,12 +914,12 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
{
u64 memattr = pte & KVM_PTE_LEAF_ATTR_LO_S2_MEMATTR;
- return memattr == KVM_S2_MEMATTR(pgt, NORMAL);
+ return kvm_pte_valid(pte) && memattr == KVM_S2_MEMATTR(pgt, NORMAL);
}
static bool stage2_pte_executable(kvm_pte_t pte)
{
- return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
+ return kvm_pte_valid(pte) && !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
}
static u64 stage2_map_walker_phys_addr(const struct kvm_pgtable_visit_ctx *ctx,
@@ -979,6 +979,21 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
if (!stage2_pte_needs_update(ctx->old, new))
return -EAGAIN;
+ /* If we're only changing software bits, then store them and go! */
+ if (!kvm_pgtable_walk_shared(ctx) &&
+ !((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) {
+ bool old_is_counted = stage2_pte_is_counted(ctx->old);
+
+ if (old_is_counted != stage2_pte_is_counted(new)) {
+ if (old_is_counted)
+ mm_ops->put_page(ctx->ptep);
+ else
+ mm_ops->get_page(ctx->ptep);
+ }
+ WARN_ON_ONCE(!stage2_try_set_pte(ctx, new));
+ return 0;
+ }
+
if (!stage2_try_break_pte(ctx, data->mmu))
return -EAGAIN;
@@ -1370,7 +1385,7 @@ static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx,
struct kvm_pgtable *pgt = ctx->arg;
struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
- if (!kvm_pte_valid(ctx->old) || !stage2_pte_cacheable(pgt, ctx->old))
+ if (!stage2_pte_cacheable(pgt, ctx->old))
return 0;
if (mm_ops->dcache_clean_inval_poc)
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 6cb638b184..7b397fad26 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -330,7 +330,7 @@ void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if)
write_gicreg(0, ICH_HCR_EL2);
}
-void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
+static void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -363,7 +363,7 @@ void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if)
}
}
-void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
+static void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if)
{
u64 val;
u32 nr_pre_bits;
@@ -455,16 +455,35 @@ u64 __vgic_v3_get_gic_config(void)
return val;
}
-u64 __vgic_v3_read_vmcr(void)
+static u64 __vgic_v3_read_vmcr(void)
{
return read_gicreg(ICH_VMCR_EL2);
}
-void __vgic_v3_write_vmcr(u32 vmcr)
+static void __vgic_v3_write_vmcr(u32 vmcr)
{
write_gicreg(vmcr, ICH_VMCR_EL2);
}
+void __vgic_v3_save_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
+{
+ __vgic_v3_save_aprs(cpu_if);
+ if (cpu_if->vgic_sre)
+ cpu_if->vgic_vmcr = __vgic_v3_read_vmcr();
+}
+
+void __vgic_v3_restore_vmcr_aprs(struct vgic_v3_cpu_if *cpu_if)
+{
+ /*
+ * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
+ * is dependent on ICC_SRE_EL1.SRE, and we have to perform the
+ * VMCR_EL2 save/restore in the world switch.
+ */
+ if (cpu_if->vgic_sre)
+ __vgic_v3_write_vmcr(cpu_if->vgic_vmcr);
+ __vgic_v3_restore_aprs(cpu_if);
+}
+
static int __vgic_v3_bpr_min(void)
{
/* See Pseudocode for VPriorityGroup */
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 1581df6aec..8fbb6a2e05 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -33,11 +33,43 @@ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data);
DEFINE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
DEFINE_PER_CPU(unsigned long, kvm_hyp_vector);
+/*
+ * HCR_EL2 bits that the NV guest can freely change (no RES0/RES1
+ * semantics, irrespective of the configuration), but that cannot be
+ * applied to the actual HW as things would otherwise break badly.
+ *
+ * - TGE: we want the guest to use EL1, which is incompatible with
+ * this bit being set
+ *
+ * - API/APK: they are already accounted for by vcpu_load(), and can
+ * only take effect across a load/put cycle (such as ERET)
+ */
+#define NV_HCR_GUEST_EXCLUDE (HCR_TGE | HCR_API | HCR_APK)
+
+static u64 __compute_hcr(struct kvm_vcpu *vcpu)
+{
+ u64 hcr = vcpu->arch.hcr_el2;
+
+ if (!vcpu_has_nv(vcpu))
+ return hcr;
+
+ if (is_hyp_ctxt(vcpu)) {
+ hcr |= HCR_NV | HCR_NV2 | HCR_AT | HCR_TTLB;
+
+ if (!vcpu_el2_e2h_is_set(vcpu))
+ hcr |= HCR_NV1;
+
+ write_sysreg_s(vcpu->arch.ctxt.vncr_array, SYS_VNCR_EL2);
+ }
+
+ return hcr | (__vcpu_sys_reg(vcpu, HCR_EL2) & ~NV_HCR_GUEST_EXCLUDE);
+}
+
static void __activate_traps(struct kvm_vcpu *vcpu)
{
u64 val;
- ___activate_traps(vcpu);
+ ___activate_traps(vcpu, __compute_hcr(vcpu));
if (has_cntpoff()) {
struct timer_map map;
@@ -61,8 +93,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val = read_sysreg(cpacr_el1);
val |= CPACR_ELx_TTA;
- val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN |
- CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN);
+ val &= ~(CPACR_ELx_ZEN | CPACR_ELx_SMEN);
/*
* With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to
@@ -75,11 +106,11 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
val |= CPTR_EL2_TAM;
- if (guest_owns_fp_regs(vcpu)) {
+ if (guest_owns_fp_regs()) {
if (vcpu_has_sve(vcpu))
- val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
+ val |= CPACR_ELx_ZEN;
} else {
- val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
+ val &= ~CPACR_ELx_FPEN;
__activate_traps_fpsimd32(vcpu);
}
@@ -162,6 +193,8 @@ static void __vcpu_put_deactivate_traps(struct kvm_vcpu *vcpu)
void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu)
{
+ host_data_ptr(host_ctxt)->__hyp_running_vcpu = vcpu;
+
__vcpu_load_switch_sysregs(vcpu);
__vcpu_load_activate_traps(vcpu);
__load_stage2(vcpu->arch.hw_mmu, vcpu->arch.hw_mmu->arch);
@@ -171,6 +204,66 @@ void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
{
__vcpu_put_deactivate_traps(vcpu);
__vcpu_put_switch_sysregs(vcpu);
+
+ host_data_ptr(host_ctxt)->__hyp_running_vcpu = NULL;
+}
+
+static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ u64 spsr, elr, mode;
+
+ /*
+ * Going through the whole put/load motions is a waste of time
+ * if this is a VHE guest hypervisor returning to its own
+ * userspace, or the hypervisor performing a local exception
+ * return. No need to save/restore registers, no need to
+ * switch S2 MMU. Just do the canonical ERET.
+ *
+ * Unless the trap has to be forwarded further down the line,
+ * of course...
+ */
+ if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV) ||
+ (__vcpu_sys_reg(vcpu, HFGITR_EL2) & HFGITR_EL2_ERET))
+ return false;
+
+ spsr = read_sysreg_el1(SYS_SPSR);
+ mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+ switch (mode) {
+ case PSR_MODE_EL0t:
+ if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
+ return false;
+ break;
+ case PSR_MODE_EL2t:
+ mode = PSR_MODE_EL1t;
+ break;
+ case PSR_MODE_EL2h:
+ mode = PSR_MODE_EL1h;
+ break;
+ default:
+ return false;
+ }
+
+ /* If ERETAx fails, take the slow path */
+ if (esr_iss_is_eretax(esr)) {
+ if (!(vcpu_has_ptrauth(vcpu) && kvm_auth_eretax(vcpu, &elr)))
+ return false;
+ } else {
+ elr = read_sysreg_el1(SYS_ELR);
+ }
+
+ spsr = (spsr & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode;
+
+ write_sysreg_el2(spsr, SYS_SPSR);
+ write_sysreg_el2(elr, SYS_ELR);
+
+ return true;
+}
+
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+ __fpsimd_save_state(*host_data_ptr(fpsimd_state));
}
static const exit_handler_fn hyp_exit_handlers[] = {
@@ -182,7 +275,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
- [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
+ [ESR_ELx_EC_ERET] = kvm_hyp_handle_eret,
[ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops,
};
@@ -197,7 +290,7 @@ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code)
* If we were in HYP context on entry, adjust the PSTATE view
* so that the usual helpers work correctly.
*/
- if (unlikely(vcpu_get_flag(vcpu, VCPU_HYP_CONTEXT))) {
+ if (vcpu_has_nv(vcpu) && (read_sysreg(hcr_el2) & HCR_NV)) {
u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);
switch (mode) {
@@ -221,8 +314,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt;
u64 exit_code;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
- host_ctxt->__hyp_running_vcpu = vcpu;
+ host_ctxt = host_data_ptr(host_ctxt);
guest_ctxt = &vcpu->arch.ctxt;
sysreg_save_host_state_vhe(host_ctxt);
@@ -240,11 +332,6 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_guest_state_vhe(guest_ctxt);
__debug_switch_to_guest(vcpu);
- if (is_hyp_ctxt(vcpu))
- vcpu_set_flag(vcpu, VCPU_HYP_CONTEXT);
- else
- vcpu_clear_flag(vcpu, VCPU_HYP_CONTEXT);
-
do {
/* Jump in the fire! */
exit_code = __guest_enter(vcpu);
@@ -258,7 +345,7 @@ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu)
sysreg_restore_host_state_vhe(host_ctxt);
- if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED)
+ if (guest_owns_fp_regs())
__fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
@@ -306,7 +393,7 @@ static void __hyp_call_panic(u64 spsr, u64 elr, u64 par)
struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
vcpu = host_ctxt->__hyp_running_vcpu;
__deactivate_traps(vcpu);
diff --git a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
index a8b9ea4967..e12bd7d6d2 100644
--- a/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
+++ b/arch/arm64/kvm/hyp/vhe/sysreg-sr.c
@@ -67,7 +67,7 @@ void __vcpu_load_switch_sysregs(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
__sysreg_save_user_state(host_ctxt);
/*
@@ -110,7 +110,7 @@ void __vcpu_put_switch_sysregs(struct kvm_vcpu *vcpu)
struct kvm_cpu_context *guest_ctxt = &vcpu->arch.ctxt;
struct kvm_cpu_context *host_ctxt;
- host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ host_ctxt = host_data_ptr(host_ctxt);
__sysreg_save_el1_state(guest_ctxt);
__sysreg_save_user_state(guest_ctxt);
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index 1a60b95381..5fa0359f3a 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -17,8 +17,8 @@ struct tlb_inv_context {
u64 sctlr;
};
-static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
- struct tlb_inv_context *cxt)
+static void enter_vmid_context(struct kvm_s2_mmu *mmu,
+ struct tlb_inv_context *cxt)
{
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
u64 val;
@@ -67,7 +67,7 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu,
isb();
}
-static void __tlb_switch_to_host(struct tlb_inv_context *cxt)
+static void exit_vmid_context(struct tlb_inv_context *cxt)
{
/*
* We're done with the TLB operation, let's restore the host's
@@ -97,7 +97,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
/*
* We could do so much better if we had the VA as well.
@@ -118,7 +118,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
@@ -129,7 +129,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nshst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
/*
* We could do so much better if we had the VA as well.
@@ -150,7 +150,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
@@ -169,7 +169,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
__flush_s2_tlb_range_op(ipas2e1is, start, pages, stride,
TLBI_TTL_UNKNOWN);
@@ -179,7 +179,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
@@ -189,13 +189,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
dsb(ishst);
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
__tlbi(vmalls12e1is);
dsb(ish);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
@@ -203,14 +203,14 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
struct tlb_inv_context cxt;
/* Switch to requested VMID */
- __tlb_switch_to_guest(mmu, &cxt);
+ enter_vmid_context(mmu, &cxt);
__tlbi(vmalle1);
asm volatile("ic iallu");
dsb(nsh);
isb();
- __tlb_switch_to_host(&cxt);
+ exit_vmid_context(&cxt);
}
void __kvm_flush_vm_context(void)
diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c
index 200c8019a8..cd6b7b83e2 100644
--- a/arch/arm64/kvm/mmio.c
+++ b/arch/arm64/kvm/mmio.c
@@ -86,7 +86,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
/* Detect an already handled MMIO return */
if (unlikely(!vcpu->mmio_needed))
- return 0;
+ return 1;
vcpu->mmio_needed = 0;
@@ -117,7 +117,7 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu)
*/
kvm_incr_pc(vcpu);
- return 0;
+ return 1;
}
int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
@@ -133,11 +133,19 @@ int io_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
/*
* No valid syndrome? Ask userspace for help if it has
* volunteered to do so, and bail out otherwise.
+ *
+ * In the protected VM case, there isn't much userspace can do
+ * though, so directly deliver an exception to the guest.
*/
if (!kvm_vcpu_dabt_isvalid(vcpu)) {
trace_kvm_mmio_nisv(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
kvm_vcpu_get_hfar(vcpu), fault_ipa);
+ if (vcpu_is_protected(vcpu)) {
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ return 1;
+ }
+
if (test_bit(KVM_ARCH_FLAG_RETURN_NISV_IO_ABORT_TO_USER,
&vcpu->kvm->arch.flags)) {
run->exit_reason = KVM_EXIT_ARM_NISV;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index dc04bc7678..8bcab0cc3f 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1522,8 +1522,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
read_lock(&kvm->mmu_lock);
pgt = vcpu->arch.hw_mmu->pgt;
- if (mmu_invalidate_retry(kvm, mmu_seq))
+ if (mmu_invalidate_retry(kvm, mmu_seq)) {
+ ret = -EAGAIN;
goto out_unlock;
+ }
/*
* If we are not forced to use page mapping, check if we are
@@ -1581,6 +1583,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
memcache,
KVM_PGTABLE_WALK_HANDLE_FAULT |
KVM_PGTABLE_WALK_SHARED);
+out_unlock:
+ read_unlock(&kvm->mmu_lock);
/* Mark the page dirty only if the fault is handled successfully */
if (writable && !ret) {
@@ -1588,8 +1592,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
mark_page_dirty_in_slot(kvm, memslot, gfn);
}
-out_unlock:
- read_unlock(&kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return ret != -EAGAIN ? ret : 0;
}
@@ -1768,40 +1770,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
return false;
}
-bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
-{
- kvm_pfn_t pfn = pte_pfn(range->arg.pte);
-
- if (!kvm->arch.mmu.pgt)
- return false;
-
- WARN_ON(range->end - range->start != 1);
-
- /*
- * If the page isn't tagged, defer to user_mem_abort() for sanitising
- * the MTE tags. The S2 pte should have been unmapped by
- * mmu_notifier_invalidate_range_end().
- */
- if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn)))
- return false;
-
- /*
- * We've moved a page around, probably through CoW, so let's treat
- * it just like a translation fault and the map handler will clean
- * the cache to the PoC.
- *
- * The MMU notifiers will have unmapped a huge PMD before calling
- * ->change_pte() (which in turn calls kvm_set_spte_gfn()) and
- * therefore we never need to clear out a huge PMD through this
- * calling path and a memcache is not required.
- */
- kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
- PAGE_SIZE, __pfn_to_phys(pfn),
- KVM_PGTABLE_PROT_R, NULL, 0);
-
- return false;
-}
-
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
u64 size = (range->end - range->start) << PAGE_SHIFT;
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index ced30c9052..bae8536cbf 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -35,13 +35,9 @@ static u64 limit_nv_id_reg(u32 id, u64 val)
break;
case SYS_ID_AA64ISAR1_EL1:
- /* Support everything but PtrAuth and Spec Invalidation */
+ /* Support everything but Spec Invalidation */
val &= ~(GENMASK_ULL(63, 56) |
- NV_FTR(ISAR1, SPECRES) |
- NV_FTR(ISAR1, GPI) |
- NV_FTR(ISAR1, GPA) |
- NV_FTR(ISAR1, API) |
- NV_FTR(ISAR1, APA));
+ NV_FTR(ISAR1, SPECRES));
break;
case SYS_ID_AA64PFR0_EL1:
@@ -62,8 +58,10 @@ static u64 limit_nv_id_reg(u32 id, u64 val)
break;
case SYS_ID_AA64PFR1_EL1:
- /* Only support SSBS */
- val &= NV_FTR(PFR1, SSBS);
+ /* Only support BTI, SSBS, CSV2_frac */
+ val &= (NV_FTR(PFR1, BT) |
+ NV_FTR(PFR1, SSBS) |
+ NV_FTR(PFR1, CSV2_frac));
break;
case SYS_ID_AA64MMFR0_EL1:
diff --git a/arch/arm64/kvm/pauth.c b/arch/arm64/kvm/pauth.c
new file mode 100644
index 0000000000..d5eb3ae876
--- /dev/null
+++ b/arch/arm64/kvm/pauth.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 - Google LLC
+ * Author: Marc Zyngier <maz@kernel.org>
+ *
+ * Primitive PAuth emulation for ERETAA/ERETAB.
+ *
+ * This code assumes that is is run from EL2, and that it is part of
+ * the emulation of ERETAx for a guest hypervisor. That's a lot of
+ * baked-in assumptions and shortcuts.
+ *
+ * Do no reuse for anything else!
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/gpr-num.h>
+#include <asm/kvm_emulate.h>
+#include <asm/pointer_auth.h>
+
+/* PACGA Xd, Xn, Xm */
+#define PACGA(d,n,m) \
+ asm volatile(__DEFINE_ASM_GPR_NUMS \
+ ".inst 0x9AC03000 |" \
+ "(.L__gpr_num_%[Rd] << 0) |" \
+ "(.L__gpr_num_%[Rn] << 5) |" \
+ "(.L__gpr_num_%[Rm] << 16)\n" \
+ : [Rd] "=r" ((d)) \
+ : [Rn] "r" ((n)), [Rm] "r" ((m)))
+
+static u64 compute_pac(struct kvm_vcpu *vcpu, u64 ptr,
+ struct ptrauth_key ikey)
+{
+ struct ptrauth_key gkey;
+ u64 mod, pac = 0;
+
+ preempt_disable();
+
+ if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
+ mod = __vcpu_sys_reg(vcpu, SP_EL2);
+ else
+ mod = read_sysreg(sp_el1);
+
+ gkey.lo = read_sysreg_s(SYS_APGAKEYLO_EL1);
+ gkey.hi = read_sysreg_s(SYS_APGAKEYHI_EL1);
+
+ __ptrauth_key_install_nosync(APGA, ikey);
+ isb();
+
+ PACGA(pac, ptr, mod);
+ isb();
+
+ __ptrauth_key_install_nosync(APGA, gkey);
+
+ preempt_enable();
+
+ /* PAC in the top 32bits */
+ return pac;
+}
+
+static bool effective_tbi(struct kvm_vcpu *vcpu, bool bit55)
+{
+ u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+ bool tbi, tbid;
+
+ /*
+ * Since we are authenticating an instruction address, we have
+ * to take TBID into account. If E2H==0, ignore VA[55], as
+ * TCR_EL2 only has a single TBI/TBID. If VA[55] was set in
+ * this case, this is likely a guest bug...
+ */
+ if (!vcpu_el2_e2h_is_set(vcpu)) {
+ tbi = tcr & BIT(20);
+ tbid = tcr & BIT(29);
+ } else if (bit55) {
+ tbi = tcr & TCR_TBI1;
+ tbid = tcr & TCR_TBID1;
+ } else {
+ tbi = tcr & TCR_TBI0;
+ tbid = tcr & TCR_TBID0;
+ }
+
+ return tbi && !tbid;
+}
+
+static int compute_bottom_pac(struct kvm_vcpu *vcpu, bool bit55)
+{
+ static const int maxtxsz = 39; // Revisit these two values once
+ static const int mintxsz = 16; // (if) we support TTST/LVA/LVA2
+ u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+ int txsz;
+
+ if (!vcpu_el2_e2h_is_set(vcpu) || !bit55)
+ txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
+ else
+ txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
+
+ return 64 - clamp(txsz, mintxsz, maxtxsz);
+}
+
+static u64 compute_pac_mask(struct kvm_vcpu *vcpu, bool bit55)
+{
+ int bottom_pac;
+ u64 mask;
+
+ bottom_pac = compute_bottom_pac(vcpu, bit55);
+
+ mask = GENMASK(54, bottom_pac);
+ if (!effective_tbi(vcpu, bit55))
+ mask |= GENMASK(63, 56);
+
+ return mask;
+}
+
+static u64 to_canonical_addr(struct kvm_vcpu *vcpu, u64 ptr, u64 mask)
+{
+ bool bit55 = !!(ptr & BIT(55));
+
+ if (bit55)
+ return ptr | mask;
+
+ return ptr & ~mask;
+}
+
+static u64 corrupt_addr(struct kvm_vcpu *vcpu, u64 ptr)
+{
+ bool bit55 = !!(ptr & BIT(55));
+ u64 mask, error_code;
+ int shift;
+
+ if (effective_tbi(vcpu, bit55)) {
+ mask = GENMASK(54, 53);
+ shift = 53;
+ } else {
+ mask = GENMASK(62, 61);
+ shift = 61;
+ }
+
+ if (esr_iss_is_eretab(kvm_vcpu_get_esr(vcpu)))
+ error_code = 2 << shift;
+ else
+ error_code = 1 << shift;
+
+ ptr &= ~mask;
+ ptr |= error_code;
+
+ return ptr;
+}
+
+/*
+ * Authenticate an ERETAA/ERETAB instruction, returning true if the
+ * authentication succeeded and false otherwise. In all cases, *elr
+ * contains the VA to ERET to. Potential exception injection is left
+ * to the caller.
+ */
+bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
+{
+ u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
+ u64 esr = kvm_vcpu_get_esr(vcpu);
+ u64 ptr, cptr, pac, mask;
+ struct ptrauth_key ikey;
+
+ *elr = ptr = vcpu_read_sys_reg(vcpu, ELR_EL2);
+
+ /* We assume we're already in the context of an ERETAx */
+ if (esr_iss_is_eretab(esr)) {
+ if (!(sctlr & SCTLR_EL1_EnIB))
+ return true;
+
+ ikey.lo = __vcpu_sys_reg(vcpu, APIBKEYLO_EL1);
+ ikey.hi = __vcpu_sys_reg(vcpu, APIBKEYHI_EL1);
+ } else {
+ if (!(sctlr & SCTLR_EL1_EnIA))
+ return true;
+
+ ikey.lo = __vcpu_sys_reg(vcpu, APIAKEYLO_EL1);
+ ikey.hi = __vcpu_sys_reg(vcpu, APIAKEYHI_EL1);
+ }
+
+ mask = compute_pac_mask(vcpu, !!(ptr & BIT(55)));
+ cptr = to_canonical_addr(vcpu, ptr, mask);
+
+ pac = compute_pac(vcpu, cptr, ikey);
+
+ /*
+ * Slightly deviate from the pseudocode: if we have a PAC
+ * match with the signed pointer, then it must be good.
+ * Anything after this point is pure error handling.
+ */
+ if ((pac & mask) == (ptr & mask)) {
+ *elr = cptr;
+ return true;
+ }
+
+ /*
+ * Authentication failed, corrupt the canonical address if
+ * PAuth2 isn't implemented, or some XORing if it is.
+ */
+ if (!kvm_has_pauth(vcpu->kvm, PAuth2))
+ cptr = corrupt_addr(vcpu, cptr);
+ else
+ cptr = ptr ^ (pac & mask);
+
+ *elr = cptr;
+ return false;
+}
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index b7be96a535..85117ea8f3 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -222,7 +222,6 @@ void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
int pkvm_init_host_vm(struct kvm *host_kvm)
{
- mutex_init(&host_kvm->lock);
return 0;
}
@@ -259,6 +258,7 @@ static int __init finalize_pkvm(void)
* at, which would end badly once inaccessible.
*/
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
+ kmemleak_free_part(__hyp_rodata_start, __hyp_rodata_end - __hyp_rodata_start);
kmemleak_free_part_phys(hyp_mem_base, hyp_mem_size);
ret = pkvm_drop_host_privileges();
diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index a243934c55..3298198060 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -232,7 +232,7 @@ bool kvm_set_pmuserenr(u64 val)
if (!vcpu || !vcpu_get_flag(vcpu, PMUSERENR_ON_CPU))
return false;
- hctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt;
+ hctxt = host_data_ptr(host_ctxt);
ctxt_sys_reg(hctxt, PMUSERENR_EL0) = val;
return true;
}
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 68d1d05672..3fc8ca164d 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -32,6 +32,7 @@
/* Maximum phys_shift supported for any VM on this host */
static u32 __ro_after_init kvm_ipa_limit;
+unsigned int __ro_after_init kvm_host_sve_max_vl;
/*
* ARMv8 Reset Values
@@ -51,6 +52,8 @@ int __init kvm_arm_init_sve(void)
{
if (system_supports_sve()) {
kvm_sve_max_vl = sve_max_virtualisable_vl();
+ kvm_host_sve_max_vl = sve_max_vl();
+ kvm_nvhe_sym(kvm_host_sve_max_vl) = kvm_host_sve_max_vl;
/*
* The get_sve_reg()/set_sve_reg() ioctl interface will need
@@ -151,7 +154,6 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
void *sve_state = vcpu->arch.sve_state;
- kvm_vcpu_unshare_task_fp(vcpu);
kvm_unshare_hyp(vcpu, vcpu + 1);
if (sve_state)
kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu));
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index c9f4f38715..22b45a15d0 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1568,17 +1568,31 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, const struct sys_reg_desc *r
return IDREG(vcpu->kvm, reg_to_encoding(r));
}
+static bool is_feature_id_reg(u32 encoding)
+{
+ return (sys_reg_Op0(encoding) == 3 &&
+ (sys_reg_Op1(encoding) < 2 || sys_reg_Op1(encoding) == 3) &&
+ sys_reg_CRn(encoding) == 0 &&
+ sys_reg_CRm(encoding) <= 7);
+}
+
/*
* Return true if the register's (Op0, Op1, CRn, CRm, Op2) is
- * (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8.
+ * (3, 0, 0, crm, op2), where 1<=crm<8, 0<=op2<8, which is the range of ID
+ * registers KVM maintains on a per-VM basis.
*/
-static inline bool is_id_reg(u32 id)
+static inline bool is_vm_ftr_id_reg(u32 id)
{
return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 &&
sys_reg_CRn(id) == 0 && sys_reg_CRm(id) >= 1 &&
sys_reg_CRm(id) < 8);
}
+static inline bool is_vcpu_ftr_id_reg(u32 id)
+{
+ return is_feature_id_reg(id) && !is_vm_ftr_id_reg(id);
+}
+
static inline bool is_aa32_id_reg(u32 id)
{
return (sys_reg_Op0(id) == 3 && sys_reg_Op1(id) == 0 &&
@@ -2338,7 +2352,6 @@ static const struct sys_reg_desc sys_reg_descs[] = {
ID_AA64MMFR0_EL1_TGRAN16_2)),
ID_WRITABLE(ID_AA64MMFR1_EL1, ~(ID_AA64MMFR1_EL1_RES0 |
ID_AA64MMFR1_EL1_HCX |
- ID_AA64MMFR1_EL1_XNX |
ID_AA64MMFR1_EL1_TWED |
ID_AA64MMFR1_EL1_XNX |
ID_AA64MMFR1_EL1_VH |
@@ -3069,12 +3082,14 @@ static bool check_sysreg_table(const struct sys_reg_desc *table, unsigned int n,
for (i = 0; i < n; i++) {
if (!is_32 && table[i].reg && !table[i].reset) {
- kvm_err("sys_reg table %pS entry %d lacks reset\n", &table[i], i);
+ kvm_err("sys_reg table %pS entry %d (%s) lacks reset\n",
+ &table[i], i, table[i].name);
return false;
}
if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) {
- kvm_err("sys_reg table %pS entry %d out of order\n", &table[i - 1], i - 1);
+ kvm_err("sys_reg table %pS entry %d (%s -> %s) out of order\n",
+ &table[i], i, table[i - 1].name, table[i].name);
return false;
}
}
@@ -3509,26 +3524,25 @@ void kvm_sys_regs_create_debugfs(struct kvm *kvm)
&idregs_debug_fops);
}
-static void kvm_reset_id_regs(struct kvm_vcpu *vcpu)
+static void reset_vm_ftr_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *reg)
{
- const struct sys_reg_desc *idreg = first_idreg;
- u32 id = reg_to_encoding(idreg);
+ u32 id = reg_to_encoding(reg);
struct kvm *kvm = vcpu->kvm;
if (test_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags))
return;
lockdep_assert_held(&kvm->arch.config_lock);
+ IDREG(kvm, id) = reg->reset(vcpu, reg);
+}
- /* Initialize all idregs */
- while (is_id_reg(id)) {
- IDREG(kvm, id) = idreg->reset(vcpu, idreg);
-
- idreg++;
- id = reg_to_encoding(idreg);
- }
+static void reset_vcpu_ftr_id_reg(struct kvm_vcpu *vcpu,
+ const struct sys_reg_desc *reg)
+{
+ if (kvm_vcpu_initialized(vcpu))
+ return;
- set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags);
+ reg->reset(vcpu, reg);
}
/**
@@ -3540,19 +3554,24 @@ static void kvm_reset_id_regs(struct kvm_vcpu *vcpu)
*/
void kvm_reset_sys_regs(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
unsigned long i;
- kvm_reset_id_regs(vcpu);
-
for (i = 0; i < ARRAY_SIZE(sys_reg_descs); i++) {
const struct sys_reg_desc *r = &sys_reg_descs[i];
- if (is_id_reg(reg_to_encoding(r)))
+ if (!r->reset)
continue;
- if (r->reset)
+ if (is_vm_ftr_id_reg(reg_to_encoding(r)))
+ reset_vm_ftr_id_reg(vcpu, r);
+ else if (is_vcpu_ftr_id_reg(reg_to_encoding(r)))
+ reset_vcpu_ftr_id_reg(vcpu, r);
+ else
r->reset(vcpu, r);
}
+
+ set_bit(KVM_ARCH_FLAG_ID_REGS_INITIALIZED, &kvm->arch.flags);
}
/**
@@ -3978,14 +3997,6 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
sys_reg_CRm(r), \
sys_reg_Op2(r))
-static bool is_feature_id_reg(u32 encoding)
-{
- return (sys_reg_Op0(encoding) == 3 &&
- (sys_reg_Op1(encoding) < 2 || sys_reg_Op1(encoding) == 3) &&
- sys_reg_CRn(encoding) == 0 &&
- sys_reg_CRm(encoding) <= 7);
-}
-
int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *range)
{
const void *zero_page = page_to_virt(ZERO_PAGE(0));
@@ -4014,7 +4025,7 @@ int kvm_vm_ioctl_get_reg_writable_masks(struct kvm *kvm, struct reg_mask_range *
* compliant with a given revision of the architecture, but the
* RES0/RES1 definitions allow us to do that.
*/
- if (is_id_reg(encoding)) {
+ if (is_vm_ftr_id_reg(encoding)) {
if (!reg->val ||
(is_aa32_id_reg(encoding) && !kvm_supports_32bit_el0()))
continue;
diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c
index 389025ce77..bcbc8c986b 100644
--- a/arch/arm64/kvm/vgic/vgic-debug.c
+++ b/arch/arm64/kvm/vgic/vgic-debug.c
@@ -28,27 +28,65 @@ struct vgic_state_iter {
int nr_lpis;
int dist_id;
int vcpu_id;
- int intid;
+ unsigned long intid;
int lpi_idx;
- u32 *lpi_array;
};
-static void iter_next(struct vgic_state_iter *iter)
+static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter)
{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+
if (iter->dist_id == 0) {
iter->dist_id++;
return;
}
+ /*
+ * Let the xarray drive the iterator after the last SPI, as the iterator
+ * has exhausted the sequentially-allocated INTID space.
+ */
+ if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) {
+ if (iter->lpi_idx < iter->nr_lpis)
+ xa_find_after(&dist->lpi_xa, &iter->intid,
+ VGIC_LPI_MAX_INTID,
+ LPI_XA_MARK_DEBUG_ITER);
+ iter->lpi_idx++;
+ return;
+ }
+
iter->intid++;
if (iter->intid == VGIC_NR_PRIVATE_IRQS &&
++iter->vcpu_id < iter->nr_cpus)
iter->intid = 0;
+}
- if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) {
- if (iter->lpi_idx < iter->nr_lpis)
- iter->intid = iter->lpi_array[iter->lpi_idx];
- iter->lpi_idx++;
+static int iter_mark_lpis(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_irq *irq;
+ unsigned long intid;
+ int nr_lpis = 0;
+
+ xa_for_each(&dist->lpi_xa, intid, irq) {
+ if (!vgic_try_get_irq_kref(irq))
+ continue;
+
+ xa_set_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
+ nr_lpis++;
+ }
+
+ return nr_lpis;
+}
+
+static void iter_unmark_lpis(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_irq *irq;
+ unsigned long intid;
+
+ xa_for_each(&dist->lpi_xa, intid, irq) {
+ xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER);
+ vgic_put_irq(kvm, irq);
}
}
@@ -61,15 +99,12 @@ static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter,
iter->nr_cpus = nr_cpus;
iter->nr_spis = kvm->arch.vgic.nr_spis;
- if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
- iter->nr_lpis = vgic_copy_lpi_list(kvm, NULL, &iter->lpi_array);
- if (iter->nr_lpis < 0)
- iter->nr_lpis = 0;
- }
+ if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ iter->nr_lpis = iter_mark_lpis(kvm);
/* Fast forward to the right position if needed */
while (pos--)
- iter_next(iter);
+ iter_next(kvm, iter);
}
static bool end_of_vgic(struct vgic_state_iter *iter)
@@ -114,7 +149,7 @@ static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos)
struct vgic_state_iter *iter = kvm->arch.vgic.iter;
++*pos;
- iter_next(iter);
+ iter_next(kvm, iter);
if (end_of_vgic(iter))
iter = NULL;
return iter;
@@ -134,13 +169,14 @@ static void vgic_debug_stop(struct seq_file *s, void *v)
mutex_lock(&kvm->arch.config_lock);
iter = kvm->arch.vgic.iter;
- kfree(iter->lpi_array);
+ iter_unmark_lpis(kvm);
kfree(iter);
kvm->arch.vgic.iter = NULL;
mutex_unlock(&kvm->arch.config_lock);
}
-static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
+static void print_dist_state(struct seq_file *s, struct vgic_dist *dist,
+ struct vgic_state_iter *iter)
{
bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3;
@@ -149,7 +185,7 @@ static void print_dist_state(struct seq_file *s, struct vgic_dist *dist)
seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2");
seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis);
if (v3)
- seq_printf(s, "nr_lpis:\t%d\n", atomic_read(&dist->lpi_count));
+ seq_printf(s, "nr_lpis:\t%d\n", iter->nr_lpis);
seq_printf(s, "enabled:\t%d\n", dist->enabled);
seq_printf(s, "\n");
@@ -236,7 +272,7 @@ static int vgic_debug_show(struct seq_file *s, void *v)
unsigned long flags;
if (iter->dist_id == 0) {
- print_dist_state(s, &kvm->arch.vgic);
+ print_dist_state(s, &kvm->arch.vgic, iter);
return 0;
}
@@ -246,11 +282,13 @@ static int vgic_debug_show(struct seq_file *s, void *v)
if (iter->vcpu_id < iter->nr_cpus)
vcpu = kvm_get_vcpu(kvm, iter->vcpu_id);
+ /*
+ * Expect this to succeed, as iter_mark_lpis() takes a reference on
+ * every LPI to be visited.
+ */
irq = vgic_get_irq(kvm, vcpu, iter->intid);
- if (!irq) {
- seq_printf(s, " LPI %4d freed\n", iter->intid);
- return 0;
- }
+ if (WARN_ON_ONCE(!irq))
+ return -EINVAL;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
print_irq_state(s, irq, vcpu);
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index f20941f83a..7f68cf58b9 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -53,8 +53,6 @@ void kvm_vgic_early_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- INIT_LIST_HEAD(&dist->lpi_translation_cache);
- raw_spin_lock_init(&dist->lpi_list_lock);
xa_init_flags(&dist->lpi_xa, XA_FLAGS_LOCK_IRQ);
}
@@ -182,27 +180,22 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
return 0;
}
-/**
- * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data
- * structures and register VCPU-specific KVM iodevs
- *
- * @vcpu: pointer to the VCPU being created and initialized
- *
- * Only do initialization, but do not actually enable the
- * VGIC CPU interface
- */
-int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
- int ret = 0;
int i;
- vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+ lockdep_assert_held(&vcpu->kvm->arch.config_lock);
- INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
- raw_spin_lock_init(&vgic_cpu->ap_list_lock);
- atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0);
+ if (vgic_cpu->private_irqs)
+ return 0;
+
+ vgic_cpu->private_irqs = kcalloc(VGIC_NR_PRIVATE_IRQS,
+ sizeof(struct vgic_irq),
+ GFP_KERNEL_ACCOUNT);
+
+ if (!vgic_cpu->private_irqs)
+ return -ENOMEM;
/*
* Enable and configure all SGIs to be edge-triggered and
@@ -227,9 +220,48 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
}
}
+ return 0;
+}
+
+static int vgic_allocate_private_irqs(struct kvm_vcpu *vcpu)
+{
+ int ret;
+
+ mutex_lock(&vcpu->kvm->arch.config_lock);
+ ret = vgic_allocate_private_irqs_locked(vcpu);
+ mutex_unlock(&vcpu->kvm->arch.config_lock);
+
+ return ret;
+}
+
+/**
+ * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data
+ * structures and register VCPU-specific KVM iodevs
+ *
+ * @vcpu: pointer to the VCPU being created and initialized
+ *
+ * Only do initialization, but do not actually enable the
+ * VGIC CPU interface
+ */
+int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int ret = 0;
+
+ vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
+
+ INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
+ raw_spin_lock_init(&vgic_cpu->ap_list_lock);
+ atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0);
+
if (!irqchip_in_kernel(vcpu->kvm))
return 0;
+ ret = vgic_allocate_private_irqs(vcpu);
+ if (ret)
+ return ret;
+
/*
* If we are creating a VCPU with a GICv3 we must also register the
* KVM io device for the redistributor that belongs to this VCPU.
@@ -285,10 +317,13 @@ int vgic_init(struct kvm *kvm)
/* Initialize groups on CPUs created before the VGIC type was known */
kvm_for_each_vcpu(idx, vcpu, kvm) {
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ ret = vgic_allocate_private_irqs_locked(vcpu);
+ if (ret)
+ goto out;
for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
- struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
+ struct vgic_irq *irq = vgic_get_irq(kvm, vcpu, i);
+
switch (dist->vgic_model) {
case KVM_DEV_TYPE_ARM_VGIC_V3:
irq->group = 1;
@@ -300,14 +335,15 @@ int vgic_init(struct kvm *kvm)
break;
default:
ret = -EINVAL;
- goto out;
}
+
+ vgic_put_irq(kvm, irq);
+
+ if (ret)
+ goto out;
}
}
- if (vgic_has_its(kvm))
- vgic_lpi_translation_cache_init(kvm);
-
/*
* If we have GICv4.1 enabled, unconditionally request enable the
* v4 support so that we get HW-accelerated vSGIs. Otherwise, only
@@ -355,15 +391,12 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list)
- vgic_v3_free_redist_region(rdreg);
+ vgic_v3_free_redist_region(kvm, rdreg);
INIT_LIST_HEAD(&dist->rd_regions);
} else {
dist->vgic_cpu_base = VGIC_ADDR_UNDEF;
}
- if (vgic_has_its(kvm))
- vgic_lpi_translation_cache_destroy(kvm);
-
if (vgic_supports_direct_msis(kvm))
vgic_v4_teardown(kvm);
@@ -381,6 +414,9 @@ static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
vgic_flush_pending_lpis(vcpu);
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
+ kfree(vgic_cpu->private_irqs);
+ vgic_cpu->private_irqs = NULL;
+
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
vgic_unregister_redist_iodev(vcpu);
vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF;
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index e85a495ada..40bb43f20b 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -23,6 +23,8 @@
#include "vgic.h"
#include "vgic-mmio.h"
+static struct kvm_device_ops kvm_arm_vgic_its_ops;
+
static int vgic_its_save_tables_v0(struct vgic_its *its);
static int vgic_its_restore_tables_v0(struct vgic_its *its);
static int vgic_its_commit_v0(struct vgic_its *its);
@@ -67,7 +69,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
irq->target_vcpu = vcpu;
irq->group = 1;
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+ xa_lock_irqsave(&dist->lpi_xa, flags);
/*
* There could be a race with another vgic_add_lpi(), so we need to
@@ -82,17 +84,14 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid,
goto out_unlock;
}
- ret = xa_err(xa_store(&dist->lpi_xa, intid, irq, 0));
+ ret = xa_err(__xa_store(&dist->lpi_xa, intid, irq, 0));
if (ret) {
xa_release(&dist->lpi_xa, intid);
kfree(irq);
- goto out_unlock;
}
- atomic_inc(&dist->lpi_count);
-
out_unlock:
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ xa_unlock_irqrestore(&dist->lpi_xa, flags);
if (ret)
return ERR_PTR(ret);
@@ -150,14 +149,6 @@ struct its_ite {
u32 event_id;
};
-struct vgic_translation_cache_entry {
- struct list_head entry;
- phys_addr_t db;
- u32 devid;
- u32 eventid;
- struct vgic_irq *irq;
-};
-
/**
* struct vgic_its_abi - ITS abi ops and settings
* @cte_esz: collection table entry size
@@ -252,8 +243,10 @@ static struct its_ite *find_ite(struct vgic_its *its, u32 device_id,
#define GIC_LPI_OFFSET 8192
-#define VITS_TYPER_IDBITS 16
-#define VITS_TYPER_DEVBITS 16
+#define VITS_TYPER_IDBITS 16
+#define VITS_MAX_EVENTID (BIT(VITS_TYPER_IDBITS) - 1)
+#define VITS_TYPER_DEVBITS 16
+#define VITS_MAX_DEVID (BIT(VITS_TYPER_DEVBITS) - 1)
#define VITS_DTE_MAX_DEVID_OFFSET (BIT(14) - 1)
#define VITS_ITE_MAX_EVENTID_OFFSET (BIT(16) - 1)
@@ -316,53 +309,6 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
return 0;
}
-#define GIC_LPI_MAX_INTID ((1 << INTERRUPT_ID_BITS_ITS) - 1)
-
-/*
- * Create a snapshot of the current LPIs targeting @vcpu, so that we can
- * enumerate those LPIs without holding any lock.
- * Returns their number and puts the kmalloc'ed array into intid_ptr.
- */
-int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- XA_STATE(xas, &dist->lpi_xa, GIC_LPI_OFFSET);
- struct vgic_irq *irq;
- unsigned long flags;
- u32 *intids;
- int irq_count, i = 0;
-
- /*
- * There is an obvious race between allocating the array and LPIs
- * being mapped/unmapped. If we ended up here as a result of a
- * command, we're safe (locks are held, preventing another
- * command). If coming from another path (such as enabling LPIs),
- * we must be careful not to overrun the array.
- */
- irq_count = atomic_read(&dist->lpi_count);
- intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT);
- if (!intids)
- return -ENOMEM;
-
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
- rcu_read_lock();
-
- xas_for_each(&xas, irq, GIC_LPI_MAX_INTID) {
- if (i == irq_count)
- break;
- /* We don't need to "get" the IRQ, as we hold the list lock. */
- if (vcpu && irq->target_vcpu != vcpu)
- continue;
- intids[i++] = irq->intid;
- }
-
- rcu_read_unlock();
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
-
- *intid_ptr = intids;
- return i;
-}
-
static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
{
int ret = 0;
@@ -446,23 +392,18 @@ static u32 max_lpis_propbaser(u64 propbaser)
static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
{
gpa_t pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser);
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ unsigned long intid, flags;
struct vgic_irq *irq;
int last_byte_offset = -1;
int ret = 0;
- u32 *intids;
- int nr_irqs, i;
- unsigned long flags;
u8 pendmask;
- nr_irqs = vgic_copy_lpi_list(vcpu->kvm, vcpu, &intids);
- if (nr_irqs < 0)
- return nr_irqs;
-
- for (i = 0; i < nr_irqs; i++) {
+ xa_for_each(&dist->lpi_xa, intid, irq) {
int byte_offset, bit_nr;
- byte_offset = intids[i] / BITS_PER_BYTE;
- bit_nr = intids[i] % BITS_PER_BYTE;
+ byte_offset = intid / BITS_PER_BYTE;
+ bit_nr = intid % BITS_PER_BYTE;
/*
* For contiguously allocated LPIs chances are we just read
@@ -472,25 +413,23 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu)
ret = kvm_read_guest_lock(vcpu->kvm,
pendbase + byte_offset,
&pendmask, 1);
- if (ret) {
- kfree(intids);
+ if (ret)
return ret;
- }
+
last_byte_offset = byte_offset;
}
- irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]);
+ irq = vgic_get_irq(vcpu->kvm, NULL, intid);
if (!irq)
continue;
raw_spin_lock_irqsave(&irq->irq_lock, flags);
- irq->pending_latch = pendmask & (1U << bit_nr);
+ if (irq->target_vcpu == vcpu)
+ irq->pending_latch = pendmask & (1U << bit_nr);
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
vgic_put_irq(vcpu->kvm, irq);
}
- kfree(intids);
-
return ret;
}
@@ -566,51 +505,52 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
return 0;
}
-static struct vgic_irq *__vgic_its_check_cache(struct vgic_dist *dist,
- phys_addr_t db,
- u32 devid, u32 eventid)
+static struct vgic_its *__vgic_doorbell_to_its(struct kvm *kvm, gpa_t db)
{
- struct vgic_translation_cache_entry *cte;
+ struct kvm_io_device *kvm_io_dev;
+ struct vgic_io_device *iodev;
- list_for_each_entry(cte, &dist->lpi_translation_cache, entry) {
- /*
- * If we hit a NULL entry, there is nothing after this
- * point.
- */
- if (!cte->irq)
- break;
+ kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, db);
+ if (!kvm_io_dev)
+ return ERR_PTR(-EINVAL);
- if (cte->db != db || cte->devid != devid ||
- cte->eventid != eventid)
- continue;
+ if (kvm_io_dev->ops != &kvm_io_gic_ops)
+ return ERR_PTR(-EINVAL);
- /*
- * Move this entry to the head, as it is the most
- * recently used.
- */
- if (!list_is_first(&cte->entry, &dist->lpi_translation_cache))
- list_move(&cte->entry, &dist->lpi_translation_cache);
+ iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
+ if (iodev->iodev_type != IODEV_ITS)
+ return ERR_PTR(-EINVAL);
- return cte->irq;
- }
+ return iodev->its;
+}
+
+static unsigned long vgic_its_cache_key(u32 devid, u32 eventid)
+{
+ return (((unsigned long)devid) << VITS_TYPER_IDBITS) | eventid;
- return NULL;
}
static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db,
u32 devid, u32 eventid)
{
- struct vgic_dist *dist = &kvm->arch.vgic;
+ unsigned long cache_key = vgic_its_cache_key(devid, eventid);
+ struct vgic_its *its;
struct vgic_irq *irq;
- unsigned long flags;
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+ if (devid > VITS_MAX_DEVID || eventid > VITS_MAX_EVENTID)
+ return NULL;
- irq = __vgic_its_check_cache(dist, db, devid, eventid);
+ its = __vgic_doorbell_to_its(kvm, db);
+ if (IS_ERR(its))
+ return NULL;
+
+ rcu_read_lock();
+
+ irq = xa_load(&its->translation_cache, cache_key);
if (!vgic_try_get_irq_kref(irq))
irq = NULL;
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ rcu_read_unlock();
return irq;
}
@@ -619,41 +559,13 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid,
struct vgic_irq *irq)
{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct vgic_translation_cache_entry *cte;
- unsigned long flags;
- phys_addr_t db;
+ unsigned long cache_key = vgic_its_cache_key(devid, eventid);
+ struct vgic_irq *old;
/* Do not cache a directly injected interrupt */
if (irq->hw)
return;
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
-
- if (unlikely(list_empty(&dist->lpi_translation_cache)))
- goto out;
-
- /*
- * We could have raced with another CPU caching the same
- * translation behind our back, so let's check it is not in
- * already
- */
- db = its->vgic_its_base + GITS_TRANSLATER;
- if (__vgic_its_check_cache(dist, db, devid, eventid))
- goto out;
-
- /* Always reuse the last entry (LRU policy) */
- cte = list_last_entry(&dist->lpi_translation_cache,
- typeof(*cte), entry);
-
- /*
- * Caching the translation implies having an extra reference
- * to the interrupt, so drop the potential reference on what
- * was in the cache, and increment it on the new interrupt.
- */
- if (cte->irq)
- vgic_put_irq(kvm, cte->irq);
-
/*
* The irq refcount is guaranteed to be nonzero while holding the
* its_lock, as the ITE (and the reference it holds) cannot be freed.
@@ -661,39 +573,44 @@ static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its,
lockdep_assert_held(&its->its_lock);
vgic_get_irq_kref(irq);
- cte->db = db;
- cte->devid = devid;
- cte->eventid = eventid;
- cte->irq = irq;
+ /*
+ * We could have raced with another CPU caching the same
+ * translation behind our back, ensure we don't leak a
+ * reference if that is the case.
+ */
+ old = xa_store(&its->translation_cache, cache_key, irq, GFP_KERNEL_ACCOUNT);
+ if (old)
+ vgic_put_irq(kvm, old);
+}
- /* Move the new translation to the head of the list */
- list_move(&cte->entry, &dist->lpi_translation_cache);
+static void vgic_its_invalidate_cache(struct vgic_its *its)
+{
+ struct kvm *kvm = its->dev->kvm;
+ struct vgic_irq *irq;
+ unsigned long idx;
-out:
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ xa_for_each(&its->translation_cache, idx, irq) {
+ xa_erase(&its->translation_cache, idx);
+ vgic_put_irq(kvm, irq);
+ }
}
-void vgic_its_invalidate_cache(struct kvm *kvm)
+void vgic_its_invalidate_all_caches(struct kvm *kvm)
{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct vgic_translation_cache_entry *cte;
- unsigned long flags;
+ struct kvm_device *dev;
+ struct vgic_its *its;
- raw_spin_lock_irqsave(&dist->lpi_list_lock, flags);
+ rcu_read_lock();
- list_for_each_entry(cte, &dist->lpi_translation_cache, entry) {
- /*
- * If we hit a NULL entry, there is nothing after this
- * point.
- */
- if (!cte->irq)
- break;
+ list_for_each_entry_rcu(dev, &kvm->devices, vm_node) {
+ if (dev->ops != &kvm_arm_vgic_its_ops)
+ continue;
- vgic_put_irq(kvm, cte->irq);
- cte->irq = NULL;
+ its = dev->private;
+ vgic_its_invalidate_cache(its);
}
- raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags);
+ rcu_read_unlock();
}
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
@@ -725,8 +642,6 @@ int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi)
{
u64 address;
- struct kvm_io_device *kvm_io_dev;
- struct vgic_io_device *iodev;
if (!vgic_has_its(kvm))
return ERR_PTR(-ENODEV);
@@ -736,18 +651,7 @@ struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi)
address = (u64)msi->address_hi << 32 | msi->address_lo;
- kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
- if (!kvm_io_dev)
- return ERR_PTR(-EINVAL);
-
- if (kvm_io_dev->ops != &kvm_io_gic_ops)
- return ERR_PTR(-EINVAL);
-
- iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
- if (iodev->iodev_type != IODEV_ITS)
- return ERR_PTR(-EINVAL);
-
- return iodev->its;
+ return __vgic_doorbell_to_its(kvm, address);
}
/*
@@ -883,7 +787,7 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its,
* don't bother here since we clear the ITTE anyway and the
* pending state is a property of the ITTE struct.
*/
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
its_free_ite(kvm, ite);
return 0;
@@ -920,7 +824,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
ite->collection = collection;
vcpu = collection_to_vcpu(kvm, collection);
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
return update_affinity(ite->irq, vcpu);
}
@@ -955,7 +859,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id,
switch (type) {
case GITS_BASER_TYPE_DEVICE:
- if (id >= BIT_ULL(VITS_TYPER_DEVBITS))
+ if (id > VITS_MAX_DEVID)
return false;
break;
case GITS_BASER_TYPE_COLLECTION:
@@ -1167,7 +1071,8 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its,
}
/* Requires the its_lock to be held. */
-static void vgic_its_free_device(struct kvm *kvm, struct its_device *device)
+static void vgic_its_free_device(struct kvm *kvm, struct vgic_its *its,
+ struct its_device *device)
{
struct its_ite *ite, *temp;
@@ -1179,7 +1084,7 @@ static void vgic_its_free_device(struct kvm *kvm, struct its_device *device)
list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list)
its_free_ite(kvm, ite);
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
list_del(&device->dev_list);
kfree(device);
@@ -1191,7 +1096,7 @@ static void vgic_its_free_device_list(struct kvm *kvm, struct vgic_its *its)
struct its_device *cur, *temp;
list_for_each_entry_safe(cur, temp, &its->device_list, dev_list)
- vgic_its_free_device(kvm, cur);
+ vgic_its_free_device(kvm, its, cur);
}
/* its lock must be held */
@@ -1250,7 +1155,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its,
* by removing the mapping and re-establishing it.
*/
if (device)
- vgic_its_free_device(kvm, device);
+ vgic_its_free_device(kvm, its, device);
/*
* The spec does not say whether unmapping a not-mapped device
@@ -1281,7 +1186,7 @@ static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its,
if (!valid) {
vgic_its_free_collection(its, coll_id);
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
} else {
struct kvm_vcpu *vcpu;
@@ -1372,23 +1277,19 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
int vgic_its_invall(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
- int irq_count, i = 0;
- u32 *intids;
-
- irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids);
- if (irq_count < 0)
- return irq_count;
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct vgic_irq *irq;
+ unsigned long intid;
- for (i = 0; i < irq_count; i++) {
- struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intids[i]);
+ xa_for_each(&dist->lpi_xa, intid, irq) {
+ irq = vgic_get_irq(kvm, NULL, intid);
if (!irq)
continue;
+
update_lpi_config(kvm, irq, vcpu, false);
vgic_put_irq(kvm, irq);
}
- kfree(intids);
-
if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);
@@ -1431,10 +1332,10 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
u64 *its_cmd)
{
+ struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu1, *vcpu2;
struct vgic_irq *irq;
- u32 *intids;
- int irq_count, i;
+ unsigned long intid;
/* We advertise GITS_TYPER.PTA==0, making the address the vcpu ID */
vcpu1 = kvm_get_vcpu_by_id(kvm, its_cmd_get_target_addr(its_cmd));
@@ -1446,12 +1347,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
if (vcpu1 == vcpu2)
return 0;
- irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids);
- if (irq_count < 0)
- return irq_count;
-
- for (i = 0; i < irq_count; i++) {
- irq = vgic_get_irq(kvm, NULL, intids[i]);
+ xa_for_each(&dist->lpi_xa, intid, irq) {
+ irq = vgic_get_irq(kvm, NULL, intid);
if (!irq)
continue;
@@ -1460,9 +1357,8 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
vgic_put_irq(kvm, irq);
}
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
- kfree(intids);
return 0;
}
@@ -1813,7 +1709,7 @@ static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its,
its->enabled = !!(val & GITS_CTLR_ENABLE);
if (!its->enabled)
- vgic_its_invalidate_cache(kvm);
+ vgic_its_invalidate_cache(its);
/*
* Try to process any pending commands. This function bails out early
@@ -1914,47 +1810,6 @@ out:
return ret;
}
-/* Default is 16 cached LPIs per vcpu */
-#define LPI_DEFAULT_PCPU_CACHE_SIZE 16
-
-void vgic_lpi_translation_cache_init(struct kvm *kvm)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- unsigned int sz;
- int i;
-
- if (!list_empty(&dist->lpi_translation_cache))
- return;
-
- sz = atomic_read(&kvm->online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE;
-
- for (i = 0; i < sz; i++) {
- struct vgic_translation_cache_entry *cte;
-
- /* An allocation failure is not fatal */
- cte = kzalloc(sizeof(*cte), GFP_KERNEL_ACCOUNT);
- if (WARN_ON(!cte))
- break;
-
- INIT_LIST_HEAD(&cte->entry);
- list_add(&cte->entry, &dist->lpi_translation_cache);
- }
-}
-
-void vgic_lpi_translation_cache_destroy(struct kvm *kvm)
-{
- struct vgic_dist *dist = &kvm->arch.vgic;
- struct vgic_translation_cache_entry *cte, *tmp;
-
- vgic_its_invalidate_cache(kvm);
-
- list_for_each_entry_safe(cte, tmp,
- &dist->lpi_translation_cache, entry) {
- list_del(&cte->entry);
- kfree(cte);
- }
-}
-
#define INITIAL_BASER_VALUE \
(GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \
GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \
@@ -1987,8 +1842,6 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
kfree(its);
return ret;
}
-
- vgic_lpi_translation_cache_init(dev->kvm);
}
mutex_init(&its->its_lock);
@@ -2006,6 +1859,7 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
INIT_LIST_HEAD(&its->device_list);
INIT_LIST_HEAD(&its->collection_list);
+ xa_init(&its->translation_cache);
dev->kvm->arch.vgic.msis_require_devid = true;
dev->kvm->arch.vgic.has_its = true;
@@ -2036,6 +1890,8 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
vgic_its_free_device_list(kvm, its);
vgic_its_free_collection_list(kvm, its);
+ vgic_its_invalidate_cache(its);
+ xa_destroy(&its->translation_cache);
mutex_unlock(&its->its_lock);
kfree(its);
@@ -2438,7 +2294,7 @@ static int vgic_its_restore_dte(struct vgic_its *its, u32 id,
ret = vgic_its_restore_itt(its, dev);
if (ret) {
- vgic_its_free_device(its->dev->kvm, dev);
+ vgic_its_free_device(its->dev->kvm, its, dev);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
index c15ee1df03..9e50928f5d 100644
--- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c
@@ -277,7 +277,7 @@ static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu,
return;
vgic_flush_pending_lpis(vcpu);
- vgic_its_invalidate_cache(vcpu->kvm);
+ vgic_its_invalidate_all_caches(vcpu->kvm);
atomic_set_release(&vgic_cpu->ctlr, 0);
} else {
ctlr = atomic_cmpxchg_acquire(&vgic_cpu->ctlr, 0,
@@ -919,8 +919,19 @@ free:
return ret;
}
-void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg)
+void vgic_v3_free_redist_region(struct kvm *kvm, struct vgic_redist_region *rdreg)
{
+ struct kvm_vcpu *vcpu;
+ unsigned long c;
+
+ lockdep_assert_held(&kvm->arch.config_lock);
+
+ /* Garbage collect the region */
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ if (vcpu->arch.vgic_cpu.rdreg == rdreg)
+ vcpu->arch.vgic_cpu.rdreg = NULL;
+ }
+
list_del(&rdreg->list);
kfree(rdreg);
}
@@ -945,7 +956,7 @@ int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count)
mutex_lock(&kvm->arch.config_lock);
rdreg = vgic_v3_rdist_region_from_index(kvm, index);
- vgic_v3_free_redist_region(rdreg);
+ vgic_v3_free_redist_region(kvm, rdreg);
mutex_unlock(&kvm->arch.config_lock);
return ret;
}
diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c
index 7e9cdb78f7..ae5a44d570 100644
--- a/arch/arm64/kvm/vgic/vgic-v2.c
+++ b/arch/arm64/kvm/vgic/vgic-v2.c
@@ -464,17 +464,10 @@ void vgic_v2_load(struct kvm_vcpu *vcpu)
kvm_vgic_global_state.vctrl_base + GICH_APR);
}
-void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu)
-{
- struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
-
- cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
-}
-
void vgic_v2_put(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- vgic_v2_vmcr_sync(vcpu);
+ cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR);
cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR);
}
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index 4ea3340786..ed6e412cd7 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -722,15 +722,7 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- /*
- * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen
- * is dependent on ICC_SRE_EL1.SRE, and we have to perform the
- * VMCR_EL2 save/restore in the world switch.
- */
- if (likely(cpu_if->vgic_sre))
- kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
-
- kvm_call_hyp(__vgic_v3_restore_aprs, cpu_if);
+ kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if);
if (has_vhe())
__vgic_v3_activate_traps(cpu_if);
@@ -738,24 +730,13 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
WARN_ON(vgic_v4_load(vcpu));
}
-void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
-{
- struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
-
- if (likely(cpu_if->vgic_sre))
- cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr);
-}
-
void vgic_v3_put(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
+ kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if);
WARN_ON(vgic_v4_put(vcpu));
- vgic_v3_vmcr_sync(vcpu);
-
- kvm_call_hyp(__vgic_v3_save_aprs, cpu_if);
-
if (has_vhe())
__vgic_v3_deactivate_traps(cpu_if);
}
diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c
index 4ec93587c8..f07b3ddff7 100644
--- a/arch/arm64/kvm/vgic/vgic.c
+++ b/arch/arm64/kvm/vgic/vgic.c
@@ -29,9 +29,8 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = {
* its->cmd_lock (mutex)
* its->its_lock (mutex)
* vgic_cpu->ap_list_lock must be taken with IRQs disabled
- * kvm->lpi_list_lock must be taken with IRQs disabled
- * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled
- * vgic_irq->irq_lock must be taken with IRQs disabled
+ * vgic_dist->lpi_xa.xa_lock must be taken with IRQs disabled
+ * vgic_irq->irq_lock must be taken with IRQs disabled
*
* As the ap_list_lock might be taken from the timer interrupt handler,
* we have to disable IRQs before taking this lock and everything lower
@@ -126,7 +125,6 @@ void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq)
__xa_erase(&dist->lpi_xa, irq->intid);
xa_unlock_irqrestore(&dist->lpi_xa, flags);
- atomic_dec(&dist->lpi_count);
kfree_rcu(irq, rcu);
}
@@ -939,17 +937,6 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu)
vgic_v3_put(vcpu);
}
-void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
-{
- if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
- return;
-
- if (kvm_vgic_global_state.type == VGIC_V2)
- vgic_v2_vmcr_sync(vcpu);
- else
- vgic_v3_vmcr_sync(vcpu);
-}
-
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h
index 0c2b82de8f..03d356a123 100644
--- a/arch/arm64/kvm/vgic/vgic.h
+++ b/arch/arm64/kvm/vgic/vgic.h
@@ -16,6 +16,7 @@
#define INTERRUPT_ID_BITS_SPIS 10
#define INTERRUPT_ID_BITS_ITS 16
+#define VGIC_LPI_MAX_INTID ((1 << INTERRUPT_ID_BITS_ITS) - 1)
#define VGIC_PRI_BITS 5
#define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS)
@@ -214,7 +215,6 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
void vgic_v2_init_lrs(void);
void vgic_v2_load(struct kvm_vcpu *vcpu);
void vgic_v2_put(struct kvm_vcpu *vcpu);
-void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
void vgic_v2_save_state(struct kvm_vcpu *vcpu);
void vgic_v2_restore_state(struct kvm_vcpu *vcpu);
@@ -253,7 +253,6 @@ bool vgic_v3_check_base(struct kvm *kvm);
void vgic_v3_load(struct kvm_vcpu *vcpu);
void vgic_v3_put(struct kvm_vcpu *vcpu);
-void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu);
bool vgic_has_its(struct kvm *kvm);
int kvm_vgic_register_its_device(void);
@@ -317,7 +316,7 @@ vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg)
struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm,
u32 index);
-void vgic_v3_free_redist_region(struct vgic_redist_region *rdreg);
+void vgic_v3_free_redist_region(struct kvm *kvm, struct vgic_redist_region *rdreg);
bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size);
@@ -330,14 +329,11 @@ static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size)
}
bool vgic_lpis_enabled(struct kvm_vcpu *vcpu);
-int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr);
int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
u32 devid, u32 eventid, struct vgic_irq **irq);
struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi);
-void vgic_lpi_translation_cache_init(struct kvm *kvm);
-void vgic_lpi_translation_cache_destroy(struct kvm *kvm);
-void vgic_its_invalidate_cache(struct kvm *kvm);
+void vgic_its_invalidate_all_caches(struct kvm *kvm);
/* GICv4.1 MMIO interface */
int vgic_its_inv_lpi(struct kvm *kvm, struct vgic_irq *irq);