diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-07 13:18:06 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-07 13:18:06 +0000 |
commit | 638a9e433ecd61e64761352dbec1fa4f5874c941 (patch) | |
tree | fdbff74a238d7a5a7d1cef071b7230bc064b9f25 /arch/arm64/kvm/hyp/nvhe | |
parent | Releasing progress-linux version 6.9.12-1~progress7.99u1. (diff) | |
download | linux-638a9e433ecd61e64761352dbec1fa4f5874c941.tar.xz linux-638a9e433ecd61e64761352dbec1fa4f5874c941.zip |
Merging upstream version 6.10.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/arm64/kvm/hyp/nvhe')
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/Makefile | 13 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/debug-sr.c | 8 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/ffa.c | 13 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/hyp-main.c | 111 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/mem_protect.c | 8 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/pkvm.c | 9 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/psci-relay.c | 2 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/setup.c | 27 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/switch.c | 42 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/tlb.c | 115 |
10 files changed, 257 insertions, 91 deletions
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 2250253a64..50fa0ffb6b 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -97,16 +97,3 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) $(CC_FLAGS_CFI) # causes a build failure. Remove profile optimization flags. KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%, $(KBUILD_CFLAGS)) KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -fno-unwind-tables - -# KVM nVHE code is run at a different exception code with a different map, so -# compiler instrumentation that inserts callbacks or checks into the code may -# cause crashes. Just disable it. -GCOV_PROFILE := n -KASAN_SANITIZE := n -KCSAN_SANITIZE := n -UBSAN_SANITIZE := n -KCOV_INSTRUMENT := n - -# Skip objtool checking for this directory because nVHE code is compiled with -# non-standard build rules. -OBJECT_FILES_NON_STANDARD := y diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 7746ea507b..53efda0235 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -83,10 +83,10 @@ void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) { /* Disable and flush SPE data generation */ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) - __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); + __debug_save_spe(host_data_ptr(host_debug_state.pmscr_el1)); /* Disable and flush Self-Hosted Trace generation */ if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) - __debug_save_trace(&vcpu->arch.host_debug_state.trfcr_el1); + __debug_save_trace(host_data_ptr(host_debug_state.trfcr_el1)); } void __debug_switch_to_guest(struct kvm_vcpu *vcpu) @@ -97,9 +97,9 @@ void __debug_switch_to_guest(struct kvm_vcpu *vcpu) void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) { if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_SPE)) - __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); + __debug_restore_spe(*host_data_ptr(host_debug_state.pmscr_el1)); if (vcpu_get_flag(vcpu, DEBUG_STATE_SAVE_TRBE)) - __debug_restore_trace(vcpu->arch.host_debug_state.trfcr_el1); + __debug_restore_trace(*host_data_ptr(host_debug_state.trfcr_el1)); } void __debug_switch_to_host(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 320f2eaa14..efb053af33 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -177,6 +177,14 @@ static void ffa_retrieve_req(struct arm_smccc_res *res, u32 len) res); } +static void ffa_rx_release(struct arm_smccc_res *res) +{ + arm_smccc_1_1_smc(FFA_RX_RELEASE, + 0, 0, + 0, 0, 0, 0, 0, + res); +} + static void do_ffa_rxtx_map(struct arm_smccc_res *res, struct kvm_cpu_context *ctxt) { @@ -543,16 +551,19 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res, if (WARN_ON(offset > len || fraglen > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE)) { ret = FFA_RET_ABORTED; + ffa_rx_release(res); goto out_unlock; } if (len > ffa_desc_buf.len) { ret = FFA_RET_NO_MEMORY; + ffa_rx_release(res); goto out_unlock; } buf = ffa_desc_buf.buf; memcpy(buf, hyp_buffers.rx, fraglen); + ffa_rx_release(res); for (fragoff = fraglen; fragoff < len; fragoff += fraglen) { ffa_mem_frag_rx(res, handle_lo, handle_hi, fragoff); @@ -563,6 +574,7 @@ static void do_ffa_mem_reclaim(struct arm_smccc_res *res, fraglen = res->a3; memcpy((void *)buf + fragoff, hyp_buffers.rx, fraglen); + ffa_rx_release(res); } ffa_mem_reclaim(res, handle_lo, handle_hi, flags); @@ -600,7 +612,6 @@ static bool ffa_call_supported(u64 func_id) case FFA_MSG_POLL: case FFA_MSG_WAIT: /* 32-bit variants of 64-bit calls */ - case FFA_MSG_SEND_DIRECT_REQ: case FFA_MSG_SEND_DIRECT_RESP: case FFA_RXTX_MAP: case FFA_MEM_DONATE: diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 2385fd03ed..f43d845f3c 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -23,26 +23,84 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); +static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu) +{ + __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR); + /* + * On saving/restoring guest sve state, always use the maximum VL for + * the guest. The layout of the data when saving the sve state depends + * on the VL, so use a consistent (i.e., the maximum) guest VL. + */ + sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2); + __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr, true); + write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); +} + +static void __hyp_sve_restore_host(void) +{ + struct cpu_sve_state *sve_state = *host_data_ptr(sve_state); + + /* + * On saving/restoring host sve state, always use the maximum VL for + * the host. The layout of the data when saving the sve state depends + * on the VL, so use a consistent (i.e., the maximum) host VL. + * + * Setting ZCR_EL2 to ZCR_ELx_LEN_MASK sets the effective length + * supported by the system (or limited at EL3). + */ + write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); + __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl), + &sve_state->fpsr, + true); + write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR); +} + +static void fpsimd_sve_flush(void) +{ + *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; +} + +static void fpsimd_sve_sync(struct kvm_vcpu *vcpu) +{ + if (!guest_owns_fp_regs()) + return; + + cpacr_clear_set(0, CPACR_ELx_FPEN | CPACR_ELx_ZEN); + isb(); + + if (vcpu_has_sve(vcpu)) + __hyp_sve_save_guest(vcpu); + else + __fpsimd_save_state(&vcpu->arch.ctxt.fp_regs); + + if (system_supports_sve()) + __hyp_sve_restore_host(); + else + __fpsimd_restore_state(*host_data_ptr(fpsimd_state)); + + *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED; +} + static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) { struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + fpsimd_sve_flush(); + hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt; hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state); - hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl; + /* Limit guest vector length to the maximum supported by the host. */ + hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl); hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu; hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2; hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2; - hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2; hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags; - hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state; hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr); - hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state; hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2; @@ -56,15 +114,15 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3; unsigned int i; + fpsimd_sve_sync(&hyp_vcpu->vcpu); + host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt; host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2; - host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2; host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault; host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags; - host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state; host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr; for (i = 0; i < hyp_cpu_if->used_lrs; ++i) @@ -82,6 +140,17 @@ static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) struct pkvm_hyp_vcpu *hyp_vcpu; struct kvm *host_kvm; + /* + * KVM (and pKVM) doesn't support SME guests for now, and + * ensures that SME features aren't enabled in pstate when + * loading a vcpu. Therefore, if SME features enabled the host + * is misbehaving. + */ + if (unlikely(system_supports_sme() && read_sysreg_s(SYS_SVCR))) { + ret = -EINVAL; + goto out; + } + host_kvm = kern_hyp_va(host_vcpu->kvm); hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle, host_vcpu->vcpu_idx); @@ -178,16 +247,6 @@ static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config(); } -static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt) -{ - cpu_reg(host_ctxt, 1) = __vgic_v3_read_vmcr(); -} - -static void handle___vgic_v3_write_vmcr(struct kvm_cpu_context *host_ctxt) -{ - __vgic_v3_write_vmcr(cpu_reg(host_ctxt, 1)); -} - static void handle___vgic_v3_init_lrs(struct kvm_cpu_context *host_ctxt) { __vgic_v3_init_lrs(); @@ -198,18 +257,18 @@ static void handle___kvm_get_mdcr_el2(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __kvm_get_mdcr_el2(); } -static void handle___vgic_v3_save_aprs(struct kvm_cpu_context *host_ctxt) +static void handle___vgic_v3_save_vmcr_aprs(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); - __vgic_v3_save_aprs(kern_hyp_va(cpu_if)); + __vgic_v3_save_vmcr_aprs(kern_hyp_va(cpu_if)); } -static void handle___vgic_v3_restore_aprs(struct kvm_cpu_context *host_ctxt) +static void handle___vgic_v3_restore_vmcr_aprs(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct vgic_v3_cpu_if *, cpu_if, host_ctxt, 1); - __vgic_v3_restore_aprs(kern_hyp_va(cpu_if)); + __vgic_v3_restore_vmcr_aprs(kern_hyp_va(cpu_if)); } static void handle___pkvm_init(struct kvm_cpu_context *host_ctxt) @@ -340,10 +399,8 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__kvm_tlb_flush_vmid_range), HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), - HANDLE_FUNC(__vgic_v3_read_vmcr), - HANDLE_FUNC(__vgic_v3_write_vmcr), - HANDLE_FUNC(__vgic_v3_save_aprs), - HANDLE_FUNC(__vgic_v3_restore_aprs), + HANDLE_FUNC(__vgic_v3_save_vmcr_aprs), + HANDLE_FUNC(__vgic_v3_restore_vmcr_aprs), HANDLE_FUNC(__pkvm_vcpu_init_traps), HANDLE_FUNC(__pkvm_init_vm), HANDLE_FUNC(__pkvm_init_vcpu), @@ -420,11 +477,7 @@ void handle_trap(struct kvm_cpu_context *host_ctxt) handle_host_smc(host_ctxt); break; case ESR_ELx_EC_SVE: - if (has_hvhe()) - sysreg_clear_set(cpacr_el1, 0, (CPACR_EL1_ZEN_EL1EN | - CPACR_EL1_ZEN_EL0EN)); - else - sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0); + cpacr_clear_set(0, CPACR_ELx_ZEN); isb(); sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2); break; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 861c76021a..caba3e4bd0 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -533,7 +533,13 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) int ret = 0; esr = read_sysreg_el2(SYS_ESR); - BUG_ON(!__get_fault_info(esr, &fault)); + if (!__get_fault_info(esr, &fault)) { + /* + * We've presumably raced with a page-table change which caused + * AT to fail, try again. + */ + return; + } addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; ret = host_stage2_idmap(addr); diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 26dd9a20ad..95cf185742 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -18,6 +18,8 @@ unsigned long __icache_flags; /* Used by kvm_get_vttbr(). */ unsigned int kvm_arm_vmid_bits; +unsigned int kvm_host_sve_max_vl; + /* * Set trap register values based on features in ID_AA64PFR0. */ @@ -63,7 +65,7 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) /* Trap SVE */ if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE), feature_ids)) { if (has_hvhe()) - cptr_clear |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; + cptr_clear |= CPACR_ELx_ZEN; else cptr_set |= CPTR_EL2_TZ; } @@ -200,7 +202,7 @@ static void pvm_init_trap_regs(struct kvm_vcpu *vcpu) } /* - * Initialize trap register values for protected VMs. + * Initialize trap register values in protected mode. */ void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu) { @@ -430,6 +432,7 @@ static void *map_donated_memory(unsigned long host_va, size_t size) static void __unmap_donated_memory(void *va, size_t size) { + kvm_flush_dcache_to_poc(va, size); WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va), PAGE_ALIGN(size) >> PAGE_SHIFT)); } @@ -574,6 +577,8 @@ unlock: if (ret) unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu)); + hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu); + return ret; } diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c index d57bcb6ab9..dfe8fe0f7e 100644 --- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c +++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c @@ -205,7 +205,7 @@ asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on) struct psci_boot_args *boot_args; struct kvm_cpu_context *host_ctxt; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); if (is_cpu_on) boot_args = this_cpu_ptr(&cpu_on_args); diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index bc58d1b515..f4350ba07b 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -67,6 +67,28 @@ static int divide_memory_pool(void *virt, unsigned long size) return 0; } +static int pkvm_create_host_sve_mappings(void) +{ + void *start, *end; + int ret, i; + + if (!system_supports_sve()) + return 0; + + for (i = 0; i < hyp_nr_cpus; i++) { + struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i); + struct cpu_sve_state *sve_state = host_data->sve_state; + + start = kern_hyp_va(sve_state); + end = start + PAGE_ALIGN(pkvm_host_sve_state_size()); + ret = pkvm_create_mappings(start, end, PAGE_HYP); + if (ret) + return ret; + } + + return 0; +} + static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, unsigned long *per_cpu_base, u32 hyp_va_bits) @@ -125,6 +147,8 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, return ret; } + pkvm_create_host_sve_mappings(); + /* * Map the host sections RO in the hypervisor, but transfer the * ownership from the host to the hypervisor itself to make sure they @@ -257,8 +281,7 @@ static int fix_hyp_pgtable_refcnt(void) void __noreturn __pkvm_init_finalise(void) { - struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); - struct kvm_cpu_context *host_ctxt = &host_data->host_ctxt; + struct kvm_cpu_context *host_ctxt = host_data_ptr(host_ctxt); unsigned long nr_pages, reserved_pages, pfn; int ret; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index c50f8459e4..6af179c635 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -40,7 +40,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) { u64 val; - ___activate_traps(vcpu); + ___activate_traps(vcpu, vcpu->arch.hcr_el2); __activate_traps_common(vcpu); val = vcpu->arch.cptr_el2; @@ -48,15 +48,14 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val |= has_hvhe() ? CPACR_EL1_TTA : CPTR_EL2_TTA; if (cpus_have_final_cap(ARM64_SME)) { if (has_hvhe()) - val &= ~(CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN); + val &= ~CPACR_ELx_SMEN; else val |= CPTR_EL2_TSM; } - if (!guest_owns_fp_regs(vcpu)) { + if (!guest_owns_fp_regs()) { if (has_hvhe()) - val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN | - CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN); + val &= ~(CPACR_ELx_FPEN | CPACR_ELx_ZEN); else val |= CPTR_EL2_TFP | CPTR_EL2_TZ; @@ -182,6 +181,25 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) kvm_handle_pvm_sysreg(vcpu, exit_code)); } +static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu) +{ + /* + * Non-protected kvm relies on the host restoring its sve state. + * Protected kvm restores the host's sve state as not to reveal that + * fpsimd was used by a guest nor leak upper sve bits. + */ + if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) { + __hyp_sve_save_host(); + + /* Re-enable SVE traps if not supported for the guest vcpu. */ + if (!vcpu_has_sve(vcpu)) + cpacr_clear_set(CPACR_ELx_ZEN, 0); + + } else { + __fpsimd_save_state(*host_data_ptr(fpsimd_state)); + } +} + static const exit_handler_fn hyp_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, @@ -191,7 +209,6 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, - [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; @@ -203,13 +220,12 @@ static const exit_handler_fn pvm_exit_handlers[] = { [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, [ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low, - [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, [ESR_ELx_EC_MOPS] = kvm_hyp_handle_mops, }; static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) { - if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm)))) + if (unlikely(vcpu_is_protected(vcpu))) return pvm_exit_handlers; return hyp_exit_handlers; @@ -228,9 +244,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) */ static void early_exit_filter(struct kvm_vcpu *vcpu, u64 *exit_code) { - struct kvm *kvm = kern_hyp_va(vcpu->kvm); - - if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) { + if (unlikely(vcpu_is_protected(vcpu) && vcpu_mode_is_32bit(vcpu))) { /* * As we have caught the guest red-handed, decide that it isn't * fit for purpose anymore by making the vcpu invalid. The VMM @@ -264,7 +278,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) pmr_sync(); } - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); host_ctxt->__hyp_running_vcpu = vcpu; guest_ctxt = &vcpu->arch.ctxt; @@ -337,7 +351,7 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(host_ctxt); - if (vcpu->arch.fp_state == FP_STATE_GUEST_OWNED) + if (guest_owns_fp_regs()) __fpsimd_save_fpexc32(vcpu); __debug_switch_to_host(vcpu); @@ -367,7 +381,7 @@ asmlinkage void __noreturn hyp_panic(void) struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; - host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + host_ctxt = host_data_ptr(host_ctxt); vcpu = host_ctxt->__hyp_running_vcpu; if (vcpu) { diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 2fc68da403..ca3c09df8d 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -11,13 +11,23 @@ #include <nvhe/mem_protect.h> struct tlb_inv_context { - u64 tcr; + struct kvm_s2_mmu *mmu; + u64 tcr; + u64 sctlr; }; -static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, - struct tlb_inv_context *cxt, - bool nsh) +static void enter_vmid_context(struct kvm_s2_mmu *mmu, + struct tlb_inv_context *cxt, + bool nsh) { + struct kvm_s2_mmu *host_s2_mmu = &host_mmu.arch.mmu; + struct kvm_cpu_context *host_ctxt; + struct kvm_vcpu *vcpu; + + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + vcpu = host_ctxt->__hyp_running_vcpu; + cxt->mmu = NULL; + /* * We have two requirements: * @@ -40,20 +50,55 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, else dsb(ish); + /* + * If we're already in the desired context, then there's nothing to do. + */ + if (vcpu) { + /* + * We're in guest context. However, for this to work, this needs + * to be called from within __kvm_vcpu_run(), which ensures that + * __hyp_running_vcpu is set to the current guest vcpu. + */ + if (mmu == vcpu->arch.hw_mmu || WARN_ON(mmu != host_s2_mmu)) + return; + + cxt->mmu = vcpu->arch.hw_mmu; + } else { + /* We're in host context. */ + if (mmu == host_s2_mmu) + return; + + cxt->mmu = host_s2_mmu; + } + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; /* * For CPUs that are affected by ARM 1319367, we need to - * avoid a host Stage-1 walk while we have the guest's - * VMID set in the VTTBR in order to invalidate TLBs. - * We're guaranteed that the S1 MMU is enabled, so we can - * simply set the EPD bits to avoid any further TLB fill. + * avoid a Stage-1 walk with the old VMID while we have + * the new VMID set in the VTTBR in order to invalidate TLBs. + * We're guaranteed that the host S1 MMU is enabled, so + * we can simply set the EPD bits to avoid any further + * TLB fill. For guests, we ensure that the S1 MMU is + * temporarily enabled in the next context. */ val = cxt->tcr = read_sysreg_el1(SYS_TCR); val |= TCR_EPD1_MASK | TCR_EPD0_MASK; write_sysreg_el1(val, SYS_TCR); isb(); + + if (vcpu) { + val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR); + if (!(val & SCTLR_ELx_M)) { + val |= SCTLR_ELx_M; + write_sysreg_el1(val, SYS_SCTLR); + isb(); + } + } else { + /* The host S1 MMU is always enabled. */ + cxt->sctlr = SCTLR_ELx_M; + } } /* @@ -62,18 +107,40 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, * ensuring that we always have an ISB, but not two ISBs back * to back. */ - __load_stage2(mmu, kern_hyp_va(mmu->arch)); + if (vcpu) + __load_host_stage2(); + else + __load_stage2(mmu, kern_hyp_va(mmu->arch)); + asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } -static void __tlb_switch_to_host(struct tlb_inv_context *cxt) +static void exit_vmid_context(struct tlb_inv_context *cxt) { - __load_host_stage2(); + struct kvm_s2_mmu *mmu = cxt->mmu; + struct kvm_cpu_context *host_ctxt; + struct kvm_vcpu *vcpu; + + host_ctxt = &this_cpu_ptr(&kvm_host_data)->host_ctxt; + vcpu = host_ctxt->__hyp_running_vcpu; + + if (!mmu) + return; + + if (vcpu) + __load_stage2(mmu, kern_hyp_va(mmu->arch)); + else + __load_host_stage2(); if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { - /* Ensure write of the host VMID */ + /* Ensure write of the old VMID */ isb(); - /* Restore the host's TCR_EL1 */ + + if (!(cxt->sctlr & SCTLR_ELx_M)) { + write_sysreg_el1(cxt->sctlr, SYS_SCTLR); + isb(); + } + write_sysreg_el1(cxt->tcr, SYS_TCR); } } @@ -84,7 +151,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); /* * We could do so much better if we had the VA as well. @@ -105,7 +172,7 @@ void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, @@ -114,7 +181,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, true); + enter_vmid_context(mmu, &cxt, true); /* * We could do so much better if we had the VA as well. @@ -135,7 +202,7 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu, dsb(nsh); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, @@ -152,7 +219,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, start = round_down(start, stride); /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, TLBI_TTL_UNKNOWN); @@ -162,7 +229,7 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) @@ -170,13 +237,13 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); __tlbi(vmalls12e1is); dsb(ish); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) @@ -184,19 +251,19 @@ void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) struct tlb_inv_context cxt; /* Switch to requested VMID */ - __tlb_switch_to_guest(mmu, &cxt, false); + enter_vmid_context(mmu, &cxt, false); __tlbi(vmalle1); asm volatile("ic iallu"); dsb(nsh); isb(); - __tlb_switch_to_host(&cxt); + exit_vmid_context(&cxt); } void __kvm_flush_vm_context(void) { - /* Same remark as in __tlb_switch_to_guest() */ + /* Same remark as in enter_vmid_context() */ dsb(ish); __tlbi(alle1is); dsb(ish); |