author    Daniel Baumann <daniel.baumann@progress-linux.org>    2024-08-05 19:20:54 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>    2024-08-05 19:20:54 +0000
commit    9cf27fc03a2eaf3d91167db54c19bad8652ac085 (patch)
tree      42f079ff82e701ebcb76829974b4caca3e5b6798    /arch/x86/kvm/vmx/vmx.c
parent    Adding upstream version 6.8.12. (diff)
Adding upstream version 6.9.7.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/x86/kvm/vmx/vmx.c')
-rw-r--r--    arch/x86/kvm/vmx/vmx.c    188
1 file changed, 107 insertions(+), 81 deletions(-)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 784f2ecca5..22411f4aff 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -38,6 +38,7 @@
#include <asm/desc.h>
#include <asm/fpu/api.h>
#include <asm/fpu/xstate.h>
+#include <asm/fred.h>
#include <asm/idtentry.h>
#include <asm/io.h>
#include <asm/irq_remapping.h>
@@ -49,6 +50,8 @@
#include <asm/spec-ctrl.h>
#include <asm/vmx.h>
+#include <trace/events/ipi.h>
+
#include "capabilities.h"
#include "cpuid.h"
#include "hyperv.h"
@@ -159,7 +162,7 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
/*
* List of MSRs that can be directly passed to the guest.
- * In addition to these x2apic and PT MSRs are handled specially.
+ * In addition to these, x2apic, PT and LBR MSRs are handled specially.
*/
static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
MSR_IA32_SPEC_CTRL,
@@ -215,6 +218,8 @@ module_param(ple_window_max, uint, 0444);
int __read_mostly pt_mode = PT_MODE_SYSTEM;
module_param(pt_mode, int, S_IRUGO);
+struct x86_pmu_lbr __ro_after_init vmx_lbr_caps;
+
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush);
static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond);
static DEFINE_MUTEX(vmx_l1d_flush_mutex);
@@ -667,25 +672,14 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
return flexpriority_enabled && lapic_in_kernel(vcpu);
}
-static int possible_passthrough_msr_slot(u32 msr)
-{
- u32 i;
-
- for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++)
- if (vmx_possible_passthrough_msrs[i] == msr)
- return i;
-
- return -ENOENT;
-}
-
-static bool is_valid_passthrough_msr(u32 msr)
+static int vmx_get_passthrough_msr_slot(u32 msr)
{
- bool r;
+ int i;
switch (msr) {
case 0x800 ... 0x8ff:
/* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
- return true;
+ return -ENOENT;
case MSR_IA32_RTIT_STATUS:
case MSR_IA32_RTIT_OUTPUT_BASE:
case MSR_IA32_RTIT_OUTPUT_MASK:
@@ -700,14 +694,16 @@ static bool is_valid_passthrough_msr(u32 msr)
case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
/* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
- return true;
+ return -ENOENT;
}
- r = possible_passthrough_msr_slot(msr) != -ENOENT;
-
- WARN(!r, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+ for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
+ if (vmx_possible_passthrough_msrs[i] == msr)
+ return i;
+ }
- return r;
+ WARN(1, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
+ return -ENOENT;
}
struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
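The hunk above folds the old is_valid_passthrough_msr()/possible_passthrough_msr_slot() pair into a single lookup that returns either a slot index or -ENOENT, and warns only for MSRs that are neither specially handled nor in the table. A minimal standalone sketch of that pattern (not kernel code); the two-entry table and the fprintf() warning are illustrative stand-ins for vmx_possible_passthrough_msrs[] and WARN():

/*
 * Standalone sketch: special-cased MSR ranges return -ENOENT silently,
 * table hits return the slot index, anything else warns.
 */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>

static const uint32_t passthrough_msrs[] = {
    0x00000048, /* MSR_IA32_SPEC_CTRL */
    0x00000049, /* MSR_IA32_PRED_CMD */
};

static int get_passthrough_msr_slot(uint32_t msr)
{
    size_t i;

    /* x2APIC MSRs: handled elsewhere, so no slot and no warning. */
    if (msr >= 0x800 && msr <= 0x8ff)
        return -ENOENT;

    for (i = 0; i < sizeof(passthrough_msrs) / sizeof(passthrough_msrs[0]); i++) {
        if (passthrough_msrs[i] == msr)
            return (int)i;
    }

    fprintf(stderr, "Invalid MSR %x, please adapt the passthrough table\n", msr);
    return -ENOENT;
}

int main(void)
{
    printf("%d\n", get_passthrough_msr_slot(0x48));  /* slot 0 */
    printf("%d\n", get_passthrough_msr_slot(0x830)); /* -ENOENT, no warning */
    printf("%d\n", get_passthrough_msr_slot(0x123)); /* -ENOENT, warns */
    return 0;
}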
@@ -1290,8 +1286,6 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
u16 fs_sel, gs_sel;
int i;
- vmx->req_immediate_exit = false;
-
/*
* Note that guest MSRs to be saved/restored can also be changed
* when guest state is loaded. This happens when guest transitions
@@ -3963,6 +3957,7 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+ int idx;
if (!cpu_has_vmx_msr_bitmap())
return;
@@ -3972,16 +3967,13 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
/*
* Mark the desired intercept state in shadow bitmap, this is needed
* for resync when the MSR filters change.
- */
- if (is_valid_passthrough_msr(msr)) {
- int idx = possible_passthrough_msr_slot(msr);
-
- if (idx != -ENOENT) {
- if (type & MSR_TYPE_R)
- clear_bit(idx, vmx->shadow_msr_intercept.read);
- if (type & MSR_TYPE_W)
- clear_bit(idx, vmx->shadow_msr_intercept.write);
- }
+ */
+ idx = vmx_get_passthrough_msr_slot(msr);
+ if (idx >= 0) {
+ if (type & MSR_TYPE_R)
+ clear_bit(idx, vmx->shadow_msr_intercept.read);
+ if (type & MSR_TYPE_W)
+ clear_bit(idx, vmx->shadow_msr_intercept.write);
}
if ((type & MSR_TYPE_R) &&
@@ -4007,6 +3999,7 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
+ int idx;
if (!cpu_has_vmx_msr_bitmap())
return;
@@ -4016,16 +4009,13 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
/*
* Mark the desired intercept state in shadow bitmap, this is needed
* for resync when the MSR filter changes.
- */
- if (is_valid_passthrough_msr(msr)) {
- int idx = possible_passthrough_msr_slot(msr);
-
- if (idx != -ENOENT) {
- if (type & MSR_TYPE_R)
- set_bit(idx, vmx->shadow_msr_intercept.read);
- if (type & MSR_TYPE_W)
- set_bit(idx, vmx->shadow_msr_intercept.write);
- }
+ */
+ idx = vmx_get_passthrough_msr_slot(msr);
+ if (idx >= 0) {
+ if (type & MSR_TYPE_R)
+ set_bit(idx, vmx->shadow_msr_intercept.read);
+ if (type & MSR_TYPE_W)
+ set_bit(idx, vmx->shadow_msr_intercept.write);
}
if (type & MSR_TYPE_R)
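Both intercept helpers now follow the same shape: fetch the slot once and touch the shadow read/write bitmaps only when the slot is non-negative, so specially handled MSRs skip the bookkeeping without tripping a warning. A self-contained sketch of that bookkeeping (not kernel code), using one 64-bit word per bitmap; the MSR_TYPE_* values and field names are illustrative rather than KVM's definitions:

#include <stdint.h>
#include <stdio.h>

#define MSR_TYPE_R 1
#define MSR_TYPE_W 2

struct shadow_intercept {
    uint64_t read;
    uint64_t write;
};

static void disable_intercept(struct shadow_intercept *s, int idx, int type)
{
    if (idx < 0)        /* specially handled MSR: no shadow tracking */
        return;
    if (type & MSR_TYPE_R)
        s->read &= ~(1ull << idx);
    if (type & MSR_TYPE_W)
        s->write &= ~(1ull << idx);
}

static void enable_intercept(struct shadow_intercept *s, int idx, int type)
{
    if (idx < 0)
        return;
    if (type & MSR_TYPE_R)
        s->read |= 1ull << idx;
    if (type & MSR_TYPE_W)
        s->write |= 1ull << idx;
}

int main(void)
{
    struct shadow_intercept s = { ~0ull, ~0ull };   /* intercept everything */

    disable_intercept(&s, 0, MSR_TYPE_R | MSR_TYPE_W); /* pass slot 0 through */
    enable_intercept(&s, 0, MSR_TYPE_W);               /* re-intercept writes */
    printf("read=%llx write=%llx\n",
           (unsigned long long)s.read, (unsigned long long)s.write);
    return 0;
}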
@@ -4136,6 +4126,9 @@ static void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 i;
+ if (!cpu_has_vmx_msr_bitmap())
+ return;
+
/*
* Redo intercept permissions for MSRs that KVM is passing through to
* the guest. Disabling interception will check the new MSR filter and
@@ -5575,10 +5568,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
reg = DEBUG_REG_ACCESS_REG(exit_qualification);
if (exit_qualification & TYPE_MOV_FROM_DR) {
- unsigned long val;
-
- kvm_get_dr(vcpu, dr, &val);
- kvm_register_write(vcpu, reg, val);
+ kvm_register_write(vcpu, reg, kvm_get_dr(vcpu, dr));
err = 0;
} else {
err = kvm_set_dr(vcpu, dr, kvm_register_read(vcpu, reg));
@@ -6000,22 +5990,46 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
return 1;
}
-static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu)
+static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu,
+ bool force_immediate_exit)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (!vmx->req_immediate_exit &&
- !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) {
- kvm_lapic_expired_hv_timer(vcpu);
+ /*
+ * In the *extremely* unlikely scenario that this is a spurious VM-Exit
+ * due to the timer expiring while it was "soft" disabled, just eat the
+ * exit and re-enter the guest.
+ */
+ if (unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
return EXIT_FASTPATH_REENTER_GUEST;
- }
- return EXIT_FASTPATH_NONE;
+ /*
+ * If the timer expired because KVM used it to force an immediate exit,
+ * then mission accomplished.
+ */
+ if (force_immediate_exit)
+ return EXIT_FASTPATH_EXIT_HANDLED;
+
+ /*
+ * If L2 is active, go down the slow path as emulating the guest timer
+ * expiration likely requires synthesizing a nested VM-Exit.
+ */
+ if (is_guest_mode(vcpu))
+ return EXIT_FASTPATH_NONE;
+
+ kvm_lapic_expired_hv_timer(vcpu);
+ return EXIT_FASTPATH_REENTER_GUEST;
}
static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{
- handle_fastpath_preemption_timer(vcpu);
+ /*
+ * This non-fastpath handler is reached if and only if the preemption
+ * timer was being used to emulate a guest timer while L2 is active.
+ * All other scenarios are supposed to be handled in the fastpath.
+ */
+ WARN_ON_ONCE(!is_guest_mode(vcpu));
+ kvm_lapic_expired_hv_timer(vcpu);
return 1;
}
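Taken together, the two hunks above split the preemption-timer work into a fastpath that makes a four-way decision (spurious soft-disabled expiry, KVM-forced immediate exit, L2 active, genuine L1 expiry) and a slow-path handler that only ever runs for the L2 case. A standalone sketch (not kernel code) of the fastpath ordering, with the vCPU state collapsed into three booleans; the enum names mirror the fastpath_t values used in the diff, the numeric values here are arbitrary:

#include <stdbool.h>
#include <stdio.h>

enum fastpath {
    EXIT_FASTPATH_NONE,
    EXIT_FASTPATH_REENTER_GUEST,
    EXIT_FASTPATH_EXIT_HANDLED,
};

static enum fastpath preemption_timer_fastpath(bool timer_soft_disabled,
                                               bool force_immediate_exit,
                                               bool l2_active)
{
    /* Spurious exit: timer fired while "soft" disabled, just re-enter. */
    if (timer_soft_disabled)
        return EXIT_FASTPATH_REENTER_GUEST;

    /* KVM armed the timer purely to force an exit: nothing left to do. */
    if (force_immediate_exit)
        return EXIT_FASTPATH_EXIT_HANDLED;

    /* L2 active: emulating the guest timer may need a nested VM-Exit. */
    if (l2_active)
        return EXIT_FASTPATH_NONE;

    /* L1 timer genuinely expired: inject it and re-enter. */
    return EXIT_FASTPATH_REENTER_GUEST;
}

int main(void)
{
    printf("%d\n", preemption_timer_fastpath(false, true, false));  /* exit handled */
    printf("%d\n", preemption_timer_fastpath(false, false, true));  /* slow path */
    return 0;
}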
@@ -6518,7 +6532,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
vcpu->run->internal.data[0] = vectoring_info;
vcpu->run->internal.data[1] = exit_reason.full;
- vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
+ vcpu->run->internal.data[2] = vmx_get_exit_qual(vcpu);
if (exit_reason.basic == EXIT_REASON_EPT_MISCONFIG) {
vcpu->run->internal.data[ndata++] =
vmcs_read64(GUEST_PHYSICAL_ADDRESS);
@@ -6552,7 +6566,7 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
if (exit_reason.basic >= kvm_vmx_max_exit_handlers)
goto unexpected_vmexit;
-#ifdef CONFIG_RETPOLINE
+#ifdef CONFIG_MITIGATION_RETPOLINE
if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
return kvm_emulate_wrmsr(vcpu);
else if (exit_reason.basic == EXIT_REASON_PREEMPTION_TIMER)
@@ -6969,14 +6983,16 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
{
u32 intr_info = vmx_get_intr_info(vcpu);
unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
- gate_desc *desc = (gate_desc *)host_idt_base + vector;
if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
"unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;
kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
- vmx_do_interrupt_irqoff(gate_offset(desc));
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ fred_entry_from_kvm(EVENT_TYPE_EXTINT, vector);
+ else
+ vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector));
kvm_after_interrupt(vcpu);
vcpu->arch.at_instruction_boundary = true;
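This hunk, like the NMI hunk further down, forwards the host event either through the FRED entry path or the legacy stubs, depending on a CPU feature check. A minimal sketch (not kernel code) of that dispatch shape; the flag, event types, and handlers are stand-ins for cpu_feature_enabled(X86_FEATURE_FRED), fred_entry_from_kvm(), vmx_do_interrupt_irqoff() and vmx_do_nmi_irqoff():

#include <stdbool.h>
#include <stdio.h>

enum event_type { EVENT_EXTINT, EVENT_NMI };

static bool fred_enabled;

static void fred_entry_stub(enum event_type type, int vector)
{
    printf("FRED entry: type %d, vector %#x\n", type, vector);
}

static void legacy_stub(enum event_type type, int vector)
{
    if (type == EVENT_NMI)
        printf("asm NMI stub\n");
    else
        printf("IDT gate for vector %#x\n", vector);
}

static void forward_host_event(enum event_type type, int vector)
{
    if (fred_enabled)
        fred_entry_stub(type, vector);
    else
        legacy_stub(type, vector);
}

int main(void)
{
    forward_host_event(EVENT_EXTINT, 0x30);
    fred_enabled = true;
    forward_host_event(EVENT_NMI, 2);   /* NMI_VECTOR */
    return 0;
}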
@@ -7155,13 +7171,13 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host, false);
}
-static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
+static void vmx_update_hv_timer(struct kvm_vcpu *vcpu, bool force_immediate_exit)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 tscl;
u32 delta_tsc;
- if (vmx->req_immediate_exit) {
+ if (force_immediate_exit) {
vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
vmx->loaded_vmcs->hv_timer_soft_disabled = false;
} else if (vmx->hv_deadline_tsc != -1) {
@@ -7214,13 +7230,22 @@ void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
barrier_nospec();
}
-static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
+static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu,
+ bool force_immediate_exit)
{
+ /*
+ * If L2 is active, some VMX preemption timer exits can still be handled
+ * in the fastpath; all other exits must use the slow path.
+ */
+ if (is_guest_mode(vcpu) &&
+ to_vmx(vcpu)->exit_reason.basic != EXIT_REASON_PREEMPTION_TIMER)
+ return EXIT_FASTPATH_NONE;
+
switch (to_vmx(vcpu)->exit_reason.basic) {
case EXIT_REASON_MSR_WRITE:
return handle_fastpath_set_msr_irqoff(vcpu);
case EXIT_REASON_PREEMPTION_TIMER:
- return handle_fastpath_preemption_timer(vcpu);
+ return handle_fastpath_preemption_timer(vcpu, force_immediate_exit);
default:
return EXIT_FASTPATH_NONE;
}
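vmx_exit_handlers_fastpath() now takes force_immediate_exit and places an L2 gate in front of the switch: while L2 is active, only preemption-timer exits may stay in the fastpath. A tiny sketch (not kernel code) of that control flow; exit-reason and return values are illustrative, not the VMX encodings or KVM's fastpath_t:

#include <stdbool.h>
#include <stdio.h>

enum reason { REASON_MSR_WRITE, REASON_PREEMPTION_TIMER, REASON_OTHER };

static int exit_handlers_fastpath(enum reason reason, bool l2_active)
{
    /* With L2 active, only preemption-timer exits stay in the fastpath. */
    if (l2_active && reason != REASON_PREEMPTION_TIMER)
        return 0;                       /* slow path */

    switch (reason) {
    case REASON_MSR_WRITE:              /* stand-in for handle_fastpath_set_msr_irqoff() */
    case REASON_PREEMPTION_TIMER:       /* stand-in for handle_fastpath_preemption_timer() */
        return 1;                       /* handled in the fastpath */
    default:
        return 0;
    }
}

int main(void)
{
    printf("%d\n", exit_handlers_fastpath(REASON_MSR_WRITE, true));        /* 0 */
    printf("%d\n", exit_handlers_fastpath(REASON_PREEMPTION_TIMER, true)); /* 1 */
    return 0;
}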
@@ -7272,7 +7297,10 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
is_nmi(vmx_get_intr_info(vcpu))) {
kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
- vmx_do_nmi_irqoff();
+ if (cpu_feature_enabled(X86_FEATURE_FRED))
+ fred_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR);
+ else
+ vmx_do_nmi_irqoff();
kvm_after_interrupt(vcpu);
}
@@ -7280,7 +7308,7 @@ out:
guest_state_exit_irqoff();
}
-static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
+static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu, bool force_immediate_exit)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long cr3, cr4;
@@ -7307,7 +7335,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
return EXIT_FASTPATH_NONE;
}
- trace_kvm_entry(vcpu);
+ trace_kvm_entry(vcpu, force_immediate_exit);
if (vmx->ple_window_dirty) {
vmx->ple_window_dirty = false;
@@ -7366,7 +7394,9 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_passthrough_lbr_msrs(vcpu);
if (enable_preemption_timer)
- vmx_update_hv_timer(vcpu);
+ vmx_update_hv_timer(vcpu, force_immediate_exit);
+ else if (force_immediate_exit)
+ smp_send_reschedule(vcpu->cpu);
kvm_wait_lapic_expire(vcpu);
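With vmx->req_immediate_exit gone, an immediate exit is forced either by arming the VMX preemption timer with a value of zero or, when the preemption timer is unavailable, by sending the vCPU's own CPU a reschedule IPI so the guest exits right after VM-Enter. A compact sketch (not kernel code) of that fallback choice; the two actions are printf() stand-ins for vmx_update_hv_timer() and smp_send_reschedule():

#include <stdbool.h>
#include <stdio.h>

static void request_immediate_exit(bool preemption_timer_enabled, int cpu)
{
    if (preemption_timer_enabled) {
        /* stand-in for vmx_update_hv_timer(vcpu, true):
         * a zero preemption timer value expires immediately after VM-Enter */
        printf("arm preemption timer with 0\n");
    } else {
        /* stand-in for smp_send_reschedule(vcpu->cpu): the pending IPI
         * forces a VM-Exit as soon as the guest is entered */
        printf("send reschedule IPI to cpu %d\n", cpu);
    }
}

int main(void)
{
    request_immediate_exit(true, 3);
    request_immediate_exit(false, 3);
    return 0;
}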
@@ -7430,10 +7460,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
- if (is_guest_mode(vcpu))
- return EXIT_FASTPATH_NONE;
-
- return vmx_exit_handlers_fastpath(vcpu);
+ return vmx_exit_handlers_fastpath(vcpu, force_immediate_exit);
}
static void vmx_vcpu_free(struct kvm_vcpu *vcpu)
@@ -7837,10 +7864,9 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vmx_update_exception_bitmap(vcpu);
}
-static u64 vmx_get_perf_capabilities(void)
+static __init u64 vmx_get_perf_capabilities(void)
{
u64 perf_cap = PMU_CAP_FW_WRITES;
- struct x86_pmu_lbr lbr;
u64 host_perf_cap = 0;
if (!enable_pmu)
@@ -7850,8 +7876,16 @@ static u64 vmx_get_perf_capabilities(void)
rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) {
- x86_perf_get_lbr(&lbr);
- if (lbr.nr)
+ x86_perf_get_lbr(&vmx_lbr_caps);
+
+ /*
+ * KVM requires LBR callstack support, as the overhead due to
+ * context switching LBRs without said support is too high.
+ * See intel_pmu_create_guest_lbr_event() for more info.
+ */
+ if (!vmx_lbr_caps.has_callstack)
+ memset(&vmx_lbr_caps, 0, sizeof(vmx_lbr_caps));
+ else if (vmx_lbr_caps.nr)
perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
}
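The perf-capabilities probe now caches the host LBR capabilities in vmx_lbr_caps and discards them entirely when callstack mode is missing, since only with callstack support is context switching LBRs cheap enough for guests. A small sketch (not kernel code) of that filter; the struct is a trimmed-down stand-in for struct x86_pmu_lbr:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct lbr_caps {
    unsigned int nr;        /* number of LBR entries */
    bool has_callstack;     /* callstack mode supported */
};

static bool lbr_usable_for_guest(struct lbr_caps *caps)
{
    if (!caps->has_callstack) {
        /* No callstack mode: drop LBR support rather than pay the
         * context-switch cost on every entry/exit. */
        memset(caps, 0, sizeof(*caps));
        return false;
    }
    return caps->nr != 0;
}

int main(void)
{
    struct lbr_caps a = { .nr = 32, .has_callstack = true };
    struct lbr_caps b = { .nr = 32, .has_callstack = false };

    printf("%d %d\n", lbr_usable_for_guest(&a), lbr_usable_for_guest(&b));
    return 0;
}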
@@ -7933,11 +7967,6 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
}
-static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
-{
- to_vmx(vcpu)->req_immediate_exit = true;
-}
-
static int vmx_check_intercept_io(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info)
{
@@ -8390,8 +8419,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.check_intercept = vmx_check_intercept,
.handle_exit_irqoff = vmx_handle_exit_irqoff,
- .request_immediate_exit = vmx_request_immediate_exit,
-
.sched_in = vmx_sched_in,
.cpu_dirty_log_size = PML_ENTITY_NUM,
@@ -8651,7 +8678,6 @@ static __init int hardware_setup(void)
if (!enable_preemption_timer) {
vmx_x86_ops.set_hv_timer = NULL;
vmx_x86_ops.cancel_hv_timer = NULL;
- vmx_x86_ops.request_immediate_exit = __kvm_request_immediate_exit;
}
kvm_caps.supported_mce_cap |= MCG_LMCE_P;