diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 18:50:03 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 18:50:03 +0000 |
commit | 01a69402cf9d38ff180345d55c2ee51c7e89fbc7 (patch) | |
tree | b406c5242a088c4f59c6e4b719b783f43aca6ae9 /arch/x86/kvm/vmx/vmx.c | |
parent | Adding upstream version 6.7.12. (diff) | |
download | linux-01a69402cf9d38ff180345d55c2ee51c7e89fbc7.tar.xz linux-01a69402cf9d38ff180345d55c2ee51c7e89fbc7.zip |
Adding upstream version 6.8.9.upstream/6.8.9
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/x86/kvm/vmx/vmx.c')
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 112 |
1 files changed, 90 insertions, 22 deletions
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 856eef56b3..784f2ecca5 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -66,6 +66,7 @@ #include "vmx.h" #include "x86.h" #include "smm.h" +#include "vmx_onhyperv.h" MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -532,22 +533,14 @@ module_param(enlightened_vmcs, bool, 0444); static int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu) { struct hv_enlightened_vmcs *evmcs; - struct hv_partition_assist_pg **p_hv_pa_pg = - &to_kvm_hv(vcpu->kvm)->hv_pa_pg; - /* - * Synthetic VM-Exit is not enabled in current code and so All - * evmcs in singe VM shares same assist page. - */ - if (!*p_hv_pa_pg) - *p_hv_pa_pg = kzalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT); + hpa_t partition_assist_page = hv_get_partition_assist_page(vcpu); - if (!*p_hv_pa_pg) + if (partition_assist_page == INVALID_PAGE) return -ENOMEM; evmcs = (struct hv_enlightened_vmcs *)to_vmx(vcpu)->loaded_vmcs->vmcs; - evmcs->partition_assist_page = - __pa(*p_hv_pa_pg); + evmcs->partition_assist_page = partition_assist_page; evmcs->hv_vm_id = (unsigned long)vcpu->kvm; evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; @@ -1818,7 +1811,7 @@ static void vmx_inject_exception(struct kvm_vcpu *vcpu) * do generate error codes with bits 31:16 set, and so KVM's * ABI lets userspace shove in arbitrary 32-bit values. Drop * the upper bits to avoid VM-Fail, losing information that - * does't really exist is preferable to killing the VM. + * doesn't really exist is preferable to killing the VM. */ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, (u16)ex->error_code); intr_info |= INTR_INFO_DELIVER_CODE_MASK; @@ -2064,6 +2057,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, &msr_info->data)) return 1; +#ifdef CONFIG_KVM_HYPERV /* * Enlightened VMCS v1 doesn't have certain VMCS fields but * instead of just ignoring the features, different Hyper-V @@ -2074,6 +2068,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!msr_info->host_initiated && guest_cpuid_has_evmcs(vcpu)) nested_evmcs_filter_control_msr(vcpu, msr_info->index, &msr_info->data); +#endif break; case MSR_IA32_RTIT_CTL: if (!vmx_pt_mode_is_host_guest()) @@ -3409,7 +3404,8 @@ static void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, update_guest_cr3 = false; vmx_ept_load_pdptrs(vcpu); } else { - guest_cr3 = root_hpa | kvm_get_active_pcid(vcpu); + guest_cr3 = root_hpa | kvm_get_active_pcid(vcpu) | + kvm_get_active_cr3_lam_bits(vcpu); } if (update_guest_cr3) @@ -4842,7 +4838,10 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->nested.posted_intr_nv = -1; vmx->nested.vmxon_ptr = INVALID_GPA; vmx->nested.current_vmptr = INVALID_GPA; + +#ifdef CONFIG_KVM_HYPERV vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID; +#endif vcpu->arch.microcode_version = 0x100000000ULL; vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED; @@ -5791,7 +5790,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) * would also use advanced VM-exit information for EPT violations to * reconstruct the page fault error code. */ - if (unlikely(allow_smaller_maxphyaddr && kvm_vcpu_is_illegal_gpa(vcpu, gpa))) + if (unlikely(allow_smaller_maxphyaddr && !kvm_vcpu_is_legal_gpa(vcpu, gpa))) return kvm_emulate_instruction(vcpu, 0); return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); @@ -6766,10 +6765,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) return; /* - * Grab the memslot so that the hva lookup for the mmu_notifier retry - * is guaranteed to use the same memslot as the pfn lookup, i.e. rely - * on the pfn lookup's validation of the memslot to ensure a valid hva - * is used for the retry check. + * Explicitly grab the memslot using KVM's internal slot ID to ensure + * KVM doesn't unintentionally grab a userspace memslot. It _should_ + * be impossible for userspace to create a memslot for the APIC when + * APICv is enabled, but paranoia won't hurt in this case. */ slot = id_to_memslot(slots, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT); if (!slot || slot->flags & KVM_MEMSLOT_INVALID) @@ -6794,8 +6793,7 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) return; read_lock(&vcpu->kvm->mmu_lock); - if (mmu_invalidate_retry_hva(kvm, mmu_seq, - gfn_to_hva_memslot(slot, gfn))) { + if (mmu_invalidate_retry_gfn(kvm, mmu_seq, gfn)) { kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); read_unlock(&vcpu->kvm->mmu_lock); goto out; @@ -7686,6 +7684,9 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu) cr4_fixed1_update(X86_CR4_UMIP, ecx, feature_bit(UMIP)); cr4_fixed1_update(X86_CR4_LA57, ecx, feature_bit(LA57)); + entry = kvm_find_cpuid_entry_index(vcpu, 0x7, 1); + cr4_fixed1_update(X86_CR4_LAM_SUP, eax, feature_bit(LAM)); + #undef cr4_fixed1_update } @@ -7772,6 +7773,7 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_XSAVES); kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_VMX); + kvm_governed_feature_check_and_set(vcpu, X86_FEATURE_LAM); vmx_setup_uret_msrs(vmx); @@ -7855,8 +7857,28 @@ static u64 vmx_get_perf_capabilities(void) if (vmx_pebs_supported()) { perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; - if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4) - perf_cap &= ~PERF_CAP_PEBS_BASELINE; + + /* + * Disallow adaptive PEBS as it is functionally broken, can be + * used by the guest to read *host* LBRs, and can be used to + * bypass userspace event filters. To correctly and safely + * support adaptive PEBS, KVM needs to: + * + * 1. Account for the ADAPTIVE flag when (re)programming fixed + * counters. + * + * 2. Gain support from perf (or take direct control of counter + * programming) to support events without adaptive PEBS + * enabled for the hardware counter. + * + * 3. Ensure LBR MSRs cannot hold host data on VM-Entry with + * adaptive PEBS enabled and MSR_PEBS_DATA_CFG.LBRS=1. + * + * 4. Document which PMU events are effectively exposed to the + * guest via adaptive PEBS, and make adaptive PEBS mutually + * exclusive with KVM_SET_PMU_EVENT_FILTER if necessary. + */ + perf_cap &= ~PERF_CAP_PEBS_BASELINE; } return perf_cap; @@ -8218,6 +8240,50 @@ static void vmx_vm_destroy(struct kvm *kvm) free_pages((unsigned long)kvm_vmx->pid_table, vmx_get_pid_table_order(kvm)); } +/* + * Note, the SDM states that the linear address is masked *after* the modified + * canonicality check, whereas KVM masks (untags) the address and then performs + * a "normal" canonicality check. Functionally, the two methods are identical, + * and when the masking occurs relative to the canonicality check isn't visible + * to software, i.e. KVM's behavior doesn't violate the SDM. + */ +gva_t vmx_get_untagged_addr(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags) +{ + int lam_bit; + unsigned long cr3_bits; + + if (flags & (X86EMUL_F_FETCH | X86EMUL_F_IMPLICIT | X86EMUL_F_INVLPG)) + return gva; + + if (!is_64_bit_mode(vcpu)) + return gva; + + /* + * Bit 63 determines if the address should be treated as user address + * or a supervisor address. + */ + if (!(gva & BIT_ULL(63))) { + cr3_bits = kvm_get_active_cr3_lam_bits(vcpu); + if (!(cr3_bits & (X86_CR3_LAM_U57 | X86_CR3_LAM_U48))) + return gva; + + /* LAM_U48 is ignored if LAM_U57 is set. */ + lam_bit = cr3_bits & X86_CR3_LAM_U57 ? 56 : 47; + } else { + if (!kvm_is_cr4_bit_set(vcpu, X86_CR4_LAM_SUP)) + return gva; + + lam_bit = kvm_is_cr4_bit_set(vcpu, X86_CR4_LA57) ? 56 : 47; + } + + /* + * Untag the address by sign-extending the lam_bit, but NOT to bit 63. + * Bit 63 is retained from the raw virtual address so that untagging + * doesn't change a user access to a supervisor access, and vice versa. + */ + return (sign_extend64(gva, lam_bit) & ~BIT_ULL(63)) | (gva & BIT_ULL(63)); +} + static struct kvm_x86_ops vmx_x86_ops __initdata = { .name = KBUILD_MODNAME, @@ -8358,6 +8424,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .complete_emulated_msr = kvm_complete_insn_gp, .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector, + + .get_untagged_addr = vmx_get_untagged_addr, }; static unsigned int vmx_handle_intel_pt_intr(void) |