path: root/src/VBox/VMM/VMMR0/HMVMXR0.cpp
author    Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:17:27 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:17:27 +0000
commit    f215e02bf85f68d3a6106c2a1f4f7f063f819064 (patch)
tree      6bb5b92c046312c4e95ac2620b10ddf482d3fa8b /src/VBox/VMM/VMMR0/HMVMXR0.cpp
parent    Initial commit. (diff)
Adding upstream version 7.0.14-dfsg. (upstream/7.0.14-dfsg)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/VBox/VMM/VMMR0/HMVMXR0.cpp')
-rw-r--r-- src/VBox/VMM/VMMR0/HMVMXR0.cpp | 7030
1 file changed, 7030 insertions(+), 0 deletions(-)
diff --git a/src/VBox/VMM/VMMR0/HMVMXR0.cpp b/src/VBox/VMM/VMMR0/HMVMXR0.cpp
new file mode 100644
index 00000000..26b6252e
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/HMVMXR0.cpp
@@ -0,0 +1,7030 @@
+/* $Id: HMVMXR0.cpp $ */
+/** @file
+ * HM VMX (Intel VT-x) - Host Context Ring-0.
+ */
+
+/*
+ * Copyright (C) 2012-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_HM
+#define VMCPU_INCL_CPUM_GST_CTX
+#include <iprt/x86.h>
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/thread.h>
+#include <iprt/mem.h>
+#include <iprt/mp.h>
+
+#include <VBox/vmm/pdmapi.h>
+#include <VBox/vmm/dbgf.h>
+#include <VBox/vmm/iem.h>
+#include <VBox/vmm/iom.h>
+#include <VBox/vmm/tm.h>
+#include <VBox/vmm/em.h>
+#include <VBox/vmm/gcm.h>
+#include <VBox/vmm/gim.h>
+#include <VBox/vmm/apic.h>
+#include "HMInternal.h"
+#include <VBox/vmm/vmcc.h>
+#include <VBox/vmm/hmvmxinline.h>
+#include "HMVMXR0.h"
+#include "VMXInternal.h"
+#include "dtrace/VBoxVMM.h"
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+#ifdef DEBUG_ramshankar
+# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
+# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
+# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
+# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
+# define HMVMX_ALWAYS_CLEAN_TRANSIENT
+# define HMVMX_ALWAYS_CHECK_GUEST_STATE
+# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
+# define HMVMX_ALWAYS_TRAP_PF
+# define HMVMX_ALWAYS_FLUSH_TLB
+# define HMVMX_ALWAYS_SWAP_EFER
+#endif
+
+/** Enables the fAlwaysInterceptMovDRx related code. */
+#define VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX 1
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * VMX page allocation information.
+ */
+typedef struct
+{
+ uint32_t fValid; /**< Whether to allocate this page (e.g., based on a CPU feature). */
+ uint32_t uPadding0; /**< Padding to ensure arrays of these structs are aligned to a multiple of 8. */
+ PRTHCPHYS pHCPhys; /**< Where to store the host-physical address of the allocation. */
+ PRTR0PTR ppVirt; /**< Where to store the host-virtual address of the allocation. */
+} VMXPAGEALLOCINFO;
+/** Pointer to VMX page-allocation info. */
+typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
+/** Pointer to a const VMX page-allocation info. */
+typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
+AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient);
+static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo);
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** The DR6 value after writing zero to the register.
+ * Set by VMXR0GlobalInit(). */
+static uint64_t g_fDr6Zeroed = 0;
+
+
+/**
+ * Checks if the given MSR is part of the lastbranch-from-IP MSR stack.
+ * @returns @c true if it's part of LBR stack, @c false otherwise.
+ *
+ * @param pVM The cross context VM structure.
+ * @param idMsr The MSR.
+ * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
+ * Optional, can be NULL.
+ *
+ * @remarks Must only be called when LBR is enabled.
+ */
+DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
+{
+ Assert(pVM->hmr0.s.vmx.fLbr);
+ Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst);
+ uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
+ uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
+ if (idxMsr < cLbrStack)
+ {
+ if (pidxMsr)
+ *pidxMsr = idxMsr;
+ return true;
+ }
+ return false;
+}
+
+
+/**
+ * Checks if the given MSR is part of the lastbranch-to-IP MSR stack.
+ * @returns @c true if it's part of LBR stack, @c false otherwise.
+ *
+ * @param pVM The cross context VM structure.
+ * @param idMsr The MSR.
+ * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
+ * Optional, can be NULL.
+ *
+ * @remarks Must only be called when LBR is enabled and when lastbranch-to-IP MSRs
+ * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
+ */
+DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
+{
+ Assert(pVM->hmr0.s.vmx.fLbr);
+ if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst)
+ {
+ uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1;
+ uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
+ if (idxMsr < cLbrStack)
+ {
+ if (pidxMsr)
+ *pidxMsr = idxMsr;
+ return true;
+ }
+ }
+ return false;
+}
+
+
+/**
+ * Gets the active (in use) VMCS info. object for the specified VCPU.
+ *
+ * This is either the guest or nested-guest VMCS info. and need not necessarily
+ * pertain to the "current" VMCS (in the VMX definition of the term). For instance,
+ * if the VM-entry failed due to an invalid-guest state, we may have "cleared" the
+ * current VMCS while returning to ring-3. However, the VMCS info. object for that
+ * VMCS would still be active and returned here so that we could dump the VMCS
+ * fields to ring-3 for diagnostics. This function is thus only used to
+ * distinguish between the nested-guest and guest VMCS.
+ *
+ * @returns The active VMCS information.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @thread EMT.
+ * @remarks This function may be called with preemption or interrupts disabled!
+ */
+DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu)
+{
+ if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
+ return &pVCpu->hmr0.s.vmx.VmcsInfo;
+ return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
+}
+
+
+/**
+ * Returns whether the VM-exit MSR-store area differs from the VM-exit MSR-load
+ * area.
+ *
+ * @returns @c true if it's different, @c false otherwise.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
+{
+ return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
+ && pVmcsInfo->pvGuestMsrStore);
+}
+
+
+/**
+ * Sets the given Processor-based VM-execution controls.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ * @param uProcCtls The Processor-based VM-execution controls to set.
+ */
+static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
+{
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
+ {
+ pVmcsInfo->u32ProcCtls |= uProcCtls;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
+ AssertRC(rc);
+ }
+}
+
+
+/**
+ * Removes the given Processor-based VM-execution controls.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param uProcCtls The Processor-based VM-execution controls to remove.
+ *
+ * @remarks When executing a nested-guest, this will not remove any of the specified
+ * controls if the nested hypervisor has set any one of them.
+ */
+static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
+{
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if (pVmcsInfo->u32ProcCtls & uProcCtls)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if ( !pVmxTransient->fIsNestedGuest
+ || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls))
+#else
+ NOREF(pVCpu);
+ if (!pVmxTransient->fIsNestedGuest)
+#endif
+ {
+ pVmcsInfo->u32ProcCtls &= ~uProcCtls;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
+ AssertRC(rc);
+ }
+ }
+}
+
+
+/**
+ * Sets the TSC offset for the current VMCS.
+ *
+ * @param uTscOffset The TSC offset to set.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
+{
+ if (pVmcsInfo->u64TscOffset != uTscOffset)
+ {
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
+ AssertRC(rc);
+ pVmcsInfo->u64TscOffset = uTscOffset;
+ }
+}
+
+
+/**
+ * Loads the VMCS specified by the VMCS info. object.
+ *
+ * @returns VBox status code.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks Can be called with interrupts disabled.
+ */
+static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
+{
+ Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ int rc = VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
+ if (RT_SUCCESS(rc))
+ pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_CURRENT;
+ return rc;
+}
+
+
+/**
+ * Clears the VMCS specified by the VMCS info. object.
+ *
+ * @returns VBox status code.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks Can be called with interrupts disabled.
+ */
+static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
+{
+ Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
+ if (RT_SUCCESS(rc))
+ pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
+ return rc;
+}
+
+
+/**
+ * Checks whether the MSR belongs to the set of guest MSRs that we restore
+ * lazily while leaving VT-x.
+ *
+ * @returns true if it does, false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param idMsr The MSR to check.
+ */
+static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
+{
+ if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
+ {
+ switch (idMsr)
+ {
+ case MSR_K8_LSTAR:
+ case MSR_K6_STAR:
+ case MSR_K8_SF_MASK:
+ case MSR_K8_KERNEL_GS_BASE:
+ return true;
+ }
+ }
+ return false;
+}
+
+
+/**
+ * Loads a set of guest MSRs to allow read/write passthru access to the guest.
+ *
+ * The name of this function is slightly confusing. This function does NOT
+ * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
+ * common prefix for functions dealing with "lazy restoration" of the shared
+ * MSRs.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
+ if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
+ {
+ /*
+ * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
+ * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
+ * we can skip a few MSR writes.
+ *
+ * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the
+ * guest MSR values in the guest-CPU context might be different to what's currently
+ * loaded in the CPU. In either case, we need to write the new guest MSR values to the
+ * CPU, see @bugref{8728}.
+ */
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
+ && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase
+ && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar
+ && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar
+ && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask)
+ {
+#ifdef VBOX_STRICT
+ Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
+ Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
+ Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
+ Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
+#endif
+ }
+ else
+ {
+ ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
+ ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
+ ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
+ /* The system call flag mask register isn't as benign and accepting of all
+ values as the above, so mask it to avoid #GP'ing on corrupted input. */
+ Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX));
+ ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX);
+ }
+ }
+ pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
+}
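+
+/*
+ * Illustrative sketch (not part of the upstream logic): the lazy-MSR handling
+ * above is driven by the two VMX_LAZY_MSRS_XXX flags and, in terms of the
+ * functions in this file, follows the pattern
+ *
+ *     hmR0VmxLazySaveHostMsrs(pVCpu);      sets VMX_LAZY_MSRS_SAVED_HOST
+ *     hmR0VmxLazyLoadGuestMsrs(pVCpu);     sets VMX_LAZY_MSRS_LOADED_GUEST
+ *     ...                                  run the guest
+ *     hmR0VmxLazyRestoreHostMsrs(pVCpu);   clears both flags
+ *
+ * The exact call sites live in the VT-x entry/exit code paths.
+ */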
+
+
+/**
+ * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
+ *
+ * @returns @c true if found, @c false otherwise.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param idMsr The MSR to find.
+ */
+static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
+{
+ PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
+ uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
+ Assert(pMsrs);
+ Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
+ for (uint32_t i = 0; i < cMsrs; i++)
+ {
+ if (pMsrs[i].u32Msr == idMsr)
+ return true;
+ }
+ return false;
+}
+
+
+/**
+ * Performs lazy restoration of the set of host MSRs if they were previously
+ * loaded with guest MSR values.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ * @remarks The guest MSRs should have been saved back into the guest-CPU
+ * context by hmR0VmxImportGuestState()!!!
+ */
+static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
+ {
+ Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
+ if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
+ {
+ ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar);
+ ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar);
+ ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask);
+ ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase);
+ }
+ }
+ pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
+}
+
+
+/**
+ * Sets pfnStartVm to the best suited variant.
+ *
+ * This must be called whenever anything changes relative to the hmR0VmxStartVm
+ * variant selection:
+ * - pVCpu->hm.s.fLoadSaveGuestXcr0
+ * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher
+ * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher
+ * - Perhaps: CPUMIsGuestFPUStateActive() (windows only)
+ * - Perhaps: CPUMCTX.fXStateMask (windows only)
+ *
+ * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT
+ * can be changed at runtime.
+ */
+static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu)
+{
+ static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] =
+ {
+ { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
+ { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
+ };
+ uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0)
+ | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0)
+ | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0)
+ | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0)
+ | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0);
+ PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn;
+ if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm)
+ pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm;
+}
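+
+/*
+ * Worked example (illustrative only): a VCPU needing XCR0 load/save and an IBPB
+ * on VM-entry, but no L1D/MDS flush on entry and no IBPB on VM-exit, yields
+ * idx = 1 | 2 = 3 and thus selects
+ * hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit.
+ */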
+
+
+/**
+ * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
+ * stack.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uValue The value to push to the guest stack.
+ */
+static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
+{
+ /*
+ * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
+ * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
+ * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
+ */
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (pCtx->sp == 1)
+ return VINF_EM_RESET;
+ pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
+ int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
+ AssertRC(rc);
+ return rc;
+}
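+
+/*
+ * Example of the wrap-around noted above (illustrative only): pushing with
+ * sp = 0x0000 wraps the stack pointer to 0xfffe and writes the value at
+ * ss.u64Base + 0xfffe; only sp = 0x0001 is treated as a fatal condition and
+ * returns VINF_EM_RESET (triple fault).
+ */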
+
+
+/**
+ * Wrapper around VMXWriteVmcs16 taking a pVCpu parameter so VCC doesn't complain about
+ * unreferenced local parameters in the template code...
+ */
+DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t u16Val)
+{
+ RT_NOREF(pVCpu);
+ return VMXWriteVmcs16(uFieldEnc, u16Val);
+}
+
+
+/**
+ * Wrapper around VMXWriteVmcs32 taking a pVCpu parameter so VCC doesn't complain about
+ * unreferenced local parameters in the template code...
+ */
+DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t u32Val)
+{
+ RT_NOREF(pVCpu);
+ return VMXWriteVmcs32(uFieldEnc, u32Val);
+}
+
+
+/**
+ * Wrapper around VMXWriteVmcs64 taking a pVCpu parameter so VCC doesn't complain about
+ * unreferenced local parameters in the template code...
+ */
+DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t u64Val)
+{
+ RT_NOREF(pVCpu);
+ return VMXWriteVmcs64(uFieldEnc, u64Val);
+}
+
+
+/**
+ * Wrapper around VMXReadVmcs16 taking a pVCpu parameter so VCC doesn't complain about
+ * unreferenced local parameters in the template code...
+ */
+DECL_FORCE_INLINE(int) hmR0VmxReadVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t *pu16Val)
+{
+ RT_NOREF(pVCpu);
+ return VMXReadVmcs16(uFieldEnc, pu16Val);
+}
+
+
+/**
+ * Wrapper around VMXReadVmcs32 taking a pVCpu parameter so VCC doesn't complain about
+ * unreferenced local parameters in the template code...
+ */
+DECL_FORCE_INLINE(int) hmR0VmxReadVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t *pu32Val)
+{
+ RT_NOREF(pVCpu);
+ return VMXReadVmcs32(uFieldEnc, pu32Val);
+}
+
+
+/**
+ * Wrapper around VMXReadVmcs64 taking a pVCpu parameter so VCC doesn't complain about
+ * unreferenced local parameters in the template code...
+ */
+DECL_FORCE_INLINE(int) hmR0VmxReadVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t *pu64Val)
+{
+ RT_NOREF(pVCpu);
+ return VMXReadVmcs64(uFieldEnc, pu64Val);
+}
+
+
+/*
+ * Instantiate the code we share with the NEM darwin backend.
+ */
+#define VCPU_2_VMXSTATE(a_pVCpu) (a_pVCpu)->hm.s
+#define VCPU_2_VMXSTATS(a_pVCpu) (a_pVCpu)->hm.s
+
+#define VM_IS_VMX_UNRESTRICTED_GUEST(a_pVM) (a_pVM)->hmr0.s.vmx.fUnrestrictedGuest
+#define VM_IS_VMX_NESTED_PAGING(a_pVM) (a_pVM)->hmr0.s.fNestedPaging
+#define VM_IS_VMX_PREEMPT_TIMER_USED(a_pVM) (a_pVM)->hmr0.s.vmx.fUsePreemptTimer
+#define VM_IS_VMX_LBR(a_pVM) (a_pVM)->hmr0.s.vmx.fLbr
+
+#define VMX_VMCS_WRITE_16(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs16((a_pVCpu), (a_FieldEnc), (a_Val))
+#define VMX_VMCS_WRITE_32(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs32((a_pVCpu), (a_FieldEnc), (a_Val))
+#define VMX_VMCS_WRITE_64(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
+#define VMX_VMCS_WRITE_NW(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
+
+#define VMX_VMCS_READ_16(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs16((a_pVCpu), (a_FieldEnc), (a_pVal))
+#define VMX_VMCS_READ_32(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs32((a_pVCpu), (a_FieldEnc), (a_pVal))
+#define VMX_VMCS_READ_64(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
+#define VMX_VMCS_READ_NW(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
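+
+/* For example, a VMX_VMCS_WRITE_32(pVCpu, VMX_VMCS32_CTRL_PROC_EXEC, u32Val) in the
+   shared template expands here to hmR0VmxWriteVmcs32(pVCpu, VMX_VMCS32_CTRL_PROC_EXEC, u32Val);
+   the NEM darwin backend is expected to provide its own mappings for the same macro names. */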
+
+#include "../VMMAll/VMXAllTemplate.cpp.h"
+
+#undef VMX_VMCS_WRITE_16
+#undef VMX_VMCS_WRITE_32
+#undef VMX_VMCS_WRITE_64
+#undef VMX_VMCS_WRITE_NW
+
+#undef VMX_VMCS_READ_16
+#undef VMX_VMCS_READ_32
+#undef VMX_VMCS_READ_64
+#undef VMX_VMCS_READ_NW
+
+#undef VM_IS_VMX_PREEMPT_TIMER_USED
+#undef VM_IS_VMX_NESTED_PAGING
+#undef VM_IS_VMX_UNRESTRICTED_GUEST
+#undef VCPU_2_VMXSTATS
+#undef VCPU_2_VMXSTATE
+
+
+/**
+ * Updates the VM's last error record.
+ *
+ * If there was a VMX instruction error, reads the error data from the VMCS and
+ * updates VCPU's last error record as well.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
+ * VERR_VMX_INVALID_VMCS_FIELD.
+ * @param rc The error code.
+ */
+static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
+{
+ if ( rc == VERR_VMX_INVALID_VMCS_FIELD
+ || rc == VERR_VMX_UNABLE_TO_START_VM)
+ {
+ AssertPtrReturnVoid(pVCpu);
+ VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
+ }
+ pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc;
+}
+
+
+/**
+ * Enters VMX root mode operation on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVM The cross context VM structure. Can be
+ * NULL, after a resume.
+ * @param HCPhysCpuPage Physical address of the VMXON region.
+ * @param pvCpuPage Pointer to the VMXON region.
+ */
+static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
+{
+ Assert(pHostCpu);
+ Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
+ Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
+ Assert(pvCpuPage);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ if (pVM)
+ {
+ /* Write the VMCS revision identifier to the VMXON region. */
+ *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
+ }
+
+ /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ /* Enable the VMX bit in CR4 if necessary. */
+ RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
+
+ /* Record whether VMXE was already enabled prior to us enabling it above. */
+ pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
+
+ /* Enter VMX root mode. */
+ int rc = VMXEnable(HCPhysCpuPage);
+ if (RT_FAILURE(rc))
+ {
+ /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
+ if (!pHostCpu->fVmxeAlreadyEnabled)
+ SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
+
+ if (pVM)
+ pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
+ }
+
+ /* Restore interrupts. */
+ ASMSetFlags(fEFlags);
+ return rc;
+}
+
+
+/**
+ * Exits VMX root mode operation on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pHostCpu The HM physical-CPU structure.
+ */
+static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ /* If we're for some reason not in VMX root mode, then don't leave it. */
+ RTCCUINTREG const uHostCr4 = ASMGetCR4();
+
+ int rc;
+ if (uHostCr4 & X86_CR4_VMXE)
+ {
+ /* Exit VMX root mode and clear the VMX bit in CR4. */
+ VMXDisable();
+
+ /* Clear CR4.VMXE only if it was clear prior to us setting it. */
+ if (!pHostCpu->fVmxeAlreadyEnabled)
+ SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
+
+ rc = VINF_SUCCESS;
+ }
+ else
+ rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
+
+ /* Restore interrupts. */
+ ASMSetFlags(fEFlags);
+ return rc;
+}
+
+
+/**
+ * Allocates pages as specified by an array of VMX page-allocation info
+ * objects.
+ *
+ * The pages' contents are zeroed after allocation.
+ *
+ * @returns VBox status code.
+ * @param phMemObj Where to return the handle to the allocation.
+ * @param paAllocInfo The pointer to the first element of the VMX
+ * page-allocation info object array.
+ * @param cEntries The number of elements in the @a paAllocInfo array.
+ */
+static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
+{
+ *phMemObj = NIL_RTR0MEMOBJ;
+
+ /* Figure out how many pages to allocate. */
+ uint32_t cPages = 0;
+ for (uint32_t iPage = 0; iPage < cEntries; iPage++)
+ cPages += !!paAllocInfo[iPage].fValid;
+
+ /* Allocate the pages. */
+ if (cPages)
+ {
+ size_t const cbPages = cPages << HOST_PAGE_SHIFT;
+ int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
+ void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
+ RT_BZERO(pvFirstPage, cbPages);
+
+ uint32_t iPage = 0;
+ for (uint32_t i = 0; i < cEntries; i++)
+ if (paAllocInfo[i].fValid)
+ {
+ RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
+ void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
+ Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
+ AssertPtr(pvPage);
+
+ Assert(paAllocInfo[i].pHCPhys);
+ Assert(paAllocInfo[i].ppVirt);
+ *paAllocInfo[i].pHCPhys = HCPhysPage;
+ *paAllocInfo[i].ppVirt = pvPage;
+
+ /* Move to next page. */
+ ++iPage;
+ }
+
+ /* Make sure all valid (requested) pages have been assigned. */
+ Assert(iPage == cPages);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Frees pages allocated using hmR0VmxPagesAllocZ.
+ *
+ * @param phMemObj Pointer to the memory object handle. Will be set to
+ * NIL.
+ */
+DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj)
+{
+ /* We can cleanup wholesale since it's all one allocation. */
+ if (*phMemObj != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(*phMemObj, true /* fFreeMappings */);
+ *phMemObj = NIL_RTR0MEMOBJ;
+ }
+}
+
+
+/**
+ * Initializes a VMCS info. object.
+ *
+ * @param pVmcsInfo The VMCS info. object.
+ * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
+ */
+static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
+{
+ RT_ZERO(*pVmcsInfo);
+ RT_ZERO(*pVmcsInfoShared);
+
+ pVmcsInfo->pShared = pVmcsInfoShared;
+ Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
+ pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
+ pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
+ pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
+ pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
+ pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
+ pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
+ pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
+ pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
+ pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
+ pVmcsInfo->idHostCpuState = NIL_RTCPUID;
+ pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
+}
+
+
+/**
+ * Frees the VT-x structures for a VMCS info. object.
+ *
+ * @param pVmcsInfo The VMCS info. object.
+ * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
+ */
+static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
+{
+ hmR0VmxPagesFree(&pVmcsInfo->hMemObj);
+ hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared);
+}
+
+
+/**
+ * Allocates the VT-x structures for a VMCS info. object.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
+ *
+ * @remarks The caller is expected to take care of any and all allocation failures.
+ * This function will not perform any cleanup for failures half-way
+ * through.
+ */
+static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+
+ bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
+ bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
+ Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
+ VMXPAGEALLOCINFO aAllocInfo[] =
+ {
+ { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
+ { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
+ { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
+ { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
+ { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
+ };
+
+ int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * We use the same page for the VM-entry MSR-load and VM-exit MSR-store areas,
+ * since they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
+ */
+ AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
+ Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
+ pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
+ pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
+
+ /*
+ * Get the virtual-APIC page rather than allocating it again.
+ */
+ if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
+ {
+ if (!fIsNstGstVmcs)
+ {
+ if (PDMHasApic(pVM))
+ {
+ rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
+ if (RT_FAILURE(rc))
+ return rc;
+ Assert(pVmcsInfo->pbVirtApic);
+ Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
+ }
+ }
+ else
+ {
+ /* These are set up later while merging the nested-guest VMCS. */
+ Assert(pVmcsInfo->pbVirtApic == NULL);
+ Assert(pVmcsInfo->HCPhysVirtApic == NIL_RTHCPHYS);
+ }
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Free all VT-x structures for the VM.
+ *
+ * @param pVM The cross context VM structure.
+ */
+static void hmR0VmxStructsFree(PVMCC pVM)
+{
+ hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
+ {
+ RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
+ pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
+ RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
+ pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
+ }
+#endif
+
+ for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
+ {
+ PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
+ hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if (pVM->cpum.ro.GuestFeatures.fVmx)
+ hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
+#endif
+ }
+}
+
+
+/**
+ * Allocate all VT-x structures for the VM.
+ *
+ * @returns IPRT status code.
+ * @param pVM The cross context VM structure.
+ *
+ * @remarks This functions will cleanup on memory allocation failures.
+ */
+static int hmR0VmxStructsAlloc(PVMCC pVM)
+{
+ /*
+ * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
+ * The VMCS size cannot be more than 4096 bytes.
+ *
+ * See Intel spec. Appendix A.1 "Basic VMX Information".
+ */
+ uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
+ if (cbVmcs <= X86_PAGE_4K_SIZE)
+ { /* likely */ }
+ else
+ {
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /*
+ * Allocate per-VM VT-x structures.
+ */
+ bool const fVirtApicAccess = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
+ bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
+ VMXPAGEALLOCINFO aAllocInfo[] =
+ {
+ { fVirtApicAccess, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
+ { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap, &pVM->hmr0.s.vmx.pvVmreadBitmap },
+ { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ { true, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
+#endif
+ };
+
+ int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /* Allocate the shadow VMCS-fields array. */
+ if (fUseVmcsShadowing)
+ {
+ Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
+ Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
+ pVM->hmr0.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
+ pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
+ if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
+ rc = VERR_NO_MEMORY;
+ }
+#endif
+
+ /*
+ * Allocate per-VCPU VT-x structures.
+ */
+ for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
+ {
+ /* Allocate the guest VMCS structures. */
+ PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
+ rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
+ if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
+ rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
+#endif
+ }
+ if (RT_SUCCESS(rc))
+ return VINF_SUCCESS;
+ }
+ hmR0VmxStructsFree(pVM);
+ return rc;
+}
+
+
+/**
+ * Pre-initializes non-zero fields in VMX structures that will be allocated.
+ *
+ * @param pVM The cross context VM structure.
+ */
+static void hmR0VmxStructsInit(PVMCC pVM)
+{
+ /* Paranoia. */
+ Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL);
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ Assert(pVM->hmr0.s.vmx.pbScratch == NULL);
+#endif
+
+ /*
+ * Initialize members up-front so we can cleanup en masse on allocation failures.
+ */
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
+#endif
+ pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
+ pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
+ pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
+ for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
+ {
+ PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
+ hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
+ hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
+ }
+}
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not.
+ *
+ * @returns @c true if the MSR is intercepted, @c false otherwise.
+ * @param pbMsrBitmap The MSR bitmap.
+ * @param offMsr The MSR byte offset.
+ * @param iBit The bit offset from the byte offset.
+ */
+DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit)
+{
+ Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE);
+ return ASMBitTest(pbMsrBitmap, (offMsr << 3) + iBit);
+}
+#endif
+
+/**
+ * Sets the permission bits for the specified MSR in the given MSR bitmap.
+ *
+ * If the passed VMCS is a nested-guest VMCS, this function ensures that the
+ * read/write intercept is cleared from the MSR bitmap used for hardware-assisted
+ * VMX execution of the nested-guest, but only if the nested-guest is also not
+ * intercepting read/write access to this MSR.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
+ * @param idMsr The MSR value.
+ * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
+ * include both a read -and- a write permission!
+ *
+ * @sa CPUMGetVmxMsrPermission.
+ * @remarks Can be called with interrupts disabled.
+ */
+static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
+{
+ uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
+ Assert(pbMsrBitmap);
+ Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
+
+ /*
+ * MSR-bitmap Layout:
+ * Byte index MSR range Interpreted as
+ * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
+ * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
+ * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
+ * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
+ *
+ * A bit corresponding to an MSR within the above range causes a VM-exit
+ * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls outside
+ * these ranges, it always causes a VM-exit.
+ *
+ * See Intel spec. 24.6.9 "MSR-Bitmap Address".
+ */
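+ /*
+ * Worked example (illustrative only): for MSR_K8_LSTAR (0xc0000082) the code
+ * below computes offMsr = 0x400 and iBit = 0x82, so the read-intercept bit is
+ * byte 0x400 + (0x82 >> 3) = 0x410, bit 2, and the write-intercept bit is
+ * byte 0xc10, bit 2.
+ */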
+ uint16_t const offBitmapRead = 0;
+ uint16_t const offBitmapWrite = 0x800;
+ uint16_t offMsr;
+ int32_t iBit;
+ if (idMsr <= UINT32_C(0x00001fff))
+ {
+ offMsr = 0;
+ iBit = idMsr;
+ }
+ else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
+ {
+ offMsr = 0x400;
+ iBit = idMsr - UINT32_C(0xc0000000);
+ }
+ else
+ AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
+
+ /*
+ * Set the MSR read permission.
+ */
+ uint16_t const offMsrRead = offBitmapRead + offMsr;
+ Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
+ if (fMsrpm & VMXMSRPM_ALLOW_RD)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ bool const fClear = !fIsNstGstVmcs ? true
+ : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit);
+#else
+ RT_NOREF2(pVCpu, fIsNstGstVmcs);
+ bool const fClear = true;
+#endif
+ if (fClear)
+ ASMBitClear(pbMsrBitmap, (offMsrRead << 3) + iBit);
+ }
+ else
+ ASMBitSet(pbMsrBitmap, (offMsrRead << 3) + iBit);
+
+ /*
+ * Set the MSR write permission.
+ */
+ uint16_t const offMsrWrite = offBitmapWrite + offMsr;
+ Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
+ if (fMsrpm & VMXMSRPM_ALLOW_WR)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ bool const fClear = !fIsNstGstVmcs ? true
+ : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit);
+#else
+ RT_NOREF2(pVCpu, fIsNstGstVmcs);
+ bool const fClear = true;
+#endif
+ if (fClear)
+ ASMBitClear(pbMsrBitmap, (offMsrWrite << 3) + iBit);
+ }
+ else
+ ASMBitSet(pbMsrBitmap, (offMsrWrite << 3) + iBit);
+}
+
+
+/**
+ * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
+ * area.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param cMsrs The number of MSRs.
+ */
+static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
+{
+ /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
+ uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc);
+ if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
+ {
+ /* Commit the MSR counts to the VMCS and update the cache. */
+ if (pVmcsInfo->cEntryMsrLoad != cMsrs)
+ {
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
+ rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
+ rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
+ pVmcsInfo->cEntryMsrLoad = cMsrs;
+ pVmcsInfo->cExitMsrStore = cMsrs;
+ pVmcsInfo->cExitMsrLoad = cMsrs;
+ }
+ return VINF_SUCCESS;
+ }
+
+ LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
+ pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+}
+
+
+/**
+ * Adds a new (or updates the value of an existing) guest/host MSR
+ * pair to be swapped during the world-switch as part of the
+ * auto-load/store MSR area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param idMsr The MSR.
+ * @param uGuestMsrValue Value of the guest MSR.
+ * @param fSetReadWrite Whether to set the guest read/write access of this
+ * MSR (thus not causing a VM-exit).
+ * @param fUpdateHostMsr Whether to update the value of the host MSR if
+ * necessary.
+ */
+static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
+ bool fSetReadWrite, bool fUpdateHostMsr)
+{
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
+ PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
+ uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
+ uint32_t i;
+
+ /* Paranoia. */
+ Assert(pGuestMsrLoad);
+
+#ifndef DEBUG_bird
+ LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
+#endif
+
+ /* Check if the MSR already exists in the VM-entry MSR-load area. */
+ for (i = 0; i < cMsrs; i++)
+ {
+ if (pGuestMsrLoad[i].u32Msr == idMsr)
+ break;
+ }
+
+ bool fAdded = false;
+ if (i == cMsrs)
+ {
+ /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
+ ++cMsrs;
+ int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
+ AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
+
+ /* Set the guest to read/write this MSR without causing VM-exits. */
+ if ( fSetReadWrite
+ && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
+
+ Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
+ fAdded = true;
+ }
+
+ /* Update the MSR value for the newly added or already existing MSR. */
+ pGuestMsrLoad[i].u32Msr = idMsr;
+ pGuestMsrLoad[i].u64Value = uGuestMsrValue;
+
+ /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
+ if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
+ {
+ PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
+ pGuestMsrStore[i].u32Msr = idMsr;
+ pGuestMsrStore[i].u64Value = uGuestMsrValue;
+ }
+
+ /* Update the corresponding slot in the host MSR area. */
+ PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
+ Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
+ Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
+ pHostMsr[i].u32Msr = idMsr;
+
+ /*
+ * Only if the caller requests to update the host MSR value AND we've newly added the
+ * MSR to the host MSR area do we actually update the value. Otherwise, it will be
+ * updated by hmR0VmxUpdateAutoLoadHostMsrs().
+ *
+ * We do this for performance reasons since reading MSRs may be quite expensive.
+ */
+ if (fAdded)
+ {
+ if (fUpdateHostMsr)
+ {
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ pHostMsr[i].u64Value = ASMRdMsr(idMsr);
+ }
+ else
+ {
+ /* Someone else can do the work. */
+ pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
+ }
+ }
+ return VINF_SUCCESS;
+}
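+
+/*
+ * Hypothetical usage sketch (not an actual call site in this excerpt): adding the
+ * guest's TSC_AUX value with passthru access and a deferred host-MSR update
+ * would look roughly like
+ *
+ *     rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX,
+ *                                     uGuestTscAux, true, false);
+ *
+ * where the last two arguments are fSetReadWrite and fUpdateHostMsr, and
+ * uGuestTscAux stands in for the guest value to load on VM-entry.
+ */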
+
+
+/**
+ * Removes a guest/host MSR pair to be swapped during the world-switch from the
+ * auto-load/store MSR area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param idMsr The MSR.
+ */
+static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
+{
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
+ PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
+ uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
+
+#ifndef DEBUG_bird
+ LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
+#endif
+
+ for (uint32_t i = 0; i < cMsrs; i++)
+ {
+ /* Find the MSR. */
+ if (pGuestMsrLoad[i].u32Msr == idMsr)
+ {
+ /*
+ * If it's the last MSR, we only need to reduce the MSR count.
+ * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
+ */
+ if (i < cMsrs - 1)
+ {
+ /* Remove it from the VM-entry MSR-load area. */
+ pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
+ pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
+
+ /* Remove it from the VM-exit MSR-store area if it's in a different page. */
+ if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
+ {
+ PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
+ Assert(pGuestMsrStore[i].u32Msr == idMsr);
+ pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
+ pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
+ }
+
+ /* Remove it from the VM-exit MSR-load area. */
+ PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
+ Assert(pHostMsr[i].u32Msr == idMsr);
+ pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
+ pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
+ }
+
+ /* Reduce the count to reflect the removed MSR and bail. */
+ --cMsrs;
+ break;
+ }
+ }
+
+ /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
+ if (cMsrs != pVmcsInfo->cEntryMsrLoad)
+ {
+ int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
+ AssertRCReturn(rc, rc);
+
+ /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
+
+ Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
+ return VINF_SUCCESS;
+ }
+
+ return VERR_NOT_FOUND;
+}
+
+
+/**
+ * Updates the value of all host MSRs in the VM-exit MSR-load area.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
+{
+ RT_NOREF(pVCpu);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
+ uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
+ Assert(pHostMsrLoad);
+ Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
+ LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
+ for (uint32_t i = 0; i < cMsrs; i++)
+ {
+ /*
+ * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
+ * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
+ */
+ if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
+ pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer;
+ else
+ pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
+ }
+}
+
+
+/**
+ * Saves a set of host MSRs to allow read/write passthru access to the guest and
+ * perform lazy restoration of the host MSRs while leaving VT-x.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /*
+ * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
+ */
+ if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
+ {
+ Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
+ if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
+ {
+ pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
+ pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
+ pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
+ pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
+ }
+ pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
+ }
+}
+
+
+#ifdef VBOX_STRICT
+
+/**
+ * Verifies that our cached host EFER MSR value has not changed since we cached it.
+ *
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
+ {
+ uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
+ uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer;
+ uint64_t uVmcsEferMsrVmcs;
+ int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
+ AssertRC(rc);
+
+ AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
+ ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
+ AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
+ ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
+ }
+}
+
+
+/**
+ * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
+ * VMCS are correct.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
+ */
+static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* Read the various MSR-area counts from the VMCS. */
+ uint32_t cEntryLoadMsrs;
+ uint32_t cExitStoreMsrs;
+ uint32_t cExitLoadMsrs;
+ int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
+
+ /* Verify all the MSR counts are the same. */
+ Assert(cEntryLoadMsrs == cExitStoreMsrs);
+ Assert(cExitStoreMsrs == cExitLoadMsrs);
+ uint32_t const cMsrs = cExitLoadMsrs;
+
+ /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
+ Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
+
+ /* Verify the MSR counts are within the allocated page size. */
+ Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
+
+ /* Verify the relevant contents of the MSR areas match. */
+ PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
+ PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
+ PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
+ bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
+ for (uint32_t i = 0; i < cMsrs; i++)
+ {
+ /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
+ if (fSeparateExitMsrStorePage)
+ {
+ AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
+ ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
+ pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
+ }
+
+ AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
+ ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
+ pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
+
+ uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
+ AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
+ ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
+ pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
+
+ /* Verify that cached host EFER MSR matches what's loaded on the CPU. */
+ bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
+ AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer,
+ ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs));
+
+ /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ {
+ uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
+ if (fIsEferMsr)
+ {
+ AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
+ AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
+ }
+ else
+ {
+ /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
+ PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if ( pVM->hmr0.s.vmx.fLbr
+ && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
+ || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
+ || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr))
+ {
+ AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
+ ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
+ pGuestMsrLoad->u32Msr, cMsrs));
+ }
+ else if (!fIsNstGstVmcs)
+ {
+ AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
+ ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
+ }
+ else
+ {
+ /*
+ * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
+ * execute a nested-guest with MSR passthrough.
+ *
+ * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
+ * allow passthrough too.
+ */
+ void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap;
+ Assert(pvMsrBitmapNstGst);
+ uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
+ AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
+ ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
+ pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
+ }
+ }
+ }
+
+ /* Move to the next MSR. */
+ pHostMsrLoad++;
+ pGuestMsrLoad++;
+ pGuestMsrStore++;
+ }
+}
+
+#endif /* VBOX_STRICT */
+
+/**
+ * Flushes the TLB using EPT.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling
+ * EMT. Can be NULL depending on @a enmTlbFlush.
+ * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
+ * enmTlbFlush.
+ * @param enmTlbFlush Type of flush.
+ *
+ * @remarks Caller is responsible for making sure this function is called only
+ * when NestedPaging is supported and providing @a enmTlbFlush that is
+ * supported by the CPU.
+ * @remarks Can be called with interrupts disabled.
+ */
+static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
+{
+ uint64_t au64Descriptor[2];
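+ /* The INVEPT descriptor is 128 bits: the first quadword holds the EPT pointer and is only
+ consulted for single-context flushes, the second quadword is reserved and must be zero.
+ See Intel spec. "INVEPT - Invalidate Translations Derived from EPT". */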
+ if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
+ au64Descriptor[0] = 0;
+ else
+ {
+ Assert(pVCpu);
+ Assert(pVmcsInfo);
+ au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
+ }
+ au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
+
+ int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
+ AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
+
+ if ( RT_SUCCESS(rc)
+ && pVCpu)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
+}
+
+
+/**
+ * Flushes the TLB using VPID.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling
+ * EMT. Can be NULL depending on @a enmTlbFlush.
+ * @param enmTlbFlush Type of flush.
+ * @param GCPtr Virtual address of the page to flush (can be 0 depending
+ * on @a enmTlbFlush).
+ *
+ * @remarks Can be called with interrupts disabled.
+ */
+static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
+{
+ Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);
+
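+ /* The INVVPID descriptor is 128 bits: bits 15:0 of the first quadword hold the VPID (the
+ remaining bits must be zero) and the second quadword holds the linear address, which is
+ only consulted for individual-address flushes.
+ See Intel spec. "INVVPID - Invalidate Translations Based on VPID". */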
+ uint64_t au64Descriptor[2];
+ if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
+ {
+ au64Descriptor[0] = 0;
+ au64Descriptor[1] = 0;
+ }
+ else
+ {
+ AssertPtr(pVCpu);
+ AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
+ AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
+ au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
+ au64Descriptor[1] = GCPtr;
+ }
+
+ int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
+ AssertMsg(rc == VINF_SUCCESS,
+ ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));
+
+ if ( RT_SUCCESS(rc)
+ && pVCpu)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
+ NOREF(rc);
+}
+
+
+/**
+ * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
+ * otherwise there is nothing really to invalidate.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param GCVirt Guest virtual address of the page to invalidate.
+ */
+VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
+{
+ AssertPtr(pVCpu);
+ LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
+
+ if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
+ {
+ /*
+ * We must invalidate the guest TLB entry in either case, we cannot ignore it even for
+ * the EPT case. See @bugref{6043} and @bugref{6177}.
+ *
+ * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
+ * as this function may be called in a loop with individual addresses.
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if (pVM->hmr0.s.vmx.fVpid)
+ {
+ if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
+ {
+ hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
+ }
+ else
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
+ }
+ else if (pVM->hmr0.s.fNestedPaging)
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
+ * case where neither EPT nor VPID is supported by the CPU.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
+{
+ AssertPtr(pVCpu);
+ AssertPtr(pHostCpu);
+
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
+
+ Assert(pHostCpu->idCpu != NIL_RTCPUID);
+ pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
+ pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+ pVCpu->hmr0.s.fForceTLBFlush = false;
+ return;
+}
+
+
+/**
+ * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks All references to "ASID" in this function pertain to "VPID" in Intel's
+ * nomenclature. This avoids confusion in the comparisons below, since the
+ * host-CPU copies are named "ASID".
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
+{
+#ifdef VBOX_WITH_STATISTICS
+ bool fTlbFlushed = false;
+# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
+# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
+ if (!fTlbFlushed) \
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
+ } while (0)
+#else
+# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
+# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
+#endif
+
+ AssertPtr(pVCpu);
+ AssertPtr(pHostCpu);
+ Assert(pHostCpu->idCpu != NIL_RTCPUID);
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid,
+ ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
+ "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid));
+
+ /*
+ * Force a TLB flush for the first world-switch if the current CPU differs from the one we
+ * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
+ * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
+ * cannot reuse the current ASID anymore.
+ */
+ if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
+ || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
+ {
+ ++pHostCpu->uCurrentAsid;
+ if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
+ {
+ pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
+ pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
+ pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
+ }
+
+ pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
+ pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
+ pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+
+ /*
+ * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
+ * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
+ */
+ hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
+ HMVMX_SET_TAGGED_TLB_FLUSHED();
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
+ }
+ else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
+ {
+ /*
+ * Changes to the EPT paging structure by VMM requires flushing-by-EPT as the CPU
+ * creates guest-physical (ie. only EPT-tagged) mappings while traversing the EPT
+ * tables when EPT is in use. Flushing-by-VPID will only flush linear (only
+ * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
+ * mappings, see @bugref{6568}.
+ *
+ * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
+ */
+ hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
+ HMVMX_SET_TAGGED_TLB_FLUSHED();
+ }
+ else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
+ {
+ /*
+ * The nested-guest specifies its own guest-physical address to use as the APIC-access
+ * address which requires flushing the TLB of EPT cached structures.
+ *
+ * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
+ */
+ hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
+ pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
+ HMVMX_SET_TAGGED_TLB_FLUSHED();
+ }
+
+ pVCpu->hmr0.s.fForceTLBFlush = false;
+ HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
+
+ Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu);
+ Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes);
+ AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
+ ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
+ AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
+ ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
+ pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
+ AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
+ ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
+
+ /* Update VMCS with the VPID. */
+ int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
+ AssertRC(rc);
+
+#undef HMVMX_SET_TAGGED_TLB_FLUSHED
+}
+
+
+/**
+ * Flushes the tagged-TLB entries for EPT CPUs as necessary.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
+{
+ AssertPtr(pVCpu);
+ AssertPtr(pHostCpu);
+ Assert(pHostCpu->idCpu != NIL_RTCPUID);
+ AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
+ AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
+
+ /*
+ * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
+ * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
+ */
+ if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
+ || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
+ {
+ pVCpu->hmr0.s.fForceTLBFlush = true;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
+ }
+
+ /* Check for explicit TLB flushes. */
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
+ {
+ pVCpu->hmr0.s.fForceTLBFlush = true;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
+ }
+
+ /* Check for TLB flushes while switching to/from a nested-guest. */
+ if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
+ {
+ pVCpu->hmr0.s.fForceTLBFlush = true;
+ pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
+ }
+
+ pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
+ pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+
+ if (pVCpu->hmr0.s.fForceTLBFlush)
+ {
+ hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt);
+ pVCpu->hmr0.s.fForceTLBFlush = false;
+ }
+}
+
+
+/**
+ * Flushes the tagged-TLB entries for VPID CPUs as necessary.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
+{
+ AssertPtr(pVCpu);
+ AssertPtr(pHostCpu);
+ Assert(pHostCpu->idCpu != NIL_RTCPUID);
+ AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
+ AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
+
+ /*
+ * Force a TLB flush for the first world switch if the current CPU differs from the one we
+ * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
+ * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
+ * cannot reuse the current ASID anymore.
+ */
+ if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
+ || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
+ {
+ pVCpu->hmr0.s.fForceTLBFlush = true;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
+ }
+
+ /* Check for explicit TLB flushes. */
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
+ {
+ /*
+ * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
+ * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
+ * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
+ * include fExplicitFlush's too) - an obscure corner case.
+ */
+ pVCpu->hmr0.s.fForceTLBFlush = true;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
+ }
+
+ /* Check for TLB flushes while switching to/from a nested-guest. */
+ if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
+ {
+ pVCpu->hmr0.s.fForceTLBFlush = true;
+ pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
+ }
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
+ if (pVCpu->hmr0.s.fForceTLBFlush)
+ {
+ ++pHostCpu->uCurrentAsid;
+ if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
+ {
+ pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
+ pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
+ pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
+ }
+
+ pVCpu->hmr0.s.fForceTLBFlush = false;
+ pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+ pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
+ if (pHostCpu->fFlushAsidBeforeUse)
+ {
+ if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
+ hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
+ else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
+ {
+ hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
+ pHostCpu->fFlushAsidBeforeUse = false;
+ }
+ else
+ {
+ /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
+ AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
+ }
+ }
+ }
+
+ AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
+ ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
+ AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
+ ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
+ pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
+ AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
+ ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
+
+ int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
+ AssertRC(rc);
+}
+
+
+/**
+ * Flushes the guest TLB entry based on CPU capabilities.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+#ifdef HMVMX_ALWAYS_FLUSH_TLB
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
+#endif
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ switch (pVM->hmr0.s.vmx.enmTlbFlushType)
+ {
+ case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
+ case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
+ case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
+ case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
+ default:
+ AssertMsgFailed(("Invalid flush-tag function identifier\n"));
+ break;
+ }
+ /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
+}
+
+
+/**
+ * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
+ * TLB entries from the host TLB before VM-entry.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
+{
+ /*
+ * Determine optimal flush type for nested paging.
+ * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up
+ * unrestricted guest execution (see hmR3InitFinalizeR0()).
+ */
+ if (pVM->hmr0.s.fNestedPaging)
+ {
+ if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
+ {
+ if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
+ pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
+ else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
+ pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
+ else
+ {
+ /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */
+ pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Make sure the write-back cacheable memory type for EPT is supported. */
+ if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB)))
+ {
+ pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* EPT requires a page-walk length of 4. */
+ if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
+ {
+ pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+ }
+ else
+ {
+ /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
+ pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+ }
+
+ /*
+ * Determine optimal flush type for VPID.
+ */
+ if (pVM->hmr0.s.vmx.fVpid)
+ {
+ if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
+ {
+ if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
+ pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
+ else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
+ pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
+ else
+ {
+ /* Neither SINGLE nor ALL-context flush type for VPID is supported by the CPU. Ignore VPID capability. */
+ if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
+ LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
+ if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
+ LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
+ pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
+ pVM->hmr0.s.vmx.fVpid = false;
+ }
+ }
+ else
+ {
+ /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
+ Log4Func(("VPID supported without INVEPT support. Ignoring VPID.\n"));
+ pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
+ pVM->hmr0.s.vmx.fVpid = false;
+ }
+ }
+
+ /*
+ * Setup the handler for flushing tagged-TLBs.
+ */
+ if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid)
+ pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
+ else if (pVM->hmr0.s.fNestedPaging)
+ pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
+ else if (pVM->hmr0.s.vmx.fVpid)
+ pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
+ else
+ pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
+
+ /*
+ * Copy out the result to ring-3.
+ */
+ pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid;
+ pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType;
+ pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt;
+ pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up the LBR MSR ranges based on the host CPU.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ *
+ * @sa nemR3DarwinSetupLbrMsrRange
+ */
+static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
+{
+ Assert(pVM->hmr0.s.vmx.fLbr);
+ uint32_t idLbrFromIpMsrFirst;
+ uint32_t idLbrFromIpMsrLast;
+ uint32_t idLbrToIpMsrFirst;
+ uint32_t idLbrToIpMsrLast;
+ uint32_t idLbrTosMsr;
+
+ /*
+ * Determine the LBR MSRs supported for this host CPU family and model.
+ *
+ * See Intel spec. 17.4.8 "LBR Stack".
+ * See Intel "Model-Specific Registers" spec.
+ */
+ uint32_t const uFamilyModel = (g_CpumHostFeatures.s.uFamily << 8)
+ | g_CpumHostFeatures.s.uModel;
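+ /* E.g. family 6, model 0x3d yields the key 0x063d and family 0xf, model 2 yields 0x0f02,
+ matching the cases below. */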
+ switch (uFamilyModel)
+ {
+ case 0x0f01: case 0x0f02:
+ idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
+ idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
+ idLbrToIpMsrFirst = 0x0;
+ idLbrToIpMsrLast = 0x0;
+ idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
+ break;
+
+ case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
+ case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
+ case 0x066a: case 0x066c: case 0x067d: case 0x067e:
+ idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
+ idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
+ idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
+ idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
+ idLbrTosMsr = MSR_LASTBRANCH_TOS;
+ break;
+
+ case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
+ case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
+ case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
+ case 0x062e: case 0x0625: case 0x062c: case 0x062f:
+ idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
+ idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
+ idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
+ idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
+ idLbrTosMsr = MSR_LASTBRANCH_TOS;
+ break;
+
+ case 0x0617: case 0x061d: case 0x060f:
+ idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
+ idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
+ idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
+ idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
+ idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
+ break;
+
+ /* Atom and related microarchitectures we don't care about:
+ case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
+ case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
+ case 0x0636: */
+ /* All other CPUs: */
+ default:
+ {
+ LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+ }
+
+ /*
+ * Validate.
+ */
+ uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
+ PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
+ AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
+ == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
+ if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
+ {
+ LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+ NOREF(pVCpu0);
+
+ /*
+ * Update the LBR info. to the VM struct. for use later.
+ */
+ pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr;
+
+ pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
+ pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
+
+ pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
+ pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
+ return VINF_SUCCESS;
+}
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+
+/**
+ * Sets up the shadow VMCS fields arrays.
+ *
+ * This function builds arrays of VMCS fields to sync the shadow VMCS later while
+ * executing the guest.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
+{
+ /*
+ * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
+ * when the host does not support it.
+ */
+ bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
+ if ( !fGstVmwriteAll
+ || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL))
+ { /* likely. */ }
+ else
+ {
+ LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
+ uint32_t cRwFields = 0;
+ uint32_t cRoFields = 0;
+ for (uint32_t i = 0; i < cVmcsFields; i++)
+ {
+ VMXVMCSFIELD VmcsField;
+ VmcsField.u = g_aVmcsFields[i];
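+ /* A VMCS field encoding packs the access type in bit 0 (0=full, 1=high), the index in
+ bits 9:1, the field type in bits 11:10 and the width in bits 14:13; the VMXVMCSFIELD
+ union gives us named access to these parts. */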
+
+ /*
+ * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
+ * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
+ * in the shadow VMCS fields array as they would be redundant.
+ *
+ * If the VMCS field depends on a CPU feature that is not exposed to the guest,
+ * we must not include it in the shadow VMCS fields array. Guests attempting to
+ * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
+ * the required behavior.
+ */
+ if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
+ && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
+ {
+ /*
+ * Read-only fields are placed in a separate array so that while syncing shadow
+ * VMCS fields later (which is more performance critical) we can avoid branches.
+ *
+ * However, if the guest can write to all fields (including read-only fields),
+ * we treat it as a read/write field. Otherwise, writing to these fields would
+ * cause a VMWRITE instruction error while syncing the shadow VMCS.
+ */
+ if ( fGstVmwriteAll
+ || !VMXIsVmcsFieldReadOnly(VmcsField.u))
+ pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
+ else
+ pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
+ }
+ }
+
+ /* Update the counts. */
+ pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields;
+ pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up the VMREAD and VMWRITE bitmaps.
+ *
+ * @param pVM The cross context VM structure.
+ */
+static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
+{
+ /*
+ * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
+ */
+ uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
+ uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap;
+ uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap;
+ ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
+ ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
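+ /* Each bitmap covers field encodings by their low 15 bits: bit n set means a VMREAD or
+ VMWRITE of any field whose encoding has bits 14:0 equal to n causes a VM-exit. This is
+ why the loops below mask the encodings with 0x7fff. */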
+
+ /*
+ * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
+ * VMREAD and VMWRITE bitmaps.
+ */
+ {
+ uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields;
+ uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
+ for (uint32_t i = 0; i < cShadowVmcsFields; i++)
+ {
+ uint32_t const uVmcsField = paShadowVmcsFields[i];
+ Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
+ Assert(uVmcsField >> 3 < cbBitmap);
+ ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
+ ASMBitClear(pbVmwriteBitmap, uVmcsField & 0x7fff);
+ }
+ }
+
+ /*
+ * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
+ * if the host supports VMWRITE to all supported VMCS fields.
+ */
+ if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
+ {
+ uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields;
+ uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
+ for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
+ {
+ uint32_t const uVmcsField = paShadowVmcsRoFields[i];
+ Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
+ Assert(uVmcsField >> 3 < cbBitmap);
+ ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
+ }
+ }
+}
+
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+/**
+ * Sets up the virtual-APIC page address for the VMCS.
+ *
+ * @param pVmcsInfo The VMCS info. object.
+ */
+DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
+{
+ RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
+ Assert(HCPhysVirtApic != NIL_RTHCPHYS);
+ Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
+ AssertRC(rc);
+}
+
+
+/**
+ * Sets up the MSR-bitmap address for the VMCS.
+ *
+ * @param pVmcsInfo The VMCS info. object.
+ */
+DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
+{
+ RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
+ Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
+ Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
+ AssertRC(rc);
+}
+
+
+/**
+ * Sets up the APIC-access page address for the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
+{
+ RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess;
+ Assert(HCPhysApicAccess != NIL_RTHCPHYS);
+ Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
+ AssertRC(rc);
+}
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+
+/**
+ * Sets up the VMREAD bitmap address for the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
+{
+ RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap;
+ Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
+ Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
+ AssertRC(rc);
+}
+
+
+/**
+ * Sets up the VMWRITE bitmap address for the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
+{
+ RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap;
+ Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
+ Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
+ AssertRC(rc);
+}
+
+#endif
+
+/**
+ * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
+ * in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
+{
+ RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
+ Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
+ Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
+
+ RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
+ Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
+ Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
+
+ RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
+ Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
+ Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
+
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
+ rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
+ rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
+
+ /*
+ * By default, ensure guest attempts to access any MSR cause VM-exits.
+ * This shall later be relaxed for specific MSRs as necessary.
+ *
+ * Note: For nested-guests, the entire bitmap will be merged prior to
+ * executing the nested-guest using hardware-assisted VMX and hence there
+ * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
+ */
+ Assert(pVmcsInfo->pvMsrBitmap);
+ ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
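+ /* The MSR bitmap is one 4K page split into four 1K regions: read bits for MSRs
+ 0x00000000..0x00001fff at offset 0, read bits for MSRs 0xc0000000..0xc0001fff at 0x400,
+ and the corresponding write bits at 0x800 and 0xc00. A set bit intercepts the access. */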
+
+ /*
+ * The guest can access the following MSRs (read, write) without causing
+ * VM-exits; they are loaded/stored automatically using fields in the VMCS.
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
+
+ /*
+ * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and have no state
+ * associated with them. We never need to intercept access (writes need to be
+ * executed without causing a VM-exit, reads will #GP fault anyway).
+ *
+ * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
+ * read/write them. We swap the guest/host MSR value using the
+ * auto-load/store MSR area.
+ */
+ if (pVM->cpum.ro.GuestFeatures.fIbpb)
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
+ if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
+ if (pVM->cpum.ro.GuestFeatures.fIbrs)
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
+
+ /*
+ * Allow full read/write access for the following MSRs (mandatory for VT-x)
+ * required for 64-bit guests.
+ */
+ if (pVM->hmr0.s.fAllow64BitGuests)
+ {
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
+ }
+
+ /*
+ * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
+ */
+#ifdef VBOX_STRICT
+ Assert(pVmcsInfo->pvMsrBitmap);
+ uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
+ Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
+#endif
+}
+
+
+/**
+ * Sets up pin-based VM-execution controls in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */
+ uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
+
+ fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
+ | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
+
+ if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
+ fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
+
+ /* Enable the VMX-preemption timer. */
+ if (pVM->hmr0.s.vmx.fUsePreemptTimer)
+ {
+ Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
+ fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
+ }
+
+#if 0
+ /* Enable posted-interrupt processing. */
+ if (pVM->hm.s.fPostedIntrs)
+ {
+ Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
+ Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
+ fVal |= VMX_PIN_CTLS_POSTED_INT;
+ }
+#endif
+
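+ /* allowed0 bits are mandatory one-settings and allowed1 bits are the permitted one-settings;
+ any bit requested in fVal but clear in fZap cannot be set on this CPU, which is what the
+ check below detects. */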
+ if ((fVal & fZap) != fVal)
+ {
+ LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
+ g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap));
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Commit it to the VMCS and update our cache. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
+ AssertRC(rc);
+ pVmcsInfo->u32PinCtls = fVal;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up secondary processor-based VM-execution controls in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
+ uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
+
+ /* WBINVD causes a VM-exit. */
+ if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
+ fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
+
+ /* Enable EPT (aka nested-paging). */
+ if (pVM->hmr0.s.fNestedPaging)
+ fVal |= VMX_PROC_CTLS2_EPT;
+
+ /* Enable the INVPCID instruction if we expose it to the guest and it is supported
+ by the hardware. Without this, a guest executing INVPCID would cause a #UD. */
+ if ( pVM->cpum.ro.GuestFeatures.fInvpcid
+ && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
+ fVal |= VMX_PROC_CTLS2_INVPCID;
+
+ /* Enable VPID. */
+ if (pVM->hmr0.s.vmx.fVpid)
+ fVal |= VMX_PROC_CTLS2_VPID;
+
+ /* Enable unrestricted guest execution. */
+ if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
+ fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
+
+#if 0
+ if (pVM->hm.s.fVirtApicRegs)
+ {
+ /* Enable APIC-register virtualization. */
+ Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
+ fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
+
+ /* Enable virtual-interrupt delivery. */
+ Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
+ fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
+ }
+#endif
+
+ /* Virtualize-APIC accesses if supported by the CPU. The virtual-APIC page is
+ where the TPR shadow resides. */
+ /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
+ * done dynamically. */
+ if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
+ {
+ fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
+ hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
+ }
+
+ /* Enable the RDTSCP instruction if we expose it to the guest and it is supported
+ by the hardware. Without this, a guest executing RDTSCP would cause a #UD. */
+ if ( pVM->cpum.ro.GuestFeatures.fRdTscP
+ && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
+ fVal |= VMX_PROC_CTLS2_RDTSCP;
+
+ /* Enable Pause-Loop exiting. */
+ if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
+ && pVM->hm.s.vmx.cPleGapTicks
+ && pVM->hm.s.vmx.cPleWindowTicks)
+ {
+ fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
+
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
+ rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
+ }
+
+ if ((fVal & fZap) != fVal)
+ {
+ LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
+ g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap));
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Commit it to the VMCS and update our cache. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
+ AssertRC(rc);
+ pVmcsInfo->u32ProcCtls2 = fVal;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up processor-based VM-execution controls in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
+ uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
+
+ fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
+ | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
+ | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
+ | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
+ | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
+ | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
+ | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
+
+ /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later; verify that it isn't -always- forced to be set or cleared. */
+ if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
+ || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
+ {
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
+ if (!pVM->hmr0.s.fNestedPaging)
+ {
+ Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest);
+ fVal |= VMX_PROC_CTLS_INVLPG_EXIT
+ | VMX_PROC_CTLS_CR3_LOAD_EXIT
+ | VMX_PROC_CTLS_CR3_STORE_EXIT;
+ }
+
+ /* Use TPR shadowing if supported by the CPU. */
+ if ( PDMHasApic(pVM)
+ && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
+ {
+ fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
+ /* CR8 writes cause a VM-exit based on TPR threshold. */
+ Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
+ Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
+ hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
+ }
+ else
+ {
+ /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
+ invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
+ if (pVM->hmr0.s.fAllow64BitGuests)
+ fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
+ | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
+ }
+
+ /* Use MSR-bitmaps if supported by the CPU. */
+ if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ {
+ fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
+ hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
+ }
+
+ /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
+ if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
+ fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
+
+ if ((fVal & fZap) != fVal)
+ {
+ LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
+ g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap));
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Commit it to the VMCS and update our cache. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
+ AssertRC(rc);
+ pVmcsInfo->u32ProcCtls = fVal;
+
+ /* Set up MSR permissions that don't change through the lifetime of the VM. */
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
+
+ /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
+ return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
+
+ /* Sanity check, should not really happen. */
+ if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest))
+ { /* likely */ }
+ else
+ {
+ pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up miscellaneous (everything other than Pin, Processor and secondary
+ * Processor-based VM-execution) control fields in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
+ {
+ hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
+ hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
+ }
+#endif
+
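+ /* A VMCS link pointer of NIL_RTHCPHYS (all bits set) tells the CPU that no shadow VMCS is
+ linked; the VM-entry checks on this field are skipped only for that value. */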
+ Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
+ int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
+ AssertRC(rc);
+
+ rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ uint64_t const u64Cr0Mask = vmxHCGetFixedCr0Mask(pVCpu);
+ uint64_t const u64Cr4Mask = vmxHCGetFixedCr4Mask(pVCpu);
+
+ rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
+
+ pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
+ pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
+
+ if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr)
+ {
+ rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
+ AssertRC(rc);
+ }
+ return VINF_SUCCESS;
+ }
+ else
+ LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Sets up the initial exception bitmap in the VMCS based on static conditions.
+ *
+ * We shall set up those exception intercepts that don't change during the
+ * lifetime of the VM here. The rest are done dynamically while loading the
+ * guest state.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ /*
+ * The following exceptions are always intercepted:
+ *
+ * #AC - To prevent the guest from hanging the CPU and for dealing with
+ * split-lock detecting host configs.
+ * #DB - To maintain the DR6 state even when intercepting DRx reads/writes, and
+ * because recursive #DBs can cause a CPU hang.
+ * #PF - To sync our shadow page tables when nested-paging is not used.
+ */
+ bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging;
+ uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
+ | RT_BIT(X86_XCPT_DB)
+ | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
+
+ /* Commit it to the VMCS. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
+ AssertRC(rc);
+
+ /* Update our cache of the exception bitmap. */
+ pVmcsInfo->u32XcptBitmap = uXcptBitmap;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
+ *
+ * @returns VBox status code.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo)
+{
+ Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
+ int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
+ AssertRC(rc);
+
+ rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
+
+ /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
+ Assert(!pVmcsInfo->u64Cr0Mask);
+ Assert(!pVmcsInfo->u64Cr4Mask);
+ return VINF_SUCCESS;
+ }
+ LogRelFunc(("Failed to set up the VMCS link pointer in the nested-guest VMCS. rc=%Rrc\n", rc));
+ return rc;
+}
+#endif
+
+
+/**
+ * Selector FNHMSVMVMRUN implementation.
+ */
+static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume)
+{
+ hmR0VmxUpdateStartVmFunction(pVCpu);
+ return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume);
+}
+
+
+/**
+ * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
+ * VMX.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
+ */
+static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
+{
+ Assert(pVmcsInfo->pvVmcs);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
+ *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
+ const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
+
+ LogFlowFunc(("\n"));
+
+ /*
+ * Initialize the VMCS using VMCLEAR before loading the VMCS.
+ * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
+ */
+ int rc = hmR0VmxClearVmcs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ rc = hmR0VmxLoadVmcs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
+ * The host is always 64-bit since we no longer support 32-bit hosts.
+ * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
+ */
+ if (!fIsNstGstVmcs)
+ {
+ rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * If a shadow VMCS is allocated for the VMCS info. object, initialize the
+ * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
+ * making it fit for use when VMCS shadowing is later enabled.
+ */
+ if (pVmcsInfo->pvShadowVmcs)
+ {
+ VMXVMCSREVID VmcsRevId;
+ VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
+ VmcsRevId.n.fIsShadowVmcs = 1;
+ *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
+ rc = vmxHCClearShadowVmcs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
+ }
+#endif
+ }
+ else
+ LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
+ }
+ else
+ LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
+ }
+ else
+ LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
+ }
+ else
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
+#else
+ AssertFailed();
+#endif
+ }
+ }
+ else
+ LogRelFunc(("Failed to load the %s. rc=%Rrc\n", rc, pszVmcs));
+ }
+ else
+ LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", rc, pszVmcs));
+
+ /* Sync any CPU internal VMCS data back into our VMCS in memory. */
+ if (RT_SUCCESS(rc))
+ {
+ rc = hmR0VmxClearVmcs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", rc, pszVmcs));
+ }
+
+ /*
+ * Update the last-error record both for failures and success, so we
+ * can propagate the status code back to ring-3 for diagnostics.
+ */
+ hmR0VmxUpdateErrorRecord(pVCpu, rc);
+ NOREF(pszVmcs);
+ return rc;
+}
+
+
+/**
+ * Does global VT-x initialization (called during module initialization).
+ *
+ * @returns VBox status code.
+ */
+VMMR0DECL(int) VMXR0GlobalInit(void)
+{
+#ifdef HMVMX_USE_FUNCTION_TABLE
+ AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers));
+# ifdef VBOX_STRICT
+ for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++)
+ Assert(g_aVMExitHandlers[i].pfn);
+# endif
+#endif
+
+ /*
+ * For detecting whether DR6.RTM is writable or not (done in VMXR0InitVM).
+ */
+ RTTHREADPREEMPTSTATE Preempt = RTTHREADPREEMPTSTATE_INITIALIZER;
+ RTThreadPreemptDisable(&Preempt);
+ RTCCUINTXREG const fSavedDr6 = ASMGetDR6();
+ ASMSetDR6(0);
+ RTCCUINTXREG const fZeroDr6 = ASMGetDR6();
+ ASMSetDR6(fSavedDr6);
+ RTThreadPreemptRestore(&Preempt);
+
+ g_fDr6Zeroed = fZeroDr6;
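+ /* g_fDr6Zeroed now holds what DR6 reads back as after writing zero; it differs from
+ X86_DR6_RA1_MASK only when the CPU lets DR6.RTM (bit 16) be cleared. VMXR0InitVM uses
+ this to decide whether MOV DRx must always be intercepted. */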
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does global VT-x termination (called during module termination).
+ */
+VMMR0DECL(void) VMXR0GlobalTerm()
+{
+ /* Nothing to do currently. */
+}
+
+
+/**
+ * Sets up and activates VT-x on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVM The cross context VM structure. Can be
+ * NULL after a host resume operation.
+ * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
+ * fEnabledByHost is @c true).
+ * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
+ * @a fEnabledByHost is @c true).
+ * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
+ * enable VT-x on the host.
+ * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
+ */
+VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
+ PCSUPHWVIRTMSRS pHwvirtMsrs)
+{
+ AssertPtr(pHostCpu);
+ AssertPtr(pHwvirtMsrs);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* Enable VT-x if it's not already enabled by the host. */
+ if (!fEnabledByHost)
+ {
+ int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+
+ /*
+ * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor have been
+ * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
+ * invalidated when flushing by VPID.
+ */
+ if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
+ {
+ hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
+ pHostCpu->fFlushAsidBeforeUse = false;
+ }
+ else
+ pHostCpu->fFlushAsidBeforeUse = true;
+
+ /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
+ ++pHostCpu->cTlbFlushes;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Deactivates VT-x on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pvCpuPage Pointer to the VMXON region.
+ * @param HCPhysCpuPage Physical address of the VMXON region.
+ *
+ * @remarks This function should never be called when SUPR0EnableVTx() or
+ * similar was used to enable VT-x on the host.
+ */
+VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
+{
+ RT_NOREF2(pvCpuPage, HCPhysCpuPage);
+
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ return hmR0VmxLeaveRootMode(pHostCpu);
+}
+
+
+/**
+ * Does per-VM VT-x initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
+{
+ AssertPtr(pVM);
+ LogFlowFunc(("pVM=%p\n", pVM));
+
+ hmR0VmxStructsInit(pVM);
+ int rc = hmR0VmxStructsAlloc(pVM);
+ if (RT_FAILURE(rc))
+ {
+ LogRelFunc(("Failed to allocated VMX structures. rc=%Rrc\n", rc));
+ return rc;
+ }
+
+ /* Setup the crash dump page. */
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic");
+ *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
+#endif
+
+ /*
+ * Copy out stuff that's for ring-3 and determine the default configuration.
+ */
+ pVM->hm.s.ForR3.vmx.u64HostDr6Zeroed = g_fDr6Zeroed;
+
+ /* Since we do not emulate RTM, make sure DR6.RTM cannot be cleared by the
+ guest and cause confusion there. It appears that the DR6.RTM bit can be
+ cleared even if TSX-NI is disabled (microcode update / system / whatever). */
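+ /* Presumed mapping of fAlwaysInterceptMovDRxCfg as used below: 0 = auto-detect from
+ g_fDr6Zeroed, > 0 = always intercept MOV DRx, < 0 = do not force the intercept. */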
+#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
+ if (pVM->hm.s.vmx.fAlwaysInterceptMovDRxCfg == 0)
+ pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx = g_fDr6Zeroed != X86_DR6_RA1_MASK;
+ else
+#endif
+ pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx = pVM->hm.s.vmx.fAlwaysInterceptMovDRxCfg > 0;
+ pVM->hm.s.ForR3.vmx.fAlwaysInterceptMovDRx = pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does per-VM VT-x termination.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
+{
+ AssertPtr(pVM);
+ LogFlowFunc(("pVM=%p\n", pVM));
+
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ if (pVM->hmr0.s.vmx.pbScratch)
+ RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE);
+#endif
+ hmR0VmxStructsFree(pVM);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up the VM for execution using hardware-assisted VMX.
+ * This function is only called once per-VM during initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
+{
+ AssertPtr(pVM);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ LogFlowFunc(("pVM=%p\n", pVM));
+
+ /*
+ * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
+ * without causing a #GP.
+ */
+ RTCCUINTREG const uHostCr4 = ASMGetCR4();
+ if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
+ { /* likely */ }
+ else
+ return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
+
+ /*
+ * Check that nested paging is supported if enabled and copy over the flag to the
+ * ring-0 only structure.
+ */
+ bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg;
+ AssertReturn( !fNestedPaging
+ || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */
+ VERR_INCOMPATIBLE_CONFIG);
+ pVM->hmr0.s.fNestedPaging = fNestedPaging;
+ pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg;
+
+ /*
+ * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
+ * always be allocated. We no longer support the highly unlikely case of unrestricted guest
+ * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
+ */
+ bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg;
+ AssertReturn( !fUnrestrictedGuest
+ || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST)
+ && fNestedPaging),
+ VERR_INCOMPATIBLE_CONFIG);
+ if ( !fUnrestrictedGuest
+ && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
+ || !pVM->hm.s.vmx.pRealModeTSS))
+ {
+ LogRelFunc(("Invalid real-on-v86 state.\n"));
+ return VERR_INTERNAL_ERROR;
+ }
+ pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest;
+
+ /* Initialize these always, see hmR3InitFinalizeR0(). */
+ pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
+ pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
+
+ /* Setup the tagged-TLB flush handlers. */
+ int rc = hmR0VmxSetupTaggedTlb(pVM);
+ if (RT_FAILURE(rc))
+ {
+ LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
+ return rc;
+ }
+
+ /* Determine LBR capabilities. */
+ pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg;
+ if (pVM->hmr0.s.vmx.fLbr)
+ {
+ rc = hmR0VmxSetupLbrMsrRange(pVM);
+ if (RT_FAILURE(rc))
+ {
+ LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
+ return rc;
+ }
+ }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
+ if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
+ {
+ rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
+ if (RT_SUCCESS(rc))
+ hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
+ else
+ {
+ LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
+ return rc;
+ }
+ }
+#endif
+
+ for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
+ {
+ PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
+ Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
+
+ pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector;
+
+ rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if (pVM->cpum.ro.GuestFeatures.fVmx)
+ {
+ rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ {
+ LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
+ return rc;
+ }
+ }
+#endif
+ }
+ else
+ {
+ LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
+ return rc;
+ }
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
+ * the VMCS.
+ * @returns CR4 for passing along to hmR0VmxExportHostSegmentRegs.
+ */
+static uint64_t hmR0VmxExportHostControlRegs(void)
+{
+ int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
+ uint64_t uHostCr4 = ASMGetCR4();
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc);
+ return uHostCr4;
+}
+
+
+/**
+ * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
+ * the host-state area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uHostCr4 The host CR4 value.
+ */
+static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4)
+{
+ /*
+ * If we've executed guest code using hardware-assisted VMX, the host-state bits
+ * will be messed up. We should -not- save the messed up state without restoring
+ * the original host-state, see @bugref{7240}.
+ *
+ * This apparently can happen (most likely the FPU changes), so deal with it rather than
+ * asserting. It was observed when booting a Solaris 10u10 32-bit guest.
+ */
+ if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
+ {
+ Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags,
+ pVCpu->idCpu));
+ VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
+ pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
+ }
+
+ /*
+ * Get all the host info.
+ * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set
+ * without also checking the cpuid bit.
+ */
+ uint32_t fRestoreHostFlags;
+#if RT_INLINE_ASM_EXTERNAL
+ if (uHostCr4 & X86_CR4_FSGSBASE)
+ {
+ hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/);
+ fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
+ }
+ else
+ {
+ hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/);
+ fRestoreHostFlags = 0;
+ }
+ RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES;
+ RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
+ RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS;
+ RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS;
+#else
+ pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR();
+ pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS();
+ pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS();
+ ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr);
+ ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr);
+ if (uHostCr4 & X86_CR4_FSGSBASE)
+ {
+ pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase();
+ pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase();
+ fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
+ }
+ else
+ {
+ pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE);
+ pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE);
+ fRestoreHostFlags = 0;
+ }
+ RTSEL uSelES, uSelDS, uSelFS, uSelGS;
+ pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS();
+ pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES();
+ pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS();
+ pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS();
+#endif
+
+ /*
+ * Determine if the host segment registers are suitable for VT-x. Otherwise load zero
+ * selectors to satisfy the VM-entry checks and restore the real values before we get preempted.
+ *
+ * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
+ */
+ RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS;
+ if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT))
+ {
+ if (!(uSelAll & X86_SEL_LDT))
+ {
+#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
+ do { \
+ (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
+ if ((a_uVmcsVar) & X86_SEL_RPL) \
+ { \
+ fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
+ (a_uVmcsVar) = 0; \
+ } \
+ } while (0)
+ VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
+ VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
+ VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
+ VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
+#undef VMXLOCAL_ADJUST_HOST_SEG
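+ /* For illustration, VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS) above expands to roughly:
+ uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
+ if (uSelDS & X86_SEL_RPL) { fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_DS; uSelDS = 0; } */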
+ }
+ else
+ {
+#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
+ do { \
+ (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
+ if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \
+ { \
+ if (!((a_uVmcsVar) & X86_SEL_LDT)) \
+ fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
+ else \
+ { \
+ uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \
+ if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \
+ fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
+ } \
+ (a_uVmcsVar) = 0; \
+ } \
+ } while (0)
+ VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
+ VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
+ VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
+ VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
+#undef VMXLOCAL_ADJUST_HOST_SEG
+ }
+ }
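+ /* Any selector zeroed above is written as 0 into the VMCS further down, and the real value
+ is put back from RestoreHost via VMXRestoreHostState() before we get preempted or return
+ to ring-3. */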
+
+ /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
+ Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR);
+ Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS);
+ Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT));
+ Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
+ Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
+ Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
+ Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
+
+ /*
+ * Determine if we need to manually restore the GDTR and IDTR limits as VT-x zaps
+ * them to the maximum limit (0xffff) on every VM-exit.
+ */
+ if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff)
+ fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
+
+ /*
+ * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
+ * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
+ * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
+ * However, several hosts either insist on 0xfff being the limit (Windows Patch Guard) or
+ * use the limit for other purposes (darwin puts the CPU ID in there but botches sidt
+ * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
+ * at 0xffff on hosts where we are sure it won't cause trouble.
+ */
+#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
+ if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff)
+#else
+ if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff)
+#endif
+ fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
+
+ /*
+ * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
+ * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
+ * RPL should be too in most cases.
+ */
+ RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR;
+ AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb,
+ ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb),
+ VERR_VMX_INVALID_HOST_STATE);
+
+ PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK));
+ uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
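+ /* Example: with TR = 0x0040 the TSS descriptor is read at HostGdtr.uAddr + 0x40
+ (0x40 & X86_SEL_MASK == 0x40) and X86DESC64_BASE() assembles its full base address. */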
+
+ /*
+ * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
+ * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
+ * restoration if the host has something else. Task switching is not supported in 64-bit
+ * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
+ * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
+ *
+ * [1] See Intel spec. 3.5 "System Descriptor Types".
+ * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
+ */
+ Assert(pDesc->System.u4Type == 11);
+ if ( pDesc->System.u16LimitLow != 0x67
+ || pDesc->System.u4LimitHigh)
+ {
+ fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
+
+ /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
+ if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
+ fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
+ if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
+ {
+ /* The GDT is read-only but the writable GDT is available. */
+ fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
+ pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb;
+ int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr);
+ AssertRCReturn(rc, rc);
+ }
+ }
+
+ pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags;
+
+ /*
+ * Do all the VMCS updates in one block to assist nested virtualization.
+ */
+ int rc;
+ rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc);
+ rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc);
+ rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
+ rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
+ rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
+ rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
+ rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports certain host MSRs in the VM-exit MSR-load area and some in the
+ * host-state area of the VMCS.
+ *
+ * These MSRs will be automatically restored on the host after every successful
+ * VM-exit.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
+{
+ AssertPtr(pVCpu);
+
+ /*
+ * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
+ * rather than swapping them on every VM-entry.
+ */
+ hmR0VmxLazySaveHostMsrs(pVCpu);
+
+ /*
+ * Host Sysenter MSRs.
+ */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
+
+ /*
+ * Host EFER MSR.
+ *
+ * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
+ * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
+ */
+ if (g_fHmVmxSupportsVmcsEfer)
+ {
+ rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer);
+ AssertRC(rc);
+ }
+
+ /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
+ * hmR0VmxExportGuestEntryExitCtls(). */
+}
+
+
+/**
+ * Figures out if we need to swap the EFER MSR which is particularly expensive.
+ *
+ * We check all relevant bits. For now, that's everything besides LMA/LME, as
+ * these two bits are handled by VM-entry, see hmR0VmxExportGuestEntryExitCtls().
+ *
+ * @returns true if we need to load guest EFER, false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks Requires EFER, CR4.
+ * @remarks No-long-jump zone!!!
+ */
+static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+#ifdef HMVMX_ALWAYS_SWAP_EFER
+ RT_NOREF2(pVCpu, pVmxTransient);
+ return true;
+#else
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer;
+ uint64_t const u64GuestEfer = pCtx->msrEFER;
+
+# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * For nested-guests, we shall honor swapping the EFER MSR when requested by
+ * the nested-guest.
+ */
+ if ( pVmxTransient->fIsNestedGuest
+ && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
+ || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
+ || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
+ return true;
+# else
+ RT_NOREF(pVmxTransient);
+#endif
+
+ /*
+ * For 64-bit guests, if the EFER.SCE bit differs, we need to swap the EFER MSR
+ * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
+ */
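+ /* E.g. if the host runs with EFER.SCE=0 while a 64-bit guest has SCE=1, executing SYSCALL
+ with the host value still loaded would raise #UD in the guest, hence the swap. */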
+ if ( CPUMIsGuestInLongModeEx(pCtx)
+ && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
+ return true;
+
+ /*
+ * If the guest uses PAE and the EFER.NXE bit differs, we need to swap the EFER MSR
+ * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
+ *
+ * See Intel spec. 4.5 "IA-32e Paging".
+ * See Intel spec. 4.1.1 "Three Paging Modes".
+ *
+ * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
+ * import CR4 and CR0 from the VMCS here as those bits are always up to date.
+ */
+ Assert(vmxHCGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
+ Assert(vmxHCGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
+ if ( (pCtx->cr4 & X86_CR4_PAE)
+ && (pCtx->cr0 & X86_CR0_PG))
+ {
+ /*
+ * If nested paging is not used, verify that the guest paging mode matches the
+ * shadow paging mode which is/will be placed in the VMCS (which is what will
+ * actually be used while executing the guest and not the CR4 shadow value).
+ */
+ AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
+ || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
+ || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
+ || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
+ || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX,
+ ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
+ if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
+ {
+ /* Verify that the host is NX capable. */
+ Assert(g_CpumHostFeatures.s.fNoExecute);
+ return true;
+ }
+ }
+
+ return false;
+#endif
+}
+
+
+/**
+ * Exports the guest's RSP into the guest-state area in the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
+
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
+ AssertRC(rc);
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
+ Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
+ }
+}
+
+
+/**
+ * Exports the guest hardware-virtualization state.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * Check if the VMX feature is exposed to the guest and if the host CPU supports
+ * VMCS shadowing.
+ */
+ if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
+ {
+ /*
+ * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
+ * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
+ * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
+ *
+ * We check for VMX root mode here in case the guest executes VMXOFF without
+ * clearing the current VMCS pointer and our VMXOFF instruction emulation does
+ * not clear the current VMCS pointer.
+ */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
+ && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
+ && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
+ {
+ /* Paranoia. */
+ Assert(!pVmxTransient->fIsNestedGuest);
+
+ /*
+ * For performance reasons, also check if the nested hypervisor's current VMCS
+ * was newly loaded or modified before copying it to the shadow VMCS.
+ */
+ if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
+ {
+ int rc = vmxHCCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
+ AssertRCReturn(rc, rc);
+ pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
+ }
+ vmxHCEnableVmcsShadowing(pVCpu, pVmcsInfo);
+ }
+ else
+ vmxHCDisableVmcsShadowing(pVCpu, pVmcsInfo);
+ }
+#else
+ NOREF(pVmxTransient);
+#endif
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest debug registers into the guest-state area in the VMCS.
+ * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
+ *
+ * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
+ * stepping. */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if (pVmxTransient->fIsNestedGuest)
+ {
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
+ AssertRC(rc);
+
+ /*
+ * We don't want to always intercept MOV DRx for nested-guests as it causes
+ * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}.
+ * Instead, they are strictly only requested when the nested hypervisor intercepts
+ * them -- handled while merging VMCS controls.
+ *
+ * If neither the outer nor the nested hypervisor is intercepting MOV DRx, then the
+ * nested-guest debug state should be actively loaded on the host so that the
+ * nested-guest reads its own debug registers without causing VM-exits.
+ */
+ if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
+ && !CPUMIsGuestDebugStateActive(pVCpu))
+ CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
+ return VINF_SUCCESS;
+ }
+
+#ifdef VBOX_STRICT
+ /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
+ if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
+ {
+ /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
+ Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
+ Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
+ }
+#endif
+
+ bool fSteppingDB = false;
+ uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
+ if (pVCpu->hm.s.fSingleInstruction)
+ {
+ /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
+ if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
+ {
+ uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
+ Assert(fSteppingDB == false);
+ }
+ else
+ {
+ pVCpu->cpum.GstCtx.eflags.u |= X86_EFL_TF;
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
+ pVCpu->hmr0.s.fClearTrapFlag = true;
+ fSteppingDB = true;
+ }
+ }
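+ /* In short: the monitor-trap flag gives us one VM-exit after the next guest instruction
+ without touching guest state, while the EFLAGS.TF fallback dirties RFLAGS and relies on
+ fClearTrapFlag to undo it after the step. */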
+
+#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
+ bool fInterceptMovDRx = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fAlwaysInterceptMovDRx;
+#else
+ bool fInterceptMovDRx = false;
+#endif
+ uint64_t u64GuestDr7;
+ if ( fSteppingDB
+ || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
+ {
+ /*
+ * Use the combined guest and host DRx values found in the hypervisor register set
+ * because the hypervisor debugger has breakpoints active or someone is single stepping
+ * on the host side without a monitor trap flag.
+ *
+ * Note! DBGF expects a clean DR6 state before executing guest code.
+ */
+ if (!CPUMIsHyperDebugStateActive(pVCpu))
+ {
+ CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
+ Assert(CPUMIsHyperDebugStateActive(pVCpu));
+ Assert(!CPUMIsGuestDebugStateActive(pVCpu));
+ }
+
+ /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
+ u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
+ pVCpu->hmr0.s.fUsingHyperDR7 = true;
+ fInterceptMovDRx = true;
+ }
+ else
+ {
+ /*
+ * If the guest has enabled debug registers, we need to load them prior to
+ * executing guest code so they'll trigger at the right time.
+ */
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
+ if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
+ {
+ if (!CPUMIsGuestDebugStateActive(pVCpu))
+ {
+ CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
+ Assert(CPUMIsGuestDebugStateActive(pVCpu));
+ Assert(!CPUMIsHyperDebugStateActive(pVCpu));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
+ }
+#ifndef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
+ Assert(!fInterceptMovDRx);
+#endif
+ }
+ else if (!CPUMIsGuestDebugStateActive(pVCpu))
+ {
+ /*
+ * If no debugging is enabled, we'll lazily load DR0-3. Unlike on AMD-V, we
+ * must intercept #DB in order to maintain a correct DR6 guest value, and
+ * because we need to intercept it to prevent nested #DBs from hanging the
+ * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
+ */
+ fInterceptMovDRx = true;
+ }
+
+ /* Update DR7 with the actual guest value. */
+ u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
+ pVCpu->hmr0.s.fUsingHyperDR7 = false;
+ }
+
+ if (fInterceptMovDRx)
+ uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
+ else
+ uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
+
+ /*
+ * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
+ * monitor-trap flag and update our cache.
+ */
+ if (uProcCtls != pVmcsInfo->u32ProcCtls)
+ {
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
+ AssertRC(rc);
+ pVmcsInfo->u32ProcCtls = uProcCtls;
+ }
+
+ /*
+ * Update guest DR7.
+ */
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
+ AssertRC(rc);
+
+ /*
+ * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
+ * we need to clear interrupt inhibition if any as otherwise it causes a VM-entry failure.
+ *
+ * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
+ */
+ if (fSteppingDB)
+ {
+ Assert(pVCpu->hm.s.fSingleInstruction);
+ Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
+
+ uint32_t fIntrState = 0;
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
+ AssertRC(rc);
+
+ if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
+ {
+ fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
+ rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
+ AssertRC(rc);
+ }
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
+ * areas.
+ *
+ * These MSRs will automatically be loaded to the host CPU on every successful
+ * VM-entry and stored from the host CPU on every successful VM-exit.
+ *
+ * We create/update MSR slots for the host MSRs in the VM-exit MSR-load area. The
+ * actual host MSR values are not updated here for performance reasons. See
+ * hmR0VmxExportHostMsrs().
+ *
+ * We also export the guest sysenter MSRs into the guest-state area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+ AssertPtr(pVCpu);
+ AssertPtr(pVmxTransient);
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+
+ /*
+ * MSRs for which we use the auto-load/store MSR area in the VMCS.
+ * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(),
+ * nothing to do here. The host MSR values are updated when it's safe in
+ * hmR0VmxLazySaveHostMsrs().
+ *
+ * For nested-guests, the guest's MSRs from the VM-entry MSR-load area are already
+ * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
+ * emulation. The merged MSR permission bitmap will ensure that we get VM-exits
+ * for any MSRs that are not part of the lazy MSRs, so we do not need to place
+ * those MSRs into the auto-load/store MSR area. Nothing to do here.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
+ {
+ /* No auto-load/store MSRs currently. */
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
+ }
+
+ /*
+ * Guest Sysenter MSRs.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
+ {
+ int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
+ AssertRC(rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
+ {
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
+ AssertRC(rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
+ {
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
+ AssertRC(rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
+ }
+ }
+
+ /*
+ * Guest/host EFER MSR.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
+ {
+ /* Whether we are using the VMCS to swap the EFER MSR must have been
+ determined earlier while exporting VM-entry/VM-exit controls. */
+ Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
+
+ if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
+ {
+ /*
+ * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
+ * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
+ * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
+ * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
+ * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
+ * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
+ * during VM-entry.
+ */
+ uint64_t uGuestEferMsr = pCtx->msrEFER;
+ if (!pVM->hmr0.s.vmx.fUnrestrictedGuest)
+ {
+ if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
+ uGuestEferMsr &= ~MSR_K6_EFER_LME;
+ else
+ Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
+ }
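+ /* Example: a guest that has set EFER.LME=1 while CR0.PG=0 is entered with LME masked off
+ here, keeping the "IA-32e mode guest" entry control consistent until it also sets CR0.PG. */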
+
+ /*
+ * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
+ * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
+ */
+ if (g_fHmVmxSupportsVmcsEfer)
+ {
+ int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
+ AssertRC(rc);
+ }
+ else
+ {
+ /*
+ * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
+ * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
+ */
+ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
+ false /* fSetReadWrite */, false /* fUpdateHostMsr */);
+ AssertRCReturn(rc, rc);
+ }
+
+ Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
+ }
+ else if (!g_fHmVmxSupportsVmcsEfer)
+ hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
+ }
+
+ /*
+ * Other MSRs.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
+ {
+ /* Speculation Control (R/W). */
+ HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
+ if (pVM->cpum.ro.GuestFeatures.fIbrs)
+ {
+ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
+ false /* fSetReadWrite */, false /* fUpdateHostMsr */);
+ AssertRCReturn(rc, rc);
+ }
+
+ /* Last Branch Record. */
+ if (pVM->hmr0.s.vmx.fLbr)
+ {
+ PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
+ uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
+ uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
+ uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
+ Assert(cLbrStack <= 32);
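+ /* Sketch of the loop below: one auto-load/store slot per LBR FROM-IP MSR (plus the matching
+ TO-IP MSR when the CPU has one), followed by the LBR top-of-stack MSR, all initialized from
+ the shared VMCS-info copies. */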
+ for (uint32_t i = 0; i < cLbrStack; i++)
+ {
+ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
+ pVmcsInfoShared->au64LbrFromIpMsr[i],
+ false /* fSetReadWrite */, false /* fUpdateHostMsr */);
+ AssertRCReturn(rc, rc);
+
+ /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
+ if (idToIpMsrStart != 0)
+ {
+ rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
+ pVmcsInfoShared->au64LbrToIpMsr[i],
+ false /* fSetReadWrite */, false /* fUpdateHostMsr */);
+ AssertRCReturn(rc, rc);
+ }
+ }
+
+ /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
+ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr,
+ pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */,
+ false /* fUpdateHostMsr */);
+ AssertRCReturn(rc, rc);
+ }
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Wrapper for running the guest code in VT-x.
+ *
+ * @returns VBox status code, no informational status codes.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+ /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
+ pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState & VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
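+ /* fResumeVM selects VMRESUME when this VMCS is already in the launched state, otherwise
+ VMLAUNCH; cf. the statistics counters below. */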
+#ifdef VBOX_WITH_STATISTICS
+ if (fResumeVM)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume);
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch);
+#endif
+ int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM);
+ AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Reports world-switch error and dumps some useful debug info.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
+ * @param pVmxTransient The VMX-transient structure (only
+ * exitReason updated).
+ */
+static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
+{
+ Assert(pVCpu);
+ Assert(pVmxTransient);
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
+ switch (rcVMRun)
+ {
+ case VERR_VMX_INVALID_VMXON_PTR:
+ AssertFailed();
+ break;
+ case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
+ case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
+ rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
+ AssertRC(rc);
+ vmxHCReadToTransientSlow<HMVMX_READ_EXIT_QUALIFICATION>(pVCpu, pVmxTransient);
+
+ pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
+ /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
+ Cannot do it here as we may have been long preempted. */
+
+#ifdef VBOX_STRICT
+ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+ Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
+ pVmxTransient->uExitReason));
+ Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
+ Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
+ if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
+ Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
+ else
+ Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
+ Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
+ Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
+
+ static struct
+ {
+ /** Name of the field to log. */
+ const char *pszName;
+ /** The VMCS field. */
+ uint32_t uVmcsField;
+ /** Whether host support of this field needs to be checked. */
+ bool fCheckSupport;
+ } const s_aVmcsFields[] =
+ {
+ { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
+ { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
+ { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
+ { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
+ { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
+ { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
+ { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
+ { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
+ { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
+ { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
+ { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
+ { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
+ { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
+ { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
+ { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
+ { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
+ { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
+ { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
+ { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
+ { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
+ { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
+ { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
+ { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
+ { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
+ { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
+ { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
+ { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
+ { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
+ /* The order of selector fields below are fixed! */
+ { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
+ { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
+ { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
+ { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
+ { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
+ { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
+ { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
+ /* End of ordered selector fields. */
+ { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
+ { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
+ { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
+ { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
+ { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
+ { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
+ { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
+ { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
+ };
+
+ RTGDTR HostGdtr;
+ ASMGetGDTR(&HostGdtr);
+
+ uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
+ for (uint32_t i = 0; i < cVmcsFields; i++)
+ {
+ uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
+
+ bool fSupported;
+ if (!s_aVmcsFields[i].fCheckSupport)
+ fSupported = true;
+ else
+ {
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ switch (uVmcsField)
+ {
+ case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break;
+ case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break;
+ case VMX_VMCS32_CTRL_PROC_EXEC2:
+ fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
+ break;
+ default:
+ AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
+ }
+ }
+
+ if (fSupported)
+ {
+ uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
+ switch (uWidth)
+ {
+ case VMX_VMCSFIELD_WIDTH_16BIT:
+ {
+ uint16_t u16Val;
+ rc = VMXReadVmcs16(uVmcsField, &u16Val);
+ AssertRC(rc);
+ Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
+
+ if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
+ && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
+ {
+ if (u16Val < HostGdtr.cbGdt)
+ {
+ /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
+ static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
+ "Host FS", "Host GS", "Host TR" };
+ uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
+ Assert(idxSel < RT_ELEMENTS(s_apszSel));
+ PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
+ hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
+ }
+ else
+ Log4((" Selector value exceeds GDT limit!\n"));
+ }
+ break;
+ }
+
+ case VMX_VMCSFIELD_WIDTH_32BIT:
+ {
+ uint32_t u32Val;
+ rc = VMXReadVmcs32(uVmcsField, &u32Val);
+ AssertRC(rc);
+ Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
+ break;
+ }
+
+ case VMX_VMCSFIELD_WIDTH_64BIT:
+ case VMX_VMCSFIELD_WIDTH_NATURAL:
+ {
+ uint64_t u64Val;
+ rc = VMXReadVmcs64(uVmcsField, &u64Val);
+ AssertRC(rc);
+ Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
+ break;
+ }
+ }
+ }
+ }
+
+ Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
+ Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
+ Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
+ Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
+ Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
+ Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
+#endif /* VBOX_STRICT */
+ break;
+ }
+
+ default:
+ /* Impossible */
+ AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
+ break;
+ }
+}
+
+
+/**
+ * Sets up the usage of TSC-offsetting and updates the VMCS.
+ *
+ * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
+ * VMX-preemption timer.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param idCurrentCpu The current CPU number.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu)
+{
+ bool fOffsettedTsc;
+ bool fParavirtTsc;
+ uint64_t uTscOffset;
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+
+ if (pVM->hmr0.s.vmx.fUsePreemptTimer)
+ {
+ /* The TMCpuTickGetDeadlineAndTscOffset function is expensive; calling it on
+ every entry slowed down the bs2-test1 CPUID testcase by ~33% (on a 10980xe). */
+ uint64_t cTicksToDeadline;
+ if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu
+ && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion))
+ {
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline);
+ fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
+ cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc();
+ if ((int64_t)cTicksToDeadline > 0)
+ { /* hopefully */ }
+ else
+ {
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired);
+ cTicksToDeadline = 0;
+ }
+ }
+ else
+ {
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline);
+ cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc,
+ &pVCpu->hmr0.s.vmx.uTscDeadline,
+ &pVCpu->hmr0.s.vmx.uTscDeadlineVersion);
+ pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline;
+ if (cTicksToDeadline >= 128)
+ { /* hopefully */ }
+ else
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired);
+ }
+
+ /* Make sure the returned values have sane upper and lower boundaries. */
+ uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
+ cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */
+ cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32678); /* 1/32768th of a second, ~30us. */
+ cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
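+ /* Rough example, assuming a 3 GHz invariant TSC: the RT_MIN/RT_MAX clamp keeps the deadline
+ between roughly 92 thousand ticks (~30us) and 47 million ticks (~15.6ms) before the
+ cPreemptTimerShift scaling. */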
+
+ /** @todo r=ramshankar: We need to find a way to integrate nested-guest
+ * preemption timers here. We probably need to clamp the preemption timer,
+ * after converting the timer value to the host. */
+ uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
+ int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
+ AssertRC(rc);
+ }
+ else
+ fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
+
+ if (fParavirtTsc)
+ {
+ /* Currently neither Hyper-V nor KVM needs its paravirt. TSC information updated
+ before every VM-entry, hence this is disabled for performance's sake. */
+#if 0
+ int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
+ AssertRC(rc);
+#endif
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
+ }
+
+ if ( fOffsettedTsc
+ && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit))
+ {
+ if (pVmxTransient->fIsNestedGuest)
+ uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
+ hmR0VmxSetTscOffsetVmcs(pVmcsInfo, uTscOffset);
+ hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
+ }
+ else
+ {
+ /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
+ hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
+ }
+}
+
+
+/**
+ * Saves the guest state from the VMCS into the guest-CPU context.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
+ */
+VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
+{
+ AssertPtr(pVCpu);
+ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+ return vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, fWhat);
+}
+
+
+/**
+ * Gets VMX VM-exit auxiliary information.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxExitAux Where to store the VM-exit auxiliary info.
+ * @param fWhat What to fetch, HMVMX_READ_XXX.
+ */
+VMMR0DECL(int) VMXR0GetExitAuxInfo(PVMCPUCC pVCpu, PVMXEXITAUX pVmxExitAux, uint32_t fWhat)
+{
+ PVMXTRANSIENT pVmxTransient = pVCpu->hmr0.s.vmx.pVmxTransient;
+ if (RT_LIKELY(pVmxTransient))
+ {
+ AssertCompile(sizeof(fWhat) == sizeof(pVmxTransient->fVmcsFieldsRead));
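+ /* Note: in strict builds each flag handled below is cleared from fWhat so that the final
+ AssertMsg can catch any HMVMX_READ_XXX bits this function does not handle. */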
+
+ /* The exit reason is always available. */
+ pVmxExitAux->uReason = pVmxTransient->uExitReason;
+
+ if (fWhat & HMVMX_READ_EXIT_QUALIFICATION)
+ {
+ vmxHCReadToTransientSlow<HMVMX_READ_EXIT_QUALIFICATION>(pVCpu, pVmxTransient);
+ pVmxExitAux->u64Qual = pVmxTransient->uExitQual;
+#ifdef VBOX_STRICT
+ fWhat &= ~HMVMX_READ_EXIT_QUALIFICATION;
+#endif
+ }
+
+ if (fWhat & HMVMX_READ_IDT_VECTORING_INFO)
+ {
+ vmxHCReadToTransientSlow<HMVMX_READ_IDT_VECTORING_INFO>(pVCpu, pVmxTransient);
+ pVmxExitAux->uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
+#ifdef VBOX_STRICT
+ fWhat &= ~HMVMX_READ_IDT_VECTORING_INFO;
+#endif
+ }
+
+ if (fWhat & HMVMX_READ_IDT_VECTORING_ERROR_CODE)
+ {
+ vmxHCReadToTransientSlow<HMVMX_READ_IDT_VECTORING_ERROR_CODE>(pVCpu, pVmxTransient);
+ pVmxExitAux->uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
+#ifdef VBOX_STRICT
+ fWhat &= ~HMVMX_READ_IDT_VECTORING_ERROR_CODE;
+#endif
+ }
+
+ if (fWhat & HMVMX_READ_EXIT_INSTR_LEN)
+ {
+ vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INSTR_LEN>(pVCpu, pVmxTransient);
+ pVmxExitAux->cbInstr = pVmxTransient->cbExitInstr;
+#ifdef VBOX_STRICT
+ fWhat &= ~HMVMX_READ_EXIT_INSTR_LEN;
+#endif
+ }
+
+ if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_INFO)
+ {
+ vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INTERRUPTION_INFO>(pVCpu, pVmxTransient);
+ pVmxExitAux->uExitIntInfo = pVmxTransient->uExitIntInfo;
+#ifdef VBOX_STRICT
+ fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_INFO;
+#endif
+ }
+
+ if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE)
+ {
+ vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE>(pVCpu, pVmxTransient);
+ pVmxExitAux->uExitIntErrCode = pVmxTransient->uExitIntErrorCode;
+#ifdef VBOX_STRICT
+ fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE;
+#endif
+ }
+
+ if (fWhat & HMVMX_READ_EXIT_INSTR_INFO)
+ {
+ vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INSTR_INFO>(pVCpu, pVmxTransient);
+ pVmxExitAux->InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
+#ifdef VBOX_STRICT
+ fWhat &= ~HMVMX_READ_EXIT_INSTR_INFO;
+#endif
+ }
+
+ if (fWhat & HMVMX_READ_GUEST_LINEAR_ADDR)
+ {
+ vmxHCReadToTransientSlow<HMVMX_READ_GUEST_LINEAR_ADDR>(pVCpu, pVmxTransient);
+ pVmxExitAux->u64GuestLinearAddr = pVmxTransient->uGuestLinearAddr;
+#ifdef VBOX_STRICT
+ fWhat &= ~HMVMX_READ_GUEST_LINEAR_ADDR;
+#endif
+ }
+
+ if (fWhat & HMVMX_READ_GUEST_PHYSICAL_ADDR)
+ {
+ vmxHCReadToTransientSlow<HMVMX_READ_GUEST_PHYSICAL_ADDR>(pVCpu, pVmxTransient);
+ pVmxExitAux->u64GuestPhysAddr = pVmxTransient->uGuestPhysicalAddr;
+#ifdef VBOX_STRICT
+ fWhat &= ~HMVMX_READ_GUEST_PHYSICAL_ADDR;
+#endif
+ }
+
+ if (fWhat & HMVMX_READ_GUEST_PENDING_DBG_XCPTS)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ vmxHCReadToTransientSlow<HMVMX_READ_GUEST_PENDING_DBG_XCPTS>(pVCpu, pVmxTransient);
+ pVmxExitAux->u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
+#else
+ pVmxExitAux->u64GuestPendingDbgXcpts = 0;
+#endif
+#ifdef VBOX_STRICT
+ fWhat &= ~HMVMX_READ_GUEST_PENDING_DBG_XCPTS;
+#endif
+ }
+
+ AssertMsg(!fWhat, ("fWhat=%#RX32 fVmcsFieldsRead=%#RX32\n", fWhat, pVmxTransient->fVmcsFieldsRead));
+ return VINF_SUCCESS;
+ }
+ return VERR_NOT_AVAILABLE;
+}
+
+
+/**
+ * Does the necessary state syncing before returning to ring-3 for any reason
+ * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fImportState Whether to import the guest state from the VMCS back
+ * to the guest-CPU context.
+ *
+ * @remarks No-long-jmp zone!!!
+ */
+static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ RTCPUID const idCpu = RTMpCpuId();
+ Log4Func(("HostCpuId=%u\n", idCpu));
+
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too.
+ */
+
+ /* Save the guest state if necessary. */
+ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+ if (fImportState)
+ {
+ int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+ }
+
+ /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
+ CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
+ Assert(!CPUMIsGuestFPUStateActive(pVCpu));
+
+ /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
+#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
+ Assert( (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
+ || pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs
+ || (!CPUMIsHyperDebugStateActive(pVCpu) && !pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fAlwaysInterceptMovDRx));
+#else
+ Assert( (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
+ || pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs
+ || !CPUMIsHyperDebugStateActive(pVCpu));
+#endif
+ CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
+ Assert(!CPUMIsGuestDebugStateActive(pVCpu));
+ Assert(!CPUMIsHyperDebugStateActive(pVCpu));
+
+ /* Restore host-state bits that VT-x only restores partially. */
+ if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
+ {
+ Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu));
+ VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
+ }
+ pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
+
+ /* Restore the lazy host MSRs as we're leaving VT-x context. */
+ if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
+ {
+ /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
+ if (!fImportState)
+ {
+ int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
+ AssertRCReturn(rc, rc);
+ }
+ hmR0VmxLazyRestoreHostMsrs(pVCpu);
+ Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs);
+ }
+ else
+ pVCpu->hmr0.s.vmx.fLazyMsrs = 0;
+
+ /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
+ pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
+
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
+
+ VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
+
+ /** @todo This partially defeats the purpose of having preemption hooks.
+ * The problem is that deregistering the hooks should be moved to a place that
+ * lasts until the EMT is about to be destroyed, rather than being done every
+ * time we leave HM context.
+ */
+ int rc = hmR0VmxClearVmcs(pVmcsInfo);
+ AssertRCReturn(rc, rc);
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
+ * clear a shadow VMCS before allowing that VMCS to become active on another
+ * logical processor. We may or may not be importing guest state which clears
+ * it, so cover for it here.
+ *
+ * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
+ */
+ if ( pVmcsInfo->pvShadowVmcs
+ && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
+ {
+ rc = vmxHCClearShadowVmcs(pVmcsInfo);
+ AssertRCReturn(rc, rc);
+ }
+
+ /*
+ * Flag that we need to re-export the host state if we switch to this VMCS before
+ * executing guest or nested-guest code.
+ */
+ pVmcsInfo->idHostCpuState = NIL_RTCPUID;
+#endif
+
+ Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
+ NOREF(idCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Leaves the VT-x session.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jmp zone!!!
+ */
+static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
+{
+ HM_DISABLE_PREEMPT(pVCpu);
+ HMVMX_ASSERT_CPU_SAFE(pVCpu);
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
+ and already done this from VMXR0ThreadCtxCallback(). */
+ if (!pVCpu->hmr0.s.fLeaveDone)
+ {
+ int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
+ AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
+ pVCpu->hmr0.s.fLeaveDone = true;
+ }
+ Assert(!pVCpu->cpum.GstCtx.fExtrn);
+
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too.
+ */
+
+ /* Deregister hook now that we've left HM context before re-enabling preemption. */
+ /** @todo Deregistering here means we need to VMCLEAR always
+ * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate need
+ * for calling VMMR0ThreadCtxHookDisable here! */
+ VMMR0ThreadCtxHookDisable(pVCpu);
+
+ /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
+ int rc = HMR0LeaveCpu(pVCpu);
+ HM_RESTORE_PREEMPT();
+ return rc;
+}
+
+
+/**
+ * Takes necessary actions before going back to ring-3.
+ *
+ * An action requires us to go back to ring-3. This function does the necessary
+ * steps before we can safely return to ring-3. This is not the same as longjmps
+ * to ring-3; this is voluntary and prepares the guest so it may continue
+ * executing outside HM (recompiler/IEM).
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param rcExit The reason for exiting to ring-3. Can be
+ * VINF_VMM_UNKNOWN_RING3_CALL.
+ */
+static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
+{
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+ if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
+ {
+ VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
+ pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
+ pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
+ /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
+ }
+
+ /* Please, no longjumps here (a logging flush could jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
+ VMMRZCallRing3Disable(pVCpu);
+ Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
+
+ /*
+ * Convert any pending HM events back to TRPM due to premature exits to ring-3.
+ * We need to do this only on returns to ring-3 and not for longjmps to ring-3.
+ *
+ * This is because execution may continue from ring-3 and we would need to inject
+ * the event from there (hence place it back in TRPM).
+ */
+ if (pVCpu->hm.s.Event.fPending)
+ {
+ vmxHCPendingEventToTrpmTrap(pVCpu);
+ Assert(!pVCpu->hm.s.Event.fPending);
+
+ /* Clear the events from the VMCS. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
+ rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
+ }
+#ifdef VBOX_STRICT
+ /*
+ * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
+ * fatal), we don't care about verifying duplicate injection of events. Errors like
+ * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
+ * function so those should and will be checked below.
+ */
+ else if (RT_SUCCESS(rcExit))
+ {
+ /*
+ * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
+ * This can be pretty hard to debug otherwise, interrupts might get injected twice
+ * occasionally, see @bugref{9180#c42}.
+ *
+ * However, if the VM-entry failed, the VM-entry interruption-information field would
+ * be left unmodified as the event would not have been injected to the guest. In
+ * such cases, don't assert, we're not going to continue guest execution anyway.
+ */
+ uint32_t uExitReason;
+ uint32_t uEntryIntInfo;
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
+ rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
+ AssertRC(rc);
+ AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
+ ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
+ }
+#endif
+
+ /*
+ * Clear the interrupt-window and NMI-window VMCS controls as we could have got
+ * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
+ * (e.g. TPR below threshold).
+ */
+ if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
+ {
+ vmxHCClearIntWindowExitVmcs(pVCpu, pVmcsInfo);
+ vmxHCClearNmiWindowExitVmcs(pVCpu, pVmcsInfo);
+ }
+
+ /* If we're emulating an instruction, we shouldn't have any TRPM traps pending,
+ and if we're injecting an event we should have a TRPM trap pending. */
+ AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
+#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
+ AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
+#endif
+
+ /* Save guest state and restore host state bits. */
+ int rc = hmR0VmxLeaveSession(pVCpu);
+ AssertRCReturn(rc, rc);
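+ /* Undo the longjmp-to-ring-3 accounting done by hmR0VmxLeave(); this is a regular exit to ring-3 (counted below). */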
+ STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
+
+ /* Thread-context hooks are unregistered at this point!!! */
+ /* Ring-3 callback notifications are unregistered at this point!!! */
+
+ /* Sync recompiler state. */
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
+ CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
+ | CPUM_CHANGED_LDTR
+ | CPUM_CHANGED_GDTR
+ | CPUM_CHANGED_IDTR
+ | CPUM_CHANGED_TR
+ | CPUM_CHANGED_HIDDEN_SEL_REGS);
+ if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
+ && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
+ CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
+
+ Assert(!pVCpu->hmr0.s.fClearTrapFlag);
+
+ /* Update the exit-to-ring-3 reason. */
+ pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
+
+ /* On our way back from ring-3, reload the guest state if there is a possibility of it being changed. */
+ if ( rcExit != VINF_EM_RAW_INTERRUPT
+ || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
+ {
+ Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
+ VMMRZCallRing3Enable(pVCpu);
+ return rc;
+}
+
+
+/**
+ * VMMRZCallRing3() callback wrapper which saves the guest state before we
+ * longjump due to a ring-0 assertion.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0DECL(int) VMXR0AssertionCallback(PVMCPUCC pVCpu)
+{
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() need to be updated too.
+ * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
+ */
+ VMMR0AssertionRemoveNotification(pVCpu);
+ VMMRZCallRing3Disable(pVCpu);
+ HM_DISABLE_PREEMPT(pVCpu);
+
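+ /* Import the entire guest state and restore the host FPU and debug state, as we are abandoning guest execution. */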
+ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+ vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
+ CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
+
+ /* Restore host-state bits that VT-x only restores partially. */
+ if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
+ VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
+ pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
+
+ /* Restore the lazy host MSRs as we're leaving VT-x context. */
+ if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
+ hmR0VmxLazyRestoreHostMsrs(pVCpu);
+
+ /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
+ pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
+ VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
+
+ /* Clear the current VMCS data back to memory (the shadow VMCS, if any, would have been
+ cleared as part of importing the guest state above). */
+ hmR0VmxClearVmcs(pVmcsInfo);
+
+ /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
+ VMMR0ThreadCtxHookDisable(pVCpu);
+
+ /* Leave HM context. This takes care of local init (term). */
+ HMR0LeaveCpu(pVCpu);
+ HM_RESTORE_PREEMPT();
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Enters the VT-x session.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
+{
+ AssertPtr(pVCpu);
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ LogFlowFunc(("pVCpu=%p\n", pVCpu));
+ Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
+ == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
+
+#ifdef VBOX_STRICT
+ /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
+ RTCCUINTREG uHostCr4 = ASMGetCR4();
+ if (!(uHostCr4 & X86_CR4_VMXE))
+ {
+ LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
+ return VERR_VMX_X86_CR4_VMXE_CLEARED;
+ }
+#endif
+
+ /*
+ * Do the EMT scheduled L1D and MDS flush here if needed.
+ */
+ if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
+ ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
+ else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
+ hmR0MdsClear();
+
+ /*
+ * Load the appropriate VMCS as the current and active one.
+ */
+ PVMXVMCSINFO pVmcsInfo;
+ bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
+ if (!fInNestedGuestMode)
+ pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo;
+ else
+ pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
+ int rc = hmR0VmxLoadVmcs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
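+ /* Remember whether the guest or the nested-guest VMCS is now current; ring-3 keeps its own copy of this flag. */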
+ pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
+ pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode;
+ pVCpu->hmr0.s.fLeaveDone = false;
+ Log4Func(("Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
+ }
+ return rc;
+}
+
+
+/**
+ * The thread-context callback.
+ *
+ * This is used together with RTThreadCtxHookCreate() on platforms which
+ * support it, and directly from VMMR0EmtPrepareForBlocking() and
+ * VMMR0EmtResumeAfterBlocking() on platforms which don't.
+ *
+ * @param enmEvent The thread-context event.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
+ * @thread EMT(pVCpu)
+ */
+VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
+{
+ AssertPtr(pVCpu);
+ RT_NOREF1(fGlobalInit);
+
+ switch (enmEvent)
+ {
+ case RTTHREADCTXEVENT_OUT:
+ {
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ VMCPU_ASSERT_EMT(pVCpu);
+
+ /* No longjmps (logger flushes, locks) in this fragile context. */
+ VMMRZCallRing3Disable(pVCpu);
+ Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
+
+ /* Restore host-state (FPU, debug etc.) */
+ if (!pVCpu->hmr0.s.fLeaveDone)
+ {
+ /*
+ * Do -not- import the guest-state here as we might already be in the middle of importing
+ * it, esp. bad if we're holding the PGM lock, see comment in hmR0VmxImportGuestState().
+ */
+ hmR0VmxLeave(pVCpu, false /* fImportState */);
+ pVCpu->hmr0.s.fLeaveDone = true;
+ }
+
+ /* Leave HM context, takes care of local init (term). */
+ int rc = HMR0LeaveCpu(pVCpu);
+ AssertRC(rc);
+
+ /* Restore longjmp state. */
+ VMMRZCallRing3Enable(pVCpu);
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
+ break;
+ }
+
+ case RTTHREADCTXEVENT_IN:
+ {
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ VMCPU_ASSERT_EMT(pVCpu);
+
+ /* Do the EMT scheduled L1D and MDS flush here if needed. */
+ if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
+ ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
+ else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
+ hmR0MdsClear();
+
+ /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
+ VMMRZCallRing3Disable(pVCpu);
+ Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
+
+ /* Initialize the bare minimum state required for HM. This takes care of
+ initializing VT-x if necessary (onlined CPUs, local init etc.) */
+ int rc = hmR0EnterCpu(pVCpu);
+ AssertRC(rc);
+ Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
+ == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
+
+ /* Load the active VMCS as the current one. */
+ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+ rc = hmR0VmxLoadVmcs(pVmcsInfo);
+ AssertRC(rc);
+ Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
+ pVCpu->hmr0.s.fLeaveDone = false;
+
+ /* Restore longjmp state. */
+ VMMRZCallRing3Enable(pVCpu);
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
+
+/**
+ * Exports the host state into the VMCS host-state area.
+ * Sets up the VM-exit MSR-load area.
+ *
+ * The CPU state will be loaded from these fields on every successful VM-exit.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ int rc = VINF_SUCCESS;
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
+ {
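+ /* Control registers are exported first as the resulting host CR4 value is needed when exporting the host segment registers below. */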
+ uint64_t uHostCr4 = hmR0VmxExportHostControlRegs();
+
+ rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ hmR0VmxExportHostMsrs(pVCpu);
+
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
+ }
+ return rc;
+}
+
+
+/**
+ * Saves the host state in the VMCS host-state.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
+{
+ AssertPtr(pVCpu);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /*
+ * Export the host state here while entering HM context.
+ * When thread-context hooks are used, we might get preempted and have to re-save the host
+ * state but most of the time we won't be, so do it here before we disable interrupts.
+ */
+ return hmR0VmxExportHostState(pVCpu);
+}
+
+
+/**
+ * Exports the guest state into the VMCS guest-state area.
+ *
+ * This will typically be done before VM-entry when the guest-CPU state and the
+ * VMCS state may potentially be out of sync.
+ *
+ * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
+ * VM-entry controls.
+ * Sets up the appropriate VMX non-root function to execute guest code based on
+ * the guest CPU mode.
+ *
+ * @returns VBox strict status code.
+ * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
+ * without unrestricted guest execution and the VMMDev is not presently
+ * mapped (e.g. EFI32).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ AssertPtr(pVCpu);
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+ LogFlowFunc(("pVCpu=%p\n", pVCpu));
+
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
+
+ /*
+ * Determine real-on-v86 mode.
+ * Used when the guest is in real-mode and unrestricted guest execution is not used.
+ */
+ PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
+ if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest
+ || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
+ pVmcsInfoShared->RealMode.fRealOnV86Active = false;
+ else
+ {
+ Assert(!pVmxTransient->fIsNestedGuest);
+ pVmcsInfoShared->RealMode.fRealOnV86Active = true;
+ }
+
+ /*
+ * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
+ * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
+ */
+ int rc = vmxHCExportGuestEntryExitCtls(pVCpu, pVmxTransient);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ rc = vmxHCExportGuestCR0(pVCpu, pVmxTransient);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ VBOXSTRICTRC rcStrict = vmxHCExportGuestCR3AndCR4(pVCpu, pVmxTransient);
+ if (rcStrict == VINF_SUCCESS)
+ { /* likely */ }
+ else
+ {
+ Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
+ return rcStrict;
+ }
+
+ rc = vmxHCExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ vmxHCExportGuestApicTpr(pVCpu, pVmxTransient);
+ vmxHCExportGuestXcptIntercepts(pVCpu, pVmxTransient);
+ vmxHCExportGuestRip(pVCpu);
+ hmR0VmxExportGuestRsp(pVCpu);
+ vmxHCExportGuestRflags(pVCpu, pVmxTransient);
+
+ rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
+ | HM_CHANGED_GUEST_CR2
+ | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
+ | HM_CHANGED_GUEST_X87
+ | HM_CHANGED_GUEST_SSE_AVX
+ | HM_CHANGED_GUEST_OTHER_XSAVE
+ | HM_CHANGED_GUEST_XCRx
+ | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
+ | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
+ | HM_CHANGED_GUEST_TSC_AUX
+ | HM_CHANGED_GUEST_OTHER_MSRS
+ | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
+ return rc;
+}
+
+
+/**
+ * Exports the state shared between the host and guest into the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
+ {
+ int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient);
+ AssertRC(rc);
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
+
+ /* Loading shared debug bits might have changed the eflags.TF bit for debugging purposes. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
+ vmxHCExportGuestRflags(pVCpu, pVmxTransient);
+ }
+
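+ /* Lazily-switched MSRs (syscall-related MSRs and KERNEL_GS_BASE) are loaded directly into the CPU here rather than via the VMCS auto-load area. */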
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
+ {
+ hmR0VmxLazyLoadGuestMsrs(pVCpu);
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
+ }
+
+ AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
+ ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
+}
+
+
+/**
+ * Worker for loading the guest-state bits in the inner VT-x execution loop.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
+ * without unrestricted guest execution and the VMMDev is not presently
+ * mapped (e.g. EFI32).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+#endif
+
+ /*
+ * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
+ * changes. First try to export only these without going through all other changed-flag checks.
+ */
+ VBOXSTRICTRC rcStrict;
+ uint64_t const fCtxMask = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
+ uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
+ uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
+
+ /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often).*/
+ if ( (fCtxChanged & fMinimalMask)
+ && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
+ {
+ vmxHCExportGuestRip(pVCpu);
+ hmR0VmxExportGuestRsp(pVCpu);
+ vmxHCExportGuestRflags(pVCpu, pVmxTransient);
+ rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
+ }
+ /* If anything else also changed, go through the full export routine and export as required. */
+ else if (fCtxChanged & fCtxMask)
+ {
+ rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ { /* likely */}
+ else
+ {
+ AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
+ VBOXSTRICTRC_VAL(rcStrict)));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ return rcStrict;
+ }
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
+ }
+ /* Nothing changed, nothing to load here. */
+ else
+ rcStrict = VINF_SUCCESS;
+
+#ifdef VBOX_STRICT
+ /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
+ uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
+ AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
+#endif
+ return rcStrict;
+}
+
+
+/**
+ * Map the APIC-access page for virtualizing APIC accesses.
+ *
+ * This can cause longjmps to ring-3 due to the acquisition of the PGM lock. Hence,
+ * this is not done as part of exporting guest state; see @bugref{8721}.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param GCPhysApicBase The guest-physical address of the APIC access page.
+ */
+static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu, RTGCPHYS GCPhysApicBase)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ Assert(GCPhysApicBase);
+
+ LogFunc(("Mapping HC APIC-access page at %#RGp\n", GCPhysApicBase));
+
+ /* Unalias the existing mapping. */
+ int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
+ AssertRCReturn(rc, rc);
+
+ /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
+ Assert(pVM->hmr0.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
+ rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hmr0.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
+ AssertRCReturn(rc, rc);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Worker function passed to RTMpOnSpecific() that is to be called on the target
+ * CPU.
+ *
+ * @param idCpu The ID for the CPU the function is called on.
+ * @param pvUser1 Null, not used.
+ * @param pvUser2 Null, not used.
+ */
+static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ RT_NOREF3(idCpu, pvUser1, pvUser2);
+ VMXDispatchHostNmi();
+}
+
+
+/**
+ * Dispatches an NMI on the host CPU that received it.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was
+ * executing when receiving the host NMI in VMX non-root
+ * operation.
+ */
+static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
+{
+ RTCPUID const idCpu = pVmcsInfo->idHostCpuExec;
+ Assert(idCpu != NIL_RTCPUID);
+
+ /*
+ * We don't want to delay dispatching the NMI any more than we have to. However,
+ * we have already chosen -not- to dispatch NMIs when interrupts were still disabled
+ * after executing guest or nested-guest code for the following reasons:
+ *
+ * - We would need to perform VMREADs with interrupts disabled, which is orders of
+ * magnitude worse when we run as a nested hypervisor without VMCS shadowing
+ * supported by the host hypervisor.
+ *
+ * - It affects the common VM-exit scenario and keeps interrupts disabled for a
+ * longer period of time just for handling an edge case like host NMIs which do
+ * not occur nearly as frequently as other VM-exits.
+ *
+ * Let's cover the most likely scenario first. Check if we are on the target CPU
+ * and dispatch the NMI right away. This should be much faster than calling into
+ * RTMpOnSpecific() machinery.
+ */
+ bool fDispatched = false;
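+ /* Disable interrupts so we cannot be rescheduled to another CPU between checking the current CPU ID and dispatching the NMI. */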
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+ if (idCpu == RTMpCpuId())
+ {
+ VMXDispatchHostNmi();
+ fDispatched = true;
+ }
+ ASMSetFlags(fEFlags);
+ if (fDispatched)
+ {
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * RTMpOnSpecific() waits until the worker function has run on the target CPU. So
+ * there should be no race or recursion even if we are unlucky enough to be preempted
+ * (to the target CPU) without dispatching the host NMI above.
+ */
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi);
+ return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */);
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Merges the guest with the nested-guest MSR bitmap in preparation of executing the
+ * nested-guest using hardware-assisted VMX.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfoNstGst The nested-guest VMCS info. object.
+ * @param pVmcsInfoGst The guest VMCS info. object.
+ */
+static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst)
+{
+ uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE;
+ uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap;
+ Assert(pu64MsrBitmap);
+
+ /*
+ * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any
+ * MSR that is intercepted by the guest is also intercepted while executing the
+ * nested-guest using hardware-assisted VMX.
+ *
+ * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a
+ * nested-guest VM-exit even if the outer guest is not intercepting some
+ * MSRs. We cannot assume the caller has initialized the nested-guest
+ * MSR bitmap in this case.
+ *
+ * The nested hypervisor may also switch whether it uses MSR bitmaps for
+ * each of its VM-entries, hence initializing it once per VM while setting
+ * up the nested-guest VMCS is not sufficient.
+ */
+ PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
+ if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ {
+ uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)&pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap[0];
+ uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap;
+ Assert(pu64MsrBitmapNstGst);
+ Assert(pu64MsrBitmapGst);
+
+ /** @todo Detect and use EVEX.POR? */
+ uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t);
+ for (uint32_t i = 0; i < cFrags; i++)
+ pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i];
+ }
+ else
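+ /* No nested-guest MSR bitmap in use: set all bits so every MSR access causes a VM-exit. */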
+ ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff));
+}
+
+
+/**
+ * Merges the guest VMCS in to the nested-guest VMCS controls in preparation of
+ * hardware-assisted VMX execution of the nested-guest.
+ *
+ * For a guest, we don't modify these controls once we set up the VMCS and hence
+ * this function is never called.
+ *
+ * For nested-guests, since the nested hypervisor provides these controls on every
+ * nested-guest VM-entry and could potentially change them every time, we need to
+ * merge them before every nested-guest VM-entry.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu)
+{
+ PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
+ PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
+ PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
+
+ /*
+ * Merge the controls with the requirements of the guest VMCS.
+ *
+ * We do not need to validate the nested-guest VMX features specified in the nested-guest
+ * VMCS with the features supported by the physical CPU as it's already done by the
+ * VMLAUNCH/VMRESUME instruction emulation.
+ *
+ * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are
+ * derived from the VMX features supported by the physical CPU.
+ */
+
+ /* Pin-based VM-execution controls. */
+ uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls;
+
+ /* Processor-based VM-execution controls. */
+ uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS)
+ | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT
+ | VMX_PROC_CTLS_NMI_WINDOW_EXIT
+ | VMX_PROC_CTLS_MOV_DR_EXIT /* hmR0VmxExportSharedDebugState makes
+ sure guest DRx regs are loaded. */
+ | VMX_PROC_CTLS_USE_TPR_SHADOW
+ | VMX_PROC_CTLS_MONITOR_TRAP_FLAG));
+
+ /* Secondary processor-based VM-execution controls. */
+ uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID)
+ | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS
+ | VMX_PROC_CTLS2_INVPCID
+ | VMX_PROC_CTLS2_VMCS_SHADOWING
+ | VMX_PROC_CTLS2_RDTSCP
+ | VMX_PROC_CTLS2_XSAVES_XRSTORS
+ | VMX_PROC_CTLS2_APIC_REG_VIRT
+ | VMX_PROC_CTLS2_VIRT_INT_DELIVERY
+ | VMX_PROC_CTLS2_VMFUNC));
+
+ /*
+ * VM-entry controls:
+ * These controls contain state that depends on the nested-guest state (primarily
+ * the EFER MSR) and are thus not constant between VMLAUNCH/VMRESUME and the
+ * nested-guest VM-exit. Although the nested hypervisor cannot change them, we need
+ * to in order to properly continue executing the nested-guest if the EFER MSR
+ * changes but does not cause a nested-guest VM-exit.
+ *
+ * VM-exit controls:
+ * These controls specify the host state on return. We cannot use the controls from
+ * the nested hypervisor state as-is as they would contain the guest state rather than
+ * the host state. Since the host state is subject to change (e.g. preemption, trips
+ * to ring-3, longjmp and rescheduling to a different host CPU) they are not constant
+ * through VMLAUNCH/VMRESUME and the nested-guest VM-exit.
+ *
+ * VM-entry MSR-load:
+ * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU
+ * context by the VMLAUNCH/VMRESUME instruction emulation.
+ *
+ * VM-exit MSR-store:
+ * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context
+ * back into the VM-exit MSR-store area.
+ *
+ * VM-exit MSR-load areas:
+ * This must contain the real host MSRs with hardware-assisted VMX execution. Hence, we
+ * can entirely ignore what the nested hypervisor wants to load here.
+ */
+
+ /*
+ * Exception bitmap.
+ *
+ * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap
+ * here (and avoid doing anything while exporting nested-guest state), but to keep the
+ * code more flexible if intercepting exceptions become more dynamic in the future we do
+ * it as part of exporting the nested-guest state.
+ */
+ uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap;
+
+ /*
+ * CR0/CR4 guest/host mask.
+ *
+ * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must
+ * cause VM-exits, so we need to merge them here.
+ */
+ uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask;
+ uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask;
+
+ /*
+ * Page-fault error-code mask and match.
+ *
+ * Although we require unrestricted guest execution (and thereby nested-paging) for
+ * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't
+ * normally intercept #PFs, it might intercept them for debugging purposes.
+ *
+ * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters.
+ * If the outer guest is intercepting #PFs, we must intercept all #PFs.
+ */
+ uint32_t u32XcptPFMask;
+ uint32_t u32XcptPFMatch;
+ if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF)))
+ {
+ u32XcptPFMask = pVmcsNstGst->u32XcptPFMask;
+ u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch;
+ }
+ else
+ {
+ u32XcptPFMask = 0;
+ u32XcptPFMatch = 0;
+ }
+
+ /*
+ * Pause-Loop exiting.
+ */
+ /** @todo r=bird: given that both pVM->hm.s.vmx.cPleGapTicks and
+ * pVM->hm.s.vmx.cPleWindowTicks default to zero, I cannot see how
+ * this will work... */
+ uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap);
+ uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow);
+
+ /*
+ * Pending debug exceptions.
+ * Currently just copy whatever the nested-guest provides us.
+ */
+ uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u;
+
+ /*
+ * I/O Bitmap.
+ *
+ * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always
+ * intercept all I/O port accesses.
+ */
+ Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT);
+ Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS));
+
+ /*
+ * VMCS shadowing.
+ *
+ * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be
+ * enabled while executing the nested-guest.
+ */
+ Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING));
+
+ /*
+ * APIC-access page.
+ */
+ RTHCPHYS HCPhysApicAccess;
+ if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
+ {
+ Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
+ RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
+
+ void *pvPage;
+ PGMPAGEMAPLOCK PgLockApicAccess;
+ int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess);
+ if (RT_SUCCESS(rc))
+ {
+ rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess);
+ AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc);
+
+ /** @todo Handle proper releasing of page-mapping lock later. */
+ PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess);
+ }
+ else
+ return rc;
+ }
+ else
+ HCPhysApicAccess = 0;
+
+ /*
+ * Virtual-APIC page and TPR threshold.
+ */
+ RTHCPHYS HCPhysVirtApic;
+ uint32_t u32TprThreshold;
+ if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
+ {
+ Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW);
+ RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u;
+
+ void *pvPage;
+ PGMPAGEMAPLOCK PgLockVirtApic;
+ int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic);
+ if (RT_SUCCESS(rc))
+ {
+ rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic);
+ AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc);
+
+ /** @todo Handle proper releasing of page-mapping lock later. */
+ PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic);
+ }
+ else
+ return rc;
+
+ u32TprThreshold = pVmcsNstGst->u32TprThreshold;
+ }
+ else
+ {
+ HCPhysVirtApic = 0;
+ u32TprThreshold = 0;
+
+ /*
+ * We must make sure CR8 reads/writes cause VM-exits when TPR shadowing is not
+ * used by the nested hypervisor. Preventing MMIO accesses to the physical APIC will
+ * be taken care of by EPT/shadow paging.
+ */
+ if (pVM->hmr0.s.fAllow64BitGuests)
+ u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT
+ | VMX_PROC_CTLS_CR8_LOAD_EXIT;
+ }
+
+ /*
+ * Validate basic assumptions.
+ */
+ PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
+ Assert(pVM->hmr0.s.vmx.fUnrestrictedGuest);
+ Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
+ Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst);
+
+ /*
+ * Commit it to the nested-guest VMCS.
+ */
+ int rc = VINF_SUCCESS;
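+ /* Most fields are written only when they differ from the cached values, to keep the number of VMWRITEs down. */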
+ if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls)
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls);
+ if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls)
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls);
+ if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2)
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2);
+ if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap)
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
+ if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask)
+ rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask);
+ if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask)
+ rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask);
+ if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask)
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask);
+ if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch)
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch);
+ if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT)
+ && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
+ {
+ Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks);
+ }
+ if (pVmcsInfoNstGst->HCPhysVirtApic != HCPhysVirtApic)
+ rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
+ if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
+ rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
+ rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts);
+ AssertRC(rc);
+
+ /*
+ * Update the nested-guest VMCS cache.
+ */
+ pVmcsInfoNstGst->u32PinCtls = u32PinCtls;
+ pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls;
+ pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2;
+ pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap;
+ pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask;
+ pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask;
+ pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask;
+ pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch;
+ pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic;
+
+ /*
+ * We need to flush the TLB if we are switching the APIC-access page address.
+ * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
+ */
+ if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
+ pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true;
+
+ /*
+ * MSR bitmap.
+ *
+ * The MSR bitmap address has already been initialized while setting up the nested-guest
+ * VMCS, here we need to merge the MSR bitmaps.
+ */
+ if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst);
+
+ return VINF_SUCCESS;
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+
+/**
+ * Does the preparations before executing guest code in VT-x.
+ *
+ * This may cause longjmps to ring-3 and may even result in rescheduling to the
+ * recompiler/IEM. We must be cautious about what we do here regarding committing
+ * guest-state information into the VMCS assuming we assuredly execute the
+ * guest in VT-x mode.
+ *
+ * If we fall back to the recompiler/IEM after updating the VMCS and clearing
+ * the common-state (TRPM/forceflags), we must undo those changes so that the
+ * recompiler/IEM can (and should) use them when it resumes guest execution.
+ * Otherwise such operations must be done when we can no longer exit to ring-3.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
+ * have been disabled.
+ * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating
+ * pending events).
+ * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
+ * double-fault into the guest.
+ * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
+ * dispatched directly.
+ * @retval VINF_* scheduling changes, we have to go back to ring-3.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param fStepping Whether we are single-stepping the guest in the
+ * hypervisor debugger. Makes us ignore some of the reasons
+ * for returning to ring-3, and return VINF_EM_DBG_STEPPED
+ * if event dispatching took place.
+ */
+static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
+{
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+
+ Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping));
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ if (pVmxTransient->fIsNestedGuest)
+ {
+ RT_NOREF2(pVCpu, fStepping);
+ Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
+ return VINF_EM_RESCHEDULE_REM;
+ }
+#endif
+
+ /*
+ * Check and process force flag actions, some of which might require us to go back to ring-3.
+ */
+ VBOXSTRICTRC rcStrict = vmxHCCheckForceFlags(pVCpu, pVmxTransient->fIsNestedGuest, fStepping);
+ if (rcStrict == VINF_SUCCESS)
+ {
+ /* FFs don't get set all the time. */
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if ( pVmxTransient->fIsNestedGuest
+ && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
+ return VINF_VMX_VMEXIT;
+ }
+#endif
+ }
+ else
+ return rcStrict;
+
+ /*
+ * Virtualize memory-mapped accesses to the physical APIC (may take locks).
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase
+ && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
+ && PDMHasApic(pVM))
+ {
+ /* Get the APIC base MSR from the virtual APIC device. */
+ uint64_t const uApicBaseMsr = APICGetBaseMsrNoCheck(pVCpu);
+
+ /* Map the APIC access page. */
+ int rc = hmR0VmxMapHCApicAccessPage(pVCpu, uApicBaseMsr & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
+ AssertRCReturn(rc, rc);
+
+ /* Update the per-VCPU cache of the APIC base MSR corresponding to the mapped APIC access page. */
+ pVCpu->hm.s.vmx.u64GstMsrApicBase = uApicBaseMsr;
+ }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * Merge guest VMCS controls with the nested-guest VMCS controls.
+ *
+ * Even if we have not executed the guest prior to this (e.g. when resuming from a
+ * saved state), we should be okay with merging controls as we initialize the
+ * guest VMCS controls as part of VM setup phase.
+ */
+ if ( pVmxTransient->fIsNestedGuest
+ && !pVCpu->hm.s.vmx.fMergedNstGstCtls)
+ {
+ int rc = hmR0VmxMergeVmcsNested(pVCpu);
+ AssertRCReturn(rc, rc);
+ pVCpu->hm.s.vmx.fMergedNstGstCtls = true;
+ }
+#endif
+
+ /*
+ * Evaluate events to be injected into the guest.
+ *
+ * Events in TRPM can be injected without inspecting the guest state.
+ * If any new events (interrupts/NMI) are pending currently, we try to set up the
+ * guest to cause a VM-exit the next time they are ready to receive the event.
+ */
+ if (TRPMHasTrap(pVCpu))
+ vmxHCTrpmTrapToPendingEvent(pVCpu);
+
+ uint32_t fIntrState;
+ rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
+ &fIntrState);
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * While evaluating pending events if something failed (unlikely) or if we were
+ * preparing to run a nested-guest but performed a nested-guest VM-exit, we should bail.
+ */
+ if (rcStrict != VINF_SUCCESS)
+ return rcStrict;
+ if ( pVmxTransient->fIsNestedGuest
+ && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
+ return VINF_VMX_VMEXIT;
+ }
+#else
+ Assert(rcStrict == VINF_SUCCESS);
+#endif
+
+ /*
+ * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus
+ * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
+ * also result in triple-faulting the VM.
+ *
+ * With nested-guests, the above does not apply since unrestricted guest execution is a
+ * requirement. Regardless, we do this here to avoid duplicating code elsewhere.
+ */
+ rcStrict = vmxHCInjectPendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
+ fIntrState, fStepping);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ { /* likely */ }
+ else
+ {
+ AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
+ ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+ return rcStrict;
+ }
+
+ /*
+ * A longjump might result in importing CR3 even for VM-exits that don't necessarily
+ * import CR3 themselves. We will need to update them here, as even as late as the above
+ * vmxHCInjectPendingEvent() call may lazily import guest-CPU state on demand causing
+ * the below force flags to be set.
+ */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
+ {
+ Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
+ int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
+ AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
+ ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
+ }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /* Paranoia. */
+ Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
+#endif
+
+ /*
+ * No longjmps to ring-3 from this point on!!!
+ * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
+ * This also disables flushing of the R0-logger instance (if any).
+ */
+ VMMRZCallRing3Disable(pVCpu);
+
+ /*
+ * Export the guest state bits.
+ *
+ * We cannot perform longjmps while loading the guest state because we do not preserve the
+ * host/guest state (although the VMCS will be preserved) across longjmps which can cause
+ * CPU migration.
+ *
+ * If we are injecting events to a real-on-v86 mode guest, we would have updated RIP and some segment
+ * registers. Hence, exporting of the guest state needs to be done -after- injection of events.
+ */
+ rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ { /* likely */ }
+ else
+ {
+ VMMRZCallRing3Enable(pVCpu);
+ return rcStrict;
+ }
+
+ /*
+ * We disable interrupts so that we don't miss any interrupts that would flag preemption
+ * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
+ * preemption disabled for a while. Since this is purely to aid the
+ * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily re-enable and
+ * disable interrupts on NT.
+ *
+ * We need to check for force-flags that could've possibly been altered since we last
+ * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
+ * see @bugref{6398}).
+ *
+ * We also check a couple of other force-flags as a last opportunity to get the EMT back
+ * to ring-3 before executing guest code.
+ */
+ pVmxTransient->fEFlags = ASMIntDisableFlags();
+
+ if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
+ || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
+ {
+ if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * If we are executing a nested-guest, make sure that subsequent events are
+ * intercepted. The one we are injecting might be part of VM-entry. This is
+ * mainly to keep the VM-exit instruction emulation happy.
+ */
+ if (pVmxTransient->fIsNestedGuest)
+ CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true);
+#endif
+
+ /*
+ * We've injected any pending events. This is really the point of no return (to ring-3).
+ *
+ * Note! The caller expects to continue with interrupts & longjmps disabled on successful
+ * returns from this function, so do -not- enable them here.
+ */
+ pVCpu->hm.s.Event.fPending = false;
+ return VINF_SUCCESS;
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
+ rcStrict = VINF_EM_RAW_INTERRUPT;
+ }
+ else
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
+ rcStrict = VINF_EM_RAW_TO_R3;
+ }
+
+ ASMSetFlags(pVmxTransient->fEFlags);
+ VMMRZCallRing3Enable(pVCpu);
+
+ return rcStrict;
+}
+
+
+/**
+ * Final preparations before executing guest code using hardware-assisted VMX.
+ *
+ * We can no longer get preempted to a different host CPU and there are no returns
+ * to ring-3. We ignore any errors that may happen from this point (e.g. VMWRITE
+ * failures); this function is not intended to fail except on unrecoverable hardware
+ * errors.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks Called with preemption disabled.
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!pVCpu->hm.s.Event.fPending);
+
+ /*
+ * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
+ */
+ VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
+ RTCPUID const idCurrentCpu = pHostCpu->idCpu;
+
+ if (!CPUMIsGuestFPUStateActive(pVCpu))
+ {
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
+ if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT;
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
+ }
+
+ /*
+ * Re-export the host state bits as we may've been preempted (only happens when
+ * thread-context hooks are used or when the VM start function changes) or if
+ * the host CR0 is modified while loading the guest FPU state above.
+ *
+ * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we
+ * changed the switcher back to 32-bit, we *must* save the 32-bit host state here,
+ * see @bugref{8432}.
+ *
+ * This may also happen when switching to/from a nested-guest VMCS without leaving
+ * ring-0.
+ */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
+ {
+ hmR0VmxExportHostState(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState);
+ }
+ Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
+
+ /*
+ * Export the state shared between host and guest (FPU, debug, lazy MSRs).
+ */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
+ hmR0VmxExportSharedState(pVCpu, pVmxTransient);
+ AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
+
+ /*
+ * Store status of the shared guest/host debug state at the time of VM-entry.
+ */
+ pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
+ pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
+
+ /*
+ * Always cache the TPR-shadow if the virtual-APIC page exists, thereby skipping
+ * more than one conditional check. The post-run side of our code shall determine
+ * if it needs to sync. the virtual APIC TPR with the TPR-shadow.
+ */
+ if (pVmcsInfo->pbVirtApic)
+ pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
+
+ /*
+ * Update the host MSRs values in the VM-exit MSR-load area.
+ */
+ if (!pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs)
+ {
+ if (pVmcsInfo->cExitMsrLoad > 0)
+ hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo);
+ pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = true;
+ }
+
+ /*
+ * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the
+ * VMX-preemption timer based on the next virtual sync clock deadline.
+ */
+ if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer
+ || idCurrentCpu != pVCpu->hmr0.s.idLastCpu)
+ {
+ hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient, idCurrentCpu);
+ pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true;
+ }
+
+ /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
+ bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT);
+ if (!fIsRdtscIntercepted)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
+
+ ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
+ hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */
+ Assert(idCurrentCpu == pVCpu->hmr0.s.idLastCpu);
+ pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */
+ pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */
+ pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */
+
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
+
+ TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution,
+ as we're about to start executing the guest. */
+
+ /*
+ * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP.
+ *
+ * This is done this late as updating the TSC offsetting/preemption timer above
+ * figures out if we can skip intercepting RDTSCP by calculating the number of
+ * host CPU ticks till the next virtual sync deadline (for the dynamic case).
+ */
+ if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
+ && !fIsRdtscIntercepted)
+ {
+ vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX);
+
+ /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true,
+ it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */
+ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu),
+ true /* fSetReadWrite */, true /* fUpdateHostMsr */);
+ AssertRC(rc);
+ Assert(!pVmxTransient->fRemoveTscAuxMsr);
+ pVmxTransient->fRemoveTscAuxMsr = true;
+ }
+
+#ifdef VBOX_STRICT
+ Assert(pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs);
+ hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
+ hmR0VmxCheckHostEferMsr(pVmcsInfo);
+ AssertRC(vmxHCCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest));
+#endif
+
+#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
+ /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here.
+ * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs,
+ * see @bugref{9180#c54}. */
+ uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
+ if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
+ Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
+#endif
+}
+
+
+/**
+ * First C routine invoked after running guest code using hardware-assisted VMX.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
+ *
+ * @remarks Called with interrupts disabled, and returns with interrupts enabled!
+ *
+ * @remarks No-long-jump zone!!! This function will however re-enable longjmps
+ * unconditionally when it is safe to do so.
+ */
+static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
+{
+ ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
+ ASMAtomicIncU32(&pVCpu->hmr0.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
+ pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
+ pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
+ pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
+ pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
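+ /* If RDTSC was not intercepted, the guest has been reading the host TSC adjusted by the VMCS TSC offset; inform TM of the last guest TSC value seen. */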
+ if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
+ {
+ uint64_t uGstTsc;
+ if (!pVmxTransient->fIsNestedGuest)
+ uGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
+ else
+ {
+ uint64_t const uNstGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
+ uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc);
+ }
+ TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */
+ }
+
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
+ TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->hmr0.s.uTscExit); /* Notify TM that the guest is no longer running. */
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
+
+ pVCpu->hmr0.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */
+ pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
+#ifdef VBOX_STRICT
+ hmR0VmxCheckHostEferMsr(pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */
+#endif
+ Assert(!ASMIntAreEnabled());
+ ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+#ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT
+ /*
+ * Clean all the VMCS fields in the transient structure before reading
+ * anything from the VMCS.
+ */
+ pVmxTransient->uExitReason = 0;
+ pVmxTransient->uExitIntErrorCode = 0;
+ pVmxTransient->uExitQual = 0;
+ pVmxTransient->uGuestLinearAddr = 0;
+ pVmxTransient->uExitIntInfo = 0;
+ pVmxTransient->cbExitInstr = 0;
+ pVmxTransient->ExitInstrInfo.u = 0;
+ pVmxTransient->uEntryIntInfo = 0;
+ pVmxTransient->uEntryXcptErrorCode = 0;
+ pVmxTransient->cbEntryInstr = 0;
+ pVmxTransient->uIdtVectoringInfo = 0;
+ pVmxTransient->uIdtVectoringErrorCode = 0;
+#endif
+
+ /*
+ * Save the basic VM-exit reason and check if the VM-entry failed.
+ * See Intel spec. 24.9.1 "Basic VM-exit Information".
+ */
+ uint32_t uExitReason;
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
+ AssertRC(rc);
+ pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
+ pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
+
+ /*
+ * Log the VM-exit before logging anything else as otherwise it might be a
+ * tad confusing what happens before and after the world-switch.
+ */
+ HMVMX_LOG_EXIT(pVCpu, uExitReason);
+
+ /*
+ * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR
+ * bitmap permissions, if it was added before VM-entry.
+ */
+ if (pVmxTransient->fRemoveTscAuxMsr)
+ {
+ hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX);
+ pVmxTransient->fRemoveTscAuxMsr = false;
+ }
+
+ /*
+ * Check if VMLAUNCH/VMRESUME succeeded.
+ * If this failed, we cause a guru meditation and cease further execution.
+ */
+ if (RT_LIKELY(rcVMRun == VINF_SUCCESS))
+ {
+ /*
+ * Update the VM-exit history array here even if the VM-entry failed due to:
+ * - Invalid guest state.
+ * - MSR loading.
+ * - Machine-check event.
+ *
+         * In any of the above cases we will still have a "valid" VM-exit reason
+         * despite @a fVMEntryFailed being true.
+ *
+ * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
+ *
+ * Note! We don't have CS or RIP at this point. Will probably address that later
+ * by amending the history entry added here.
+ */
+ EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
+ UINT64_MAX, pVCpu->hmr0.s.uTscExit);
+
+ if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
+ {
+ VMMRZCallRing3Enable(pVCpu);
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
+
+#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
+ vmxHCReadAllRoFieldsVmcs(pVCpu, pVmxTransient);
+#endif
+
+ /*
+             * Always import the guest-interruptibility state as we need it while evaluating
+             * event injection on re-entry. We could in *theory* postpone reading it for
+             * exits that do not involve instruction emulation, but since most exits are
+             * for instruction emulation (the exceptions being external interrupts, page
+             * faults from building shadow page tables, EPT violations, and interrupt-window
+             * exits) this is a reasonable simplification.
+ *
+ * We don't import CR0 (when unrestricted guest execution is unavailable) despite
+ * checking for real-mode while exporting the state because all bits that cause
+ * mode changes wrt CR0 are intercepted.
+ *
+             * Note! This mask _must_ match the default a_fDonePostExit value of the
+             * vmxHCImportGuestState template!
+ */
+ /** @todo r=bird: consider dropping the INHIBIT_XXX and fetch the state
+ * explicitly in the exit handlers and injection function. That way we have
+ * fewer clusters of vmread spread around the code, because the EM history
+ * executor won't execute very many non-exiting instructions before stopping. */
+ rc = vmxHCImportGuestState< CPUMCTX_EXTRN_INHIBIT_INT
+ | CPUMCTX_EXTRN_INHIBIT_NMI
+#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
+ | HMVMX_CPUMCTX_EXTRN_ALL
+#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
+ | CPUMCTX_EXTRN_RFLAGS
+#endif
+ , 0 /*a_fDoneLocal*/, 0 /*a_fDonePostExit*/>(pVCpu, pVmcsInfo, __FUNCTION__);
+ AssertRC(rc);
+
+ /*
+ * Sync the TPR shadow with our APIC state.
+ */
+ if ( !pVmxTransient->fIsNestedGuest
+ && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
+ {
+ Assert(pVmcsInfo->pbVirtApic);
+ if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR])
+ {
+ rc = APICSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]);
+ AssertRC(rc);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ }
+ }
+
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+ Assert( pVmxTransient->fWasGuestDebugStateActive == false
+ || pVmxTransient->fWasHyperDebugStateActive == false);
+ return;
+ }
+ }
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ else if (pVmxTransient->fIsNestedGuest)
+ AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n"));
+#endif
+ else
+ Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
+
+ VMMRZCallRing3Enable(pVCpu);
+}
+
+
+/**
+ * Runs the guest code using hardware-assisted VMX the normal way.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pcLoops Pointer to the number of executed loops.
+ */
+static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
+{
+ uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
+ Assert(pcLoops);
+ Assert(*pcLoops <= cMaxResumeLoops);
+ Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * Switch to the guest VMCS as we may have transitioned from executing the nested-guest
+ * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the
+ * guest VMCS while entering the VMX ring-0 session.
+ */
+ if (pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
+ {
+ int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ {
+ LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc));
+ return rc;
+ }
+ }
+#endif
+
+ VMXTRANSIENT VmxTransient;
+ RT_ZERO(VmxTransient);
+ VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+
+ /* Paranoia. */
+ Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfo);
+
+ VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
+ for (;;)
+ {
+ Assert(!HMR0SuspendPending());
+ HMVMX_ASSERT_CPU_SAFE(pVCpu);
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+
+ /*
+         * Preparatory work for running guest code; this may force us to
+         * return to ring-3.
+ *
+ * Warning! This bugger disables interrupts on VINF_SUCCESS!
+ */
+ rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
+ if (rcStrict != VINF_SUCCESS)
+ break;
+
+ /* Interrupts are disabled at this point! */
+ hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
+ int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
+ hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
+ /* Interrupts are re-enabled at this point! */
+
+ /*
+ * Check for errors with running the VM (VMLAUNCH/VMRESUME).
+ */
+ if (RT_SUCCESS(rcRun))
+ { /* very likely */ }
+ else
+ {
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
+ hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
+ return rcRun;
+ }
+
+ /*
+ * Profile the VM-exit.
+ */
+ AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
+ STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
+ HMVMX_START_EXIT_DISPATCH_PROF();
+
+ VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
+
+ /*
+ * Handle the VM-exit.
+ */
+#ifdef HMVMX_USE_FUNCTION_TABLE
+ rcStrict = g_aVMExitHandlers[VmxTransient.uExitReason].pfn(pVCpu, &VmxTransient);
+#else
+ rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient);
+#endif
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
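+        /* Keep looping while the exit handler is happy; bail out to ring-3 with
+           VINF_EM_RAW_INTERRUPT once the resume-loop budget is exhausted. */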
+ if (rcStrict == VINF_SUCCESS)
+ {
+ if (++(*pcLoops) <= cMaxResumeLoops)
+ continue;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+ rcStrict = VINF_EM_RAW_INTERRUPT;
+ }
+ break;
+ }
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+ return rcStrict;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Runs the nested-guest code using hardware-assisted VMX.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pcLoops Pointer to the number of executed loops.
+ *
+ * @sa hmR0VmxRunGuestCodeNormal.
+ */
+static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
+{
+ uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
+ Assert(pcLoops);
+ Assert(*pcLoops <= cMaxResumeLoops);
+ Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
+
+ /*
+ * Switch to the nested-guest VMCS as we may have transitioned from executing the
+ * guest without leaving ring-0. Otherwise, if we came from ring-3 we would have
+ * loaded the nested-guest VMCS while entering the VMX ring-0 session.
+ */
+ if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
+ {
+ int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ {
+ LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rc));
+ return rc;
+ }
+ }
+
+ VMXTRANSIENT VmxTransient;
+ RT_ZERO(VmxTransient);
+ VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+ VmxTransient.fIsNestedGuest = true;
+
+ /* Paranoia. */
+ Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfoNstGst);
+
+ /* Setup pointer so PGM/IEM can query VM-exit auxiliary info on demand in ring-0. */
+ pVCpu->hmr0.s.vmx.pVmxTransient = &VmxTransient;
+
+ VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
+ for (;;)
+ {
+ Assert(!HMR0SuspendPending());
+ HMVMX_ASSERT_CPU_SAFE(pVCpu);
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+
+ /*
+         * Preparatory work for running nested-guest code; this may force us to
+         * return to ring-3.
+ *
+ * Warning! This bugger disables interrupts on VINF_SUCCESS!
+ */
+ rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
+ if (rcStrict != VINF_SUCCESS)
+ break;
+
+ /* Interrupts are disabled at this point! */
+ hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
+ int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
+ hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
+ /* Interrupts are re-enabled at this point! */
+
+ /*
+ * Check for errors with running the VM (VMLAUNCH/VMRESUME).
+ */
+ if (RT_SUCCESS(rcRun))
+ { /* very likely */ }
+ else
+ {
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
+ hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
+ rcStrict = rcRun;
+ break;
+ }
+
+ /*
+ * Profile the VM-exit.
+ */
+ AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll);
+ STAM_COUNTER_INC(&pVCpu->hm.s.aStatNestedExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
+ HMVMX_START_EXIT_DISPATCH_PROF();
+
+ VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
+
+ /*
+ * Handle the VM-exit.
+ */
+ rcStrict = vmxHCHandleExitNested(pVCpu, &VmxTransient);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
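+        /* On success, check whether the exit handler caused a nested VM-exit back to the outer
+           guest; if so, signal VINF_VMX_VMEXIT to the caller. Otherwise keep iterating until
+           the resume-loop budget is exhausted. */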
+ if (rcStrict == VINF_SUCCESS)
+ {
+ if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
+ rcStrict = VINF_VMX_VMEXIT;
+ }
+ else
+ {
+ if (++(*pcLoops) <= cMaxResumeLoops)
+ continue;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+ rcStrict = VINF_EM_RAW_INTERRUPT;
+ }
+ }
+ else
+ Assert(rcStrict != VINF_VMX_VMEXIT);
+ break;
+ }
+
+ /* Ensure VM-exit auxiliary info. is no longer available. */
+ pVCpu->hmr0.s.vmx.pVmxTransient = NULL;
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+ return rcStrict;
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+
+/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
+ * probes.
+ *
+ * The following few functions and the associated structure contain the bloat
+ * necessary for providing detailed debug events and dtrace probes as well as
+ * reliable host-side single stepping. This works on the principle of
+ * "subclassing" the normal execution loop and workers. We replace the loop
+ * method completely and override selected helpers to add necessary adjustments
+ * to their core operation.
+ *
+ * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
+ * any performance for debug and analysis features.
+ *
+ * @{
+ */
+
+/**
+ * Single steps guest code using hardware-assisted VMX.
+ *
+ * This is -not- the same as the guest single-stepping itself (say, using EFLAGS.TF),
+ * but rather single-stepping driven by the hypervisor debugger.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pcLoops Pointer to the number of executed loops.
+ *
+ * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
+ */
+static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops)
+{
+ uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
+ Assert(pcLoops);
+ Assert(*pcLoops <= cMaxResumeLoops);
+
+ VMXTRANSIENT VmxTransient;
+ RT_ZERO(VmxTransient);
+ VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+
+ /* Set HMCPU indicators. */
+ bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
+ pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
+ pVCpu->hmr0.s.fDebugWantRdTscExit = false;
+ pVCpu->hmr0.s.fUsingDebugLoop = true;
+
+ /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
+ VMXRUNDBGSTATE DbgState;
+ vmxHCRunDebugStateInit(pVCpu, &VmxTransient, &DbgState);
+ vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
+
+ /*
+ * The loop.
+ */
+ VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
+ for (;;)
+ {
+ Assert(!HMR0SuspendPending());
+ HMVMX_ASSERT_CPU_SAFE(pVCpu);
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+ bool fStepping = pVCpu->hm.s.fSingleInstruction;
+
+ /* Set up VM-execution controls the next two can respond to. */
+ vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
+
+ /*
+         * Preparatory work for running guest code; this may force us to
+ * return to ring-3.
+ *
+ * Warning! This bugger disables interrupts on VINF_SUCCESS!
+ */
+ rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
+ if (rcStrict != VINF_SUCCESS)
+ break;
+
+ /* Interrupts are disabled at this point! */
+ hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
+
+ /* Override any obnoxious code in the above two calls. */
+ vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
+
+ /*
+ * Finally execute the guest.
+ */
+ int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
+
+ hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
+ /* Interrupts are re-enabled at this point! */
+
+ /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
+ if (RT_SUCCESS(rcRun))
+ { /* very likely */ }
+ else
+ {
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
+ hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
+ return rcRun;
+ }
+
+ /* Profile the VM-exit. */
+ AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDebugExitAll);
+ STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
+ HMVMX_START_EXIT_DISPATCH_PROF();
+
+ VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
+
+ /*
+         * Handle the VM-exit - we quit earlier on certain VM-exits, see vmxHCRunDebugHandleExit().
+ */
+ rcStrict = vmxHCRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
+ if (rcStrict != VINF_SUCCESS)
+ break;
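+        /* Enforce the resume-loop budget here as well so the debug loop periodically
+           returns to ring-3. */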
+ if (++(*pcLoops) > cMaxResumeLoops)
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+ rcStrict = VINF_EM_RAW_INTERRUPT;
+ break;
+ }
+
+ /*
+         * Stepping: Did the RIP change? If so, consider it a single step.
+         * Otherwise, make sure one of the TFs gets set.
+ */
+ if (fStepping)
+ {
+ int rc = vmxHCImportGuestStateEx(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ AssertRC(rc);
+ if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
+ || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
+ {
+ rcStrict = VINF_EM_DBG_STEPPED;
+ break;
+ }
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
+ }
+
+ /*
+         * Update when dtrace settings change (DBGF kicks us, so no need to check).
+ */
+ if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo)
+ vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
+
+        /* Restore all controls applied by vmxHCPreRunGuestDebugStateApply above. */
+ rcStrict = vmxHCRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict);
+ Assert(rcStrict == VINF_SUCCESS);
+ }
+
+ /*
+ * Clear the X86_EFL_TF if necessary.
+ */
+ if (pVCpu->hmr0.s.fClearTrapFlag)
+ {
+ int rc = vmxHCImportGuestStateEx(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
+ AssertRC(rc);
+ pVCpu->hmr0.s.fClearTrapFlag = false;
+ pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
+ }
+ /** @todo there seems to be issues with the resume flag when the monitor trap
+ * flag is pending without being used. Seen early in bios init when
+ * accessing APIC page in protected mode. */
+
+/** @todo we need to do vmxHCRunDebugStateRevert here too, in case we broke
+ * out of the above loop. */
+
+ /* Restore HMCPU indicators. */
+ pVCpu->hmr0.s.fUsingDebugLoop = false;
+ pVCpu->hmr0.s.fDebugWantRdTscExit = false;
+ pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+ return rcStrict;
+}
+
+/** @} */
+
+
+/**
+ * Checks if any expensive dtrace probes are enabled and we should go to the
+ * debug loop.
+ *
+ * @returns true if we should use debug loop, false if not.
+ */
+static bool hmR0VmxAnyExpensiveProbesEnabled(void)
+{
+ /* It's probably faster to OR the raw 32-bit counter variables together.
+ Since the variables are in an array and the probes are next to one
+ another (more or less), we have good locality. So, better read
+       eight or nine cache lines every time and only have one conditional, than
+ 128+ conditionals, right? */
+ return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
+ | VBOXVMM_XCPT_DE_ENABLED_RAW()
+ | VBOXVMM_XCPT_DB_ENABLED_RAW()
+ | VBOXVMM_XCPT_BP_ENABLED_RAW()
+ | VBOXVMM_XCPT_OF_ENABLED_RAW()
+ | VBOXVMM_XCPT_BR_ENABLED_RAW()
+ | VBOXVMM_XCPT_UD_ENABLED_RAW()
+ | VBOXVMM_XCPT_NM_ENABLED_RAW()
+ | VBOXVMM_XCPT_DF_ENABLED_RAW()
+ | VBOXVMM_XCPT_TS_ENABLED_RAW()
+ | VBOXVMM_XCPT_NP_ENABLED_RAW()
+ | VBOXVMM_XCPT_SS_ENABLED_RAW()
+ | VBOXVMM_XCPT_GP_ENABLED_RAW()
+ | VBOXVMM_XCPT_PF_ENABLED_RAW()
+ | VBOXVMM_XCPT_MF_ENABLED_RAW()
+ | VBOXVMM_XCPT_AC_ENABLED_RAW()
+ | VBOXVMM_XCPT_XF_ENABLED_RAW()
+ | VBOXVMM_XCPT_VE_ENABLED_RAW()
+ | VBOXVMM_XCPT_SX_ENABLED_RAW()
+ | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
+ | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
+ ) != 0
+ || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
+ | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
+ | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
+ | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
+ | VBOXVMM_INSTR_INVD_ENABLED_RAW()
+ | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
+ | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
+ | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
+ | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
+ | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
+ | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
+ | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
+ | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
+ | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
+ | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_STR_ENABLED_RAW()
+ | VBOXVMM_INSTR_LTR_ENABLED_RAW()
+ | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
+ | VBOXVMM_INSTR_RSM_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
+ | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
+ | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
+ ) != 0
+ || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
+ | VBOXVMM_EXIT_HALT_ENABLED_RAW()
+ | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
+ | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
+ | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
+ | VBOXVMM_EXIT_INVD_ENABLED_RAW()
+ | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
+ | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
+ | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
+ | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
+ | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
+ | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
+ | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
+ | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
+ | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
+ | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_STR_ENABLED_RAW()
+ | VBOXVMM_EXIT_LTR_ENABLED_RAW()
+ | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
+ | VBOXVMM_EXIT_RSM_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
+ | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
+ | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
+ ) != 0;
+}
+
+
+/**
+ * Runs the guest using hardware-assisted VMX.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
+{
+ AssertPtr(pVCpu);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ VBOXSTRICTRC rcStrict;
+ uint32_t cLoops = 0;
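+    /* Outer loop: re-selects the appropriate inner execution loop whenever the guest
+       enters or leaves VMX non-root mode. */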
+ for (;;)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
+#else
+ NOREF(pCtx);
+ bool const fInNestedGuestMode = false;
+#endif
+ if (!fInNestedGuestMode)
+ {
+ if ( !pVCpu->hm.s.fUseDebugLoop
+ && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
+ && !DBGFIsStepping(pVCpu)
+ && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
+ rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
+ else
+ rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
+ }
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ else
+ rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);
+
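+    /* Restart the outer loop when the guest just entered VMX non-root mode via
+       VMLAUNCH/VMRESUME or when a nested VM-exit brought us back to VMX root mode,
+       so the matching inner execution loop above gets picked. */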
+ if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
+ {
+ Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
+ continue;
+ }
+ if (rcStrict == VINF_VMX_VMEXIT)
+ {
+ Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
+ continue;
+ }
+#endif
+ break;
+ }
+
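+    /* Translate loop status codes the rest of the exit path does not expect: interpreter
+       fallbacks become emulate-instruction requests and guest resets become triple faults. */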
+ int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
+ switch (rcLoop)
+ {
+ case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
+ case VINF_EM_RESET: rcStrict = VINF_EM_TRIPLE_FAULT; break;
+ }
+
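+    /* Prepare for returning to ring-3, saving/converting state as needed; if that itself
+       fails, record the HM error for diagnostics and return the failure status instead. */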
+ int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
+ if (RT_FAILURE(rc2))
+ {
+ pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
+ rcStrict = rc2;
+ }
+ Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
+ Assert(!VMMR0AssertionIsNotificationSet(pVCpu));
+ return rcStrict;
+}
+