Diffstat (limited to 'src/VBox/VMM/VMMR0')
-rw-r--r--  src/VBox/VMM/VMMR0/CPUMR0.cpp                   954
-rw-r--r--  src/VBox/VMM/VMMR0/CPUMR0A.asm                  358
-rw-r--r--  src/VBox/VMM/VMMR0/EMR0.cpp                      61
-rw-r--r--  src/VBox/VMM/VMMR0/GIMR0.cpp                    111
-rw-r--r--  src/VBox/VMM/VMMR0/GIMR0Hv.cpp                  182
-rw-r--r--  src/VBox/VMM/VMMR0/GMMR0.cpp                   5746
-rw-r--r--  src/VBox/VMM/VMMR0/GMMR0Internal.h              116
-rw-r--r--  src/VBox/VMM/VMMR0/GVMMR0.cpp                  3029
-rw-r--r--  src/VBox/VMM/VMMR0/GVMMR0Internal.h              73
-rw-r--r--  src/VBox/VMM/VMMR0/HMR0.cpp                    1862
-rw-r--r--  src/VBox/VMM/VMMR0/HMR0A.asm                   1705
-rw-r--r--  src/VBox/VMM/VMMR0/HMSVMR0.cpp                 7847
-rw-r--r--  src/VBox/VMM/VMMR0/HMSVMR0.h                     82
-rw-r--r--  src/VBox/VMM/VMMR0/HMVMXR0.cpp                17380
-rw-r--r--  src/VBox/VMM/VMMR0/HMVMXR0.h                     56
-rw-r--r--  src/VBox/VMM/VMMR0/IOMR0.cpp                     57
-rw-r--r--  src/VBox/VMM/VMMR0/IOMR0IoPort.cpp              382
-rw-r--r--  src/VBox/VMM/VMMR0/IOMR0Mmio.cpp                378
-rw-r--r--  src/VBox/VMM/VMMR0/Makefile.kup                   0
-rw-r--r--  src/VBox/VMM/VMMR0/NEMR0Native-win.cpp         2616
-rw-r--r--  src/VBox/VMM/VMMR0/PDMR0DevHlp.cpp             1558
-rw-r--r--  src/VBox/VMM/VMMR0/PDMR0Device.cpp              803
-rw-r--r--  src/VBox/VMM/VMMR0/PDMR0Driver.cpp              163
-rw-r--r--  src/VBox/VMM/VMMR0/PGMR0.cpp                    807
-rw-r--r--  src/VBox/VMM/VMMR0/PGMR0Bth.h                    25
-rw-r--r--  src/VBox/VMM/VMMR0/PGMR0Pool.cpp                153
-rw-r--r--  src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp          171
-rw-r--r--  src/VBox/VMM/VMMR0/VMMR0.cpp                   2753
-rw-r--r--  src/VBox/VMM/VMMR0/VMMR0.def                    120
-rw-r--r--  src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm          491
-rw-r--r--  src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm            401
-rw-r--r--  src/VBox/VMM/VMMR0/VMMR0TripleFaultHack.cpp     209
-rw-r--r--  src/VBox/VMM/VMMR0/VMMR0TripleFaultHackA.asm    264
33 files changed, 50913 insertions, 0 deletions
diff --git a/src/VBox/VMM/VMMR0/CPUMR0.cpp b/src/VBox/VMM/VMMR0/CPUMR0.cpp
new file mode 100644
index 00000000..04cbaa97
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/CPUMR0.cpp
@@ -0,0 +1,954 @@
+/* $Id: CPUMR0.cpp $ */
+/** @file
+ * CPUM - Host Context Ring 0.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_CPUM
+#include <VBox/vmm/cpum.h>
+#include "CPUMInternal.h"
+#include <VBox/vmm/vmcc.h>
+#include <VBox/vmm/gvm.h>
+#include <VBox/err.h>
+#include <VBox/log.h>
+#include <VBox/vmm/hm.h>
+#include <iprt/assert.h>
+#include <iprt/asm-amd64-x86.h>
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+# include <iprt/mem.h>
+# include <iprt/memobj.h>
+# include <VBox/apic.h>
+#endif
+#include <iprt/x86.h>
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+/**
+ * Local APIC mappings.
+ */
+typedef struct CPUMHOSTLAPIC
+{
+ /** Indicates that the entry is in use and has valid data. */
+ bool fEnabled;
+ /** Whether it's operating in X2APIC mode (EXTD). */
+ bool fX2Apic;
+ /** The APIC version number. */
+ uint32_t uVersion;
+ /** The physical address of the APIC registers. */
+ RTHCPHYS PhysBase;
+ /** The memory object created by entering the physical address (RTR0MemObjEnterPhys). */
+ RTR0MEMOBJ hMemObj;
+ /** The mapping object for hMemObj. */
+ RTR0MEMOBJ hMapObj;
+ /** The mapping address of the APIC registers.
+ * @remarks Different CPUs may use the same physical address to map their
+ * APICs, so this pointer is only valid when on the CPU owning the
+ * APIC. */
+ void *pv;
+} CPUMHOSTLAPIC;
+#endif
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+static CPUMHOSTLAPIC g_aLApics[RTCPUSET_MAX_CPUS];
+#endif
+
+/**
+ * CPUID bits to unify among all cores.
+ */
+static struct
+{
+ uint32_t uLeaf; /**< Leaf to check. */
+ uint32_t uEcx; /**< which bits in ecx to unify between CPUs. */
+ uint32_t uEdx; /**< which bits in edx to unify between CPUs. */
+}
+const g_aCpuidUnifyBits[] =
+{
+ {
+ 0x00000001,
+ X86_CPUID_FEATURE_ECX_CX16 | X86_CPUID_FEATURE_ECX_MONITOR,
+ X86_CPUID_FEATURE_EDX_CX8
+ }
+};
+
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+static int cpumR0MapLocalApics(void);
+static void cpumR0UnmapLocalApics(void);
+#endif
+static int cpumR0SaveHostDebugState(PVMCPUCC pVCpu);
+
+
+/**
+ * Does the Ring-0 CPU initialization once during module load.
+ * XXX Host-CPU hot-plugging?
+ */
+VMMR0_INT_DECL(int) CPUMR0ModuleInit(void)
+{
+ int rc = VINF_SUCCESS;
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+ rc = cpumR0MapLocalApics();
+#endif
+ return rc;
+}
+
+
+/**
+ * Terminate the module.
+ */
+VMMR0_INT_DECL(int) CPUMR0ModuleTerm(void)
+{
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+ cpumR0UnmapLocalApics();
+#endif
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Check the CPUID features of this particular CPU and disable relevant features
+ * for the guest which do not exist on this CPU. We have seen systems where the
+ * X86_CPUID_FEATURE_ECX_MONITOR feature flag is only set on some host CPUs, see
+ * @bugref{5436}.
+ *
+ * @note This function might be called simultaneously on more than one CPU!
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 Pointer to the VM structure.
+ * @param pvUser2 Ignored.
+ */
+static DECLCALLBACK(void) cpumR0CheckCpuid(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PVMCC pVM = (PVMCC)pvUser1;
+
+ NOREF(idCpu); NOREF(pvUser2);
+ for (uint32_t i = 0; i < RT_ELEMENTS(g_aCpuidUnifyBits); i++)
+ {
+ /* Note! Cannot use cpumCpuIdGetLeaf from here because we're not
+ necessarily in the VM process context. So, we use the
+ legacy arrays as temporary storage. */
+
+ uint32_t uLeaf = g_aCpuidUnifyBits[i].uLeaf;
+ PCPUMCPUID pLegacyLeaf;
+ if (uLeaf < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmStd))
+ pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdPatmStd[uLeaf];
+ else if (uLeaf - UINT32_C(0x80000000) < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmExt))
+ pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdPatmExt[uLeaf - UINT32_C(0x80000000)];
+ else if (uLeaf - UINT32_C(0xc0000000) < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmCentaur))
+ pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdPatmCentaur[uLeaf - UINT32_C(0xc0000000)];
+ else
+ continue;
+
+ uint32_t eax, ebx, ecx, edx;
+ ASMCpuIdExSlow(uLeaf, 0, 0, 0, &eax, &ebx, &ecx, &edx);
+
+ ASMAtomicAndU32(&pLegacyLeaf->uEcx, ecx | ~g_aCpuidUnifyBits[i].uEcx);
+ ASMAtomicAndU32(&pLegacyLeaf->uEdx, edx | ~g_aCpuidUnifyBits[i].uEdx);
+ }
+}
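+
+/* A minimal illustration of the unification above (hypothetical values only,
+ * not part of the original sources): ASMAtomicAndU32(pVal, fCpu | ~fMask)
+ * clears a unify-mask bit as soon as one CPU lacks it, while bits outside
+ * the mask are never touched. */
+#if 0
+uint32_t const fMask = X86_CPUID_FEATURE_ECX_CX16 | X86_CPUID_FEATURE_ECX_MONITOR;
+uint32_t fLeafEcx = fMask | X86_CPUID_FEATURE_ECX_SSE3; /* leaf value stored by ring-3 */
+fLeafEcx &= (X86_CPUID_FEATURE_ECX_CX16 | X86_CPUID_FEATURE_ECX_MONITOR) | ~fMask; /* CPU 0 reports both: no change */
+fLeafEcx &= (X86_CPUID_FEATURE_ECX_CX16) | ~fMask; /* CPU 1 lacks MONITOR: bit cleared */
+/* fLeafEcx now has CX16 and SSE3 set, but no longer MONITOR. */
+#endif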
+
+
+/**
+ * Does Ring-0 CPUM initialization.
+ *
+ * This is mainly to check that the Host CPU mode is compatible
+ * with VBox.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) CPUMR0InitVM(PVMCC pVM)
+{
+ LogFlow(("CPUMR0Init: %p\n", pVM));
+
+ /*
+ * Check CR0 & CR4 flags.
+ */
+ uint32_t u32CR0 = ASMGetCR0();
+ if ((u32CR0 & (X86_CR0_PE | X86_CR0_PG)) != (X86_CR0_PE | X86_CR0_PG)) /* a bit paranoid perhaps.. */
+ {
+ Log(("CPUMR0Init: PE or PG not set. cr0=%#x\n", u32CR0));
+ return VERR_UNSUPPORTED_CPU_MODE;
+ }
+
+ /*
+ * Check for sysenter and syscall usage.
+ */
+ if (ASMHasCpuId())
+ {
+ /*
+ * SYSENTER/SYSEXIT
+ *
+ * Intel docs claim you should test both the flag and family, model &
+ * stepping because some Pentium Pro CPUs have the SEP cpuid flag set,
+ * but don't support it. AMD CPUs may support this feature in legacy
+ * mode, but it is not available in long mode. Since we switch to 32-bit
+ * mode when entering raw-mode context the feature would become
+ * accessible again on AMD CPUs, so we have to check regardless of
+ * host bitness.
+ */
+ uint32_t u32CpuVersion;
+ uint32_t u32Dummy;
+ uint32_t fFeatures; /* (Used further down to check for MSRs, so don't clobber.) */
+ ASMCpuId(1, &u32CpuVersion, &u32Dummy, &u32Dummy, &fFeatures);
+ uint32_t const u32Family = u32CpuVersion >> 8;
+ uint32_t const u32Model = (u32CpuVersion >> 4) & 0xF;
+ uint32_t const u32Stepping = u32CpuVersion & 0xF;
+ if ( (fFeatures & X86_CPUID_FEATURE_EDX_SEP)
+ && ( u32Family != 6 /* (> pentium pro) */
+ || u32Model >= 3
+ || u32Stepping >= 3
+ || !ASMIsIntelCpu())
+ )
+ {
+ /*
+ * Read the MSR and see if it's in use or not.
+ */
+ uint32_t u32 = ASMRdMsr_Low(MSR_IA32_SYSENTER_CS);
+ if (u32)
+ {
+ pVM->cpum.s.fHostUseFlags |= CPUM_USE_SYSENTER;
+ Log(("CPUMR0Init: host uses sysenter cs=%08x%08x\n", ASMRdMsr_High(MSR_IA32_SYSENTER_CS), u32));
+ }
+ }
+
+ /*
+ * SYSCALL/SYSRET
+ *
+ * This feature is indicated by the SEP bit returned in EDX by CPUID
+ * function 0x80000001. Intel CPUs only support this feature in
+ * long mode. Since we're not running 64-bit guests in raw-mode there
+ * are no issues with 32-bit intel hosts.
+ */
+ uint32_t cExt = 0;
+ ASMCpuId(0x80000000, &cExt, &u32Dummy, &u32Dummy, &u32Dummy);
+ if (ASMIsValidExtRange(cExt))
+ {
+ uint32_t fExtFeaturesEDX = ASMCpuId_EDX(0x80000001);
+ if (fExtFeaturesEDX & X86_CPUID_EXT_FEATURE_EDX_SYSCALL)
+ {
+#ifdef RT_ARCH_X86
+ if (!ASMIsIntelCpu())
+#endif
+ {
+ uint64_t fEfer = ASMRdMsr(MSR_K6_EFER);
+ if (fEfer & MSR_K6_EFER_SCE)
+ {
+ pVM->cpum.s.fHostUseFlags |= CPUM_USE_SYSCALL;
+ Log(("CPUMR0Init: host uses syscall\n"));
+ }
+ }
+ }
+ }
+
+ /*
+ * Copy MSR_IA32_ARCH_CAPABILITIES bits over into the host and guest feature
+ * structures as well as the guest MSR.
+ * Note! We assume this happens after CPUMR3Init is done, so the CPUID bits are settled.
+ */
+ pVM->cpum.s.HostFeatures.fArchRdclNo = 0;
+ pVM->cpum.s.HostFeatures.fArchIbrsAll = 0;
+ pVM->cpum.s.HostFeatures.fArchRsbOverride = 0;
+ pVM->cpum.s.HostFeatures.fArchVmmNeedNotFlushL1d = 0;
+ pVM->cpum.s.HostFeatures.fArchMdsNo = 0;
+ uint32_t const cStdRange = ASMCpuId_EAX(0);
+ if ( ASMIsValidStdRange(cStdRange)
+ && cStdRange >= 7)
+ {
+ uint32_t fEdxFeatures = ASMCpuId_EDX(7);
+ if ( (fEdxFeatures & X86_CPUID_STEXT_FEATURE_EDX_ARCHCAP)
+ && (fFeatures & X86_CPUID_FEATURE_EDX_MSR))
+ {
+ /* Host: */
+ uint64_t fArchVal = ASMRdMsr(MSR_IA32_ARCH_CAPABILITIES);
+ pVM->cpum.s.HostFeatures.fArchRdclNo = RT_BOOL(fArchVal & MSR_IA32_ARCH_CAP_F_RDCL_NO);
+ pVM->cpum.s.HostFeatures.fArchIbrsAll = RT_BOOL(fArchVal & MSR_IA32_ARCH_CAP_F_IBRS_ALL);
+ pVM->cpum.s.HostFeatures.fArchRsbOverride = RT_BOOL(fArchVal & MSR_IA32_ARCH_CAP_F_RSBO);
+ pVM->cpum.s.HostFeatures.fArchVmmNeedNotFlushL1d = RT_BOOL(fArchVal & MSR_IA32_ARCH_CAP_F_VMM_NEED_NOT_FLUSH_L1D);
+ pVM->cpum.s.HostFeatures.fArchMdsNo = RT_BOOL(fArchVal & MSR_IA32_ARCH_CAP_F_MDS_NO);
+
+ /* guest: */
+ if (!pVM->cpum.s.GuestFeatures.fArchCap)
+ fArchVal = 0;
+ else if (!pVM->cpum.s.GuestFeatures.fIbrs)
+ fArchVal &= ~MSR_IA32_ARCH_CAP_F_IBRS_ALL;
+ VMCC_FOR_EACH_VMCPU_STMT(pVM, pVCpu->cpum.s.GuestMsrs.msr.ArchCaps = fArchVal);
+ pVM->cpum.s.GuestFeatures.fArchRdclNo = RT_BOOL(fArchVal & MSR_IA32_ARCH_CAP_F_RDCL_NO);
+ pVM->cpum.s.GuestFeatures.fArchIbrsAll = RT_BOOL(fArchVal & MSR_IA32_ARCH_CAP_F_IBRS_ALL);
+ pVM->cpum.s.GuestFeatures.fArchRsbOverride = RT_BOOL(fArchVal & MSR_IA32_ARCH_CAP_F_RSBO);
+ pVM->cpum.s.GuestFeatures.fArchVmmNeedNotFlushL1d = RT_BOOL(fArchVal & MSR_IA32_ARCH_CAP_F_VMM_NEED_NOT_FLUSH_L1D);
+ pVM->cpum.s.GuestFeatures.fArchMdsNo = RT_BOOL(fArchVal & MSR_IA32_ARCH_CAP_F_MDS_NO);
+ }
+ else
+ pVM->cpum.s.HostFeatures.fArchCap = 0;
+ }
+
+ /*
+ * Unify/cross check some CPUID feature bits on all available CPU cores
+ * and threads. We've seen CPUs where the monitor support differed.
+ *
+ * Because the hyper heap isn't always mapped into ring-0, we cannot
+ * access it from a RTMpOnAll callback. We use the legacy CPUID arrays
+ * as temp ring-0 accessible memory instead, ASSUMING that they're all
+ * up to date when we get here.
+ */
+ RTMpOnAll(cpumR0CheckCpuid, pVM, NULL);
+
+ for (uint32_t i = 0; i < RT_ELEMENTS(g_aCpuidUnifyBits); i++)
+ {
+ bool fIgnored;
+ uint32_t uLeaf = g_aCpuidUnifyBits[i].uLeaf;
+ PCPUMCPUIDLEAF pLeaf = cpumCpuIdGetLeafEx(pVM, uLeaf, 0, &fIgnored);
+ if (pLeaf)
+ {
+ PCPUMCPUID pLegacyLeaf;
+ if (uLeaf < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmStd))
+ pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdPatmStd[uLeaf];
+ else if (uLeaf - UINT32_C(0x80000000) < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmExt))
+ pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdPatmExt[uLeaf - UINT32_C(0x80000000)];
+ else if (uLeaf - UINT32_C(0xc0000000) < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmCentaur))
+ pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdPatmCentaur[uLeaf - UINT32_C(0xc0000000)];
+ else
+ continue;
+
+ pLeaf->uEcx = pLegacyLeaf->uEcx;
+ pLeaf->uEdx = pLegacyLeaf->uEdx;
+ }
+ }
+
+ }
+
+
+ /*
+ * Check if debug registers are armed.
+ * This ASSUMES that DR7.GD is not set, or that it's handled transparently!
+ */
+ uint32_t u32DR7 = ASMGetDR7();
+ if (u32DR7 & X86_DR7_ENABLED_MASK)
+ {
+ VMCC_FOR_EACH_VMCPU_STMT(pVM, pVCpu->cpum.s.fUseFlags |= CPUM_USE_DEBUG_REGS_HOST);
+ Log(("CPUMR0Init: host uses debug registers (dr7=%x)\n", u32DR7));
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Trap handler for device-not-available fault (\#NM).
+ * Device not available, FP or (F)WAIT instruction.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if the guest FPU state is loaded.
+ * @retval VINF_EM_RAW_GUEST_TRAP if it is a guest trap.
+ * @retval VINF_CPUM_HOST_CR0_MODIFIED if we modified the host CR0.
+ *
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(int) CPUMR0Trap07Handler(PVMCC pVM, PVMCPUCC pVCpu)
+{
+ Assert(pVM->cpum.s.HostFeatures.fFxSaveRstor);
+ Assert(ASMGetCR4() & X86_CR4_OSFXSR);
+
+ /* If the FPU state has already been loaded, then it's a guest trap. */
+ if (CPUMIsGuestFPUStateActive(pVCpu))
+ {
+ Assert( ((pVCpu->cpum.s.Guest.cr0 & (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
+ || ((pVCpu->cpum.s.Guest.cr0 & (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS | X86_CR0_EM)));
+ return VINF_EM_RAW_GUEST_TRAP;
+ }
+
+ /*
+ * There are two basic actions:
+ * 1. Save host fpu and restore guest fpu.
+ * 2. Generate guest trap.
+ *
+ * When entering the hypervisor we'll always enable MP (for proper wait
+ * trapping) and TS (for intercepting all fpu/mmx/sse stuff). The EM flag
+ * is taken from the guest OS in order to get proper SSE handling.
+ *
+ *
+ * Actions taken depending on the guest CR0 flags:
+ *
+ * 3 2 1
+ * TS | EM | MP | FPUInstr | WAIT :: VMM Action
+ * ------------------------------------------------------------------------
+ * 0 | 0 | 0 | Exec | Exec :: Clear TS & MP, Save HC, Load GC.
+ * 0 | 0 | 1 | Exec | Exec :: Clear TS, Save HC, Load GC.
+ * 0 | 1 | 0 | #NM | Exec :: Clear TS & MP, Save HC, Load GC.
+ * 0 | 1 | 1 | #NM | Exec :: Clear TS, Save HC, Load GC.
+ * 1 | 0 | 0 | #NM | Exec :: Clear MP, Save HC, Load GC. (EM is already cleared.)
+ * 1 | 0 | 1 | #NM | #NM :: Go to guest taking trap there.
+ * 1 | 1 | 0 | #NM | Exec :: Clear MP, Save HC, Load GC. (EM is already set.)
+ * 1 | 1 | 1 | #NM | #NM :: Go to guest taking trap there.
+ */
+
+ switch (pVCpu->cpum.s.Guest.cr0 & (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS))
+ {
+ case X86_CR0_MP | X86_CR0_TS:
+ case X86_CR0_MP | X86_CR0_TS | X86_CR0_EM:
+ return VINF_EM_RAW_GUEST_TRAP;
+ default:
+ break;
+ }
+
+ return CPUMR0LoadGuestFPU(pVM, pVCpu);
+}
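+
+/* A hedged caller-side sketch (hypothetical dispatcher, not the actual HM
+ * exit handler code) of how the documented return values of the #NM handler
+ * above would typically be acted upon. */
+#if 0
+int rc = CPUMR0Trap07Handler(pVM, pVCpu);
+if (rc == VINF_EM_RAW_GUEST_TRAP)
+{
+ /* The guest already owns the FPU; reflect #NM into the guest. */
+}
+else if (rc == VINF_CPUM_HOST_CR0_MODIFIED)
+{
+ /* Guest FPU loaded, but host CR0 changed; re-sync CR0 into the VMCS. */
+}
+else
+ Assert(rc == VINF_SUCCESS); /* Guest FPU loaded, host CR0 untouched. */
+#endif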
+
+
+/**
+ * Saves the host-FPU/XMM state (if necessary) and (always) loads the guest-FPU
+ * state into the CPU.
+ *
+ * @returns VINF_SUCCESS on success, host CR0 unmodified.
+ * @returns VINF_CPUM_HOST_CR0_MODIFIED on success when the host CR0 was
+ * modified and VT-x needs to update the value in the VMCS.
+ *
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(int) CPUMR0LoadGuestFPU(PVMCC pVM, PVMCPUCC pVCpu)
+{
+ int rc;
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_USED_FPU_GUEST));
+ Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_SYNC_FPU_STATE));
+
+ if (!pVM->cpum.s.HostFeatures.fLeakyFxSR)
+ {
+ Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE));
+ rc = cpumR0SaveHostRestoreGuestFPUState(&pVCpu->cpum.s);
+ }
+ else
+ {
+ Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE) || (pVCpu->cpum.s.fUseFlags & CPUM_USED_FPU_HOST));
+ /** @todo r=ramshankar: Can't we use a cached value here
+ * instead of reading the MSR? host EFER doesn't usually
+ * change. */
+ uint64_t uHostEfer = ASMRdMsr(MSR_K6_EFER);
+ if (!(uHostEfer & MSR_K6_EFER_FFXSR))
+ rc = cpumR0SaveHostRestoreGuestFPUState(&pVCpu->cpum.s);
+ else
+ {
+ RTCCUINTREG const uSavedFlags = ASMIntDisableFlags();
+ pVCpu->cpum.s.fUseFlags |= CPUM_USED_MANUAL_XMM_RESTORE;
+ ASMWrMsr(MSR_K6_EFER, uHostEfer & ~MSR_K6_EFER_FFXSR);
+ rc = cpumR0SaveHostRestoreGuestFPUState(&pVCpu->cpum.s);
+ ASMWrMsr(MSR_K6_EFER, uHostEfer | MSR_K6_EFER_FFXSR);
+ ASMSetFlags(uSavedFlags);
+ }
+ }
+ Assert( (pVCpu->cpum.s.fUseFlags & (CPUM_USED_FPU_GUEST | CPUM_USED_FPU_HOST | CPUM_USED_FPU_SINCE_REM))
+ == (CPUM_USED_FPU_GUEST | CPUM_USED_FPU_HOST | CPUM_USED_FPU_SINCE_REM));
+ return rc;
+}
+
+
+/**
+ * Saves the guest FPU/XMM state if needed, restores the host FPU/XMM state as
+ * needed.
+ *
+ * @returns true if we saved the guest state.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(bool) CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(PVMCPUCC pVCpu)
+{
+ bool fSavedGuest;
+ Assert(pVCpu->CTX_SUFF(pVM)->cpum.s.HostFeatures.fFxSaveRstor);
+ Assert(ASMGetCR4() & X86_CR4_OSFXSR);
+ if (pVCpu->cpum.s.fUseFlags & (CPUM_USED_FPU_GUEST | CPUM_USED_FPU_HOST))
+ {
+ fSavedGuest = RT_BOOL(pVCpu->cpum.s.fUseFlags & CPUM_USED_FPU_GUEST);
+ if (!(pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE))
+ cpumR0SaveGuestRestoreHostFPUState(&pVCpu->cpum.s);
+ else
+ {
+ /* Temporarily clear MSR_K6_EFER_FFXSR or else we'll be unable to
+ save/restore the XMM state with fxsave/fxrstor. */
+ uint64_t uHostEfer = ASMRdMsr(MSR_K6_EFER);
+ if (uHostEfer & MSR_K6_EFER_FFXSR)
+ {
+ RTCCUINTREG const uSavedFlags = ASMIntDisableFlags();
+ ASMWrMsr(MSR_K6_EFER, uHostEfer & ~MSR_K6_EFER_FFXSR);
+ cpumR0SaveGuestRestoreHostFPUState(&pVCpu->cpum.s);
+ ASMWrMsr(MSR_K6_EFER, uHostEfer | MSR_K6_EFER_FFXSR);
+ ASMSetFlags(uSavedFlags);
+ }
+ else
+ cpumR0SaveGuestRestoreHostFPUState(&pVCpu->cpum.s);
+ pVCpu->cpum.s.fUseFlags &= ~CPUM_USED_MANUAL_XMM_RESTORE;
+ }
+ }
+ else
+ fSavedGuest = false;
+ Assert(!( pVCpu->cpum.s.fUseFlags
+ & (CPUM_USED_FPU_GUEST | CPUM_USED_FPU_HOST | CPUM_SYNC_FPU_STATE | CPUM_USED_MANUAL_XMM_RESTORE)));
+ return fSavedGuest;
+}
+
+
+/**
+ * Saves the host debug state, setting CPUM_USED_HOST_DEBUG_STATE and loading
+ * DR7 with safe values.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int cpumR0SaveHostDebugState(PVMCPUCC pVCpu)
+{
+ /*
+ * Save the host state.
+ */
+ pVCpu->cpum.s.Host.dr0 = ASMGetDR0();
+ pVCpu->cpum.s.Host.dr1 = ASMGetDR1();
+ pVCpu->cpum.s.Host.dr2 = ASMGetDR2();
+ pVCpu->cpum.s.Host.dr3 = ASMGetDR3();
+ pVCpu->cpum.s.Host.dr6 = ASMGetDR6();
+ /** @todo dr7 might already have been changed to 0x400; don't care right now as it's harmless. */
+ pVCpu->cpum.s.Host.dr7 = ASMGetDR7();
+
+ /* Preemption paranoia. */
+ ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_USED_DEBUG_REGS_HOST);
+
+ /*
+ * Make sure DR7 is harmless or else we could trigger breakpoints when
+ * loading guest or hypervisor DRx values later.
+ */
+ if (pVCpu->cpum.s.Host.dr7 != X86_DR7_INIT_VAL)
+ ASMSetDR7(X86_DR7_INIT_VAL);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Saves the guest DRx state residing in host registers and restore the host
+ * register values.
+ *
+ * The guest DRx state is only saved if CPUMR0LoadGuestDebugState was called,
+ * since it's assumed that we're shadowing the guest DRx register values
+ * accurately when using the combined hypervisor debug register values
+ * (CPUMR0LoadHyperDebugState).
+ *
+ * @returns true if either guest or hypervisor debug registers were loaded.
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param fDr6 Whether to include DR6 or not.
+ * @thread EMT(pVCpu)
+ */
+VMMR0_INT_DECL(bool) CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(PVMCPUCC pVCpu, bool fDr6)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ bool const fDrXLoaded = RT_BOOL(pVCpu->cpum.s.fUseFlags & (CPUM_USED_DEBUG_REGS_GUEST | CPUM_USED_DEBUG_REGS_HYPER));
+
+ /*
+ * Do we need to save the guest DRx registers loaded into host registers?
+ * (DR7 and DR6 (if fDr6 is true) are left to the caller.)
+ */
+ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_DEBUG_REGS_GUEST)
+ {
+ pVCpu->cpum.s.Guest.dr[0] = ASMGetDR0();
+ pVCpu->cpum.s.Guest.dr[1] = ASMGetDR1();
+ pVCpu->cpum.s.Guest.dr[2] = ASMGetDR2();
+ pVCpu->cpum.s.Guest.dr[3] = ASMGetDR3();
+ if (fDr6)
+ pVCpu->cpum.s.Guest.dr[6] = ASMGetDR6();
+ }
+ ASMAtomicAndU32(&pVCpu->cpum.s.fUseFlags, ~( CPUM_USED_DEBUG_REGS_GUEST | CPUM_USED_DEBUG_REGS_HYPER
+ | CPUM_SYNC_DEBUG_REGS_GUEST | CPUM_SYNC_DEBUG_REGS_HYPER));
+
+ /*
+ * Restore the host's debug state. DR0-3, DR6 and only then DR7!
+ */
+ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_DEBUG_REGS_HOST)
+ {
+ /* A bit of paranoia first... */
+ uint64_t uCurDR7 = ASMGetDR7();
+ if (uCurDR7 != X86_DR7_INIT_VAL)
+ ASMSetDR7(X86_DR7_INIT_VAL);
+
+ ASMSetDR0(pVCpu->cpum.s.Host.dr0);
+ ASMSetDR1(pVCpu->cpum.s.Host.dr1);
+ ASMSetDR2(pVCpu->cpum.s.Host.dr2);
+ ASMSetDR3(pVCpu->cpum.s.Host.dr3);
+ /** @todo consider only updating if they differ, esp. DR6. Need to figure out how
+ * expensive DRx reads are over DRx writes. */
+ ASMSetDR6(pVCpu->cpum.s.Host.dr6);
+ ASMSetDR7(pVCpu->cpum.s.Host.dr7);
+
+ ASMAtomicAndU32(&pVCpu->cpum.s.fUseFlags, ~CPUM_USED_DEBUG_REGS_HOST);
+ }
+
+ return fDrXLoaded;
+}
+
+
+/**
+ * Saves the guest DRx state if it resides in host registers.
+ *
+ * This does NOT clear any use flags, so the host registers remain loaded with
+ * the guest DRx state upon return. The purpose is only to make sure the values
+ * in the CPU context structure are up to date.
+ *
+ * @returns true if the host registers contains guest values, false if not.
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param fDr6 Whether to include DR6 or not.
+ * @thread EMT(pVCpu)
+ */
+VMMR0_INT_DECL(bool) CPUMR0DebugStateMaybeSaveGuest(PVMCPUCC pVCpu, bool fDr6)
+{
+ /*
+ * Do we need to save the guest DRx registers loaded into host registers?
+ * (DR7 and DR6 (if fDr6 is true) are left to the caller.)
+ */
+ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_DEBUG_REGS_GUEST)
+ {
+ pVCpu->cpum.s.Guest.dr[0] = ASMGetDR0();
+ pVCpu->cpum.s.Guest.dr[1] = ASMGetDR1();
+ pVCpu->cpum.s.Guest.dr[2] = ASMGetDR2();
+ pVCpu->cpum.s.Guest.dr[3] = ASMGetDR3();
+ if (fDr6)
+ pVCpu->cpum.s.Guest.dr[6] = ASMGetDR6();
+ return true;
+ }
+ return false;
+}
+
+
+/**
+ * Lazily sync in the debug state.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param fDr6 Whether to include DR6 or not.
+ * @thread EMT(pVCpu)
+ */
+VMMR0_INT_DECL(void) CPUMR0LoadGuestDebugState(PVMCPUCC pVCpu, bool fDr6)
+{
+ /*
+ * Save the host state and disarm all host BPs.
+ */
+ cpumR0SaveHostDebugState(pVCpu);
+ Assert(ASMGetDR7() == X86_DR7_INIT_VAL);
+
+ /*
+ * Activate the guest state DR0-3.
+ * DR7 and DR6 (if fDr6 is true) are left to the caller.
+ */
+ ASMSetDR0(pVCpu->cpum.s.Guest.dr[0]);
+ ASMSetDR1(pVCpu->cpum.s.Guest.dr[1]);
+ ASMSetDR2(pVCpu->cpum.s.Guest.dr[2]);
+ ASMSetDR3(pVCpu->cpum.s.Guest.dr[3]);
+ if (fDr6)
+ ASMSetDR6(pVCpu->cpum.s.Guest.dr[6]);
+
+ ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_USED_DEBUG_REGS_GUEST);
+}
+
+
+/**
+ * Lazily sync in the hypervisor debug state.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param fDr6 Whether to include DR6 or not.
+ * @thread EMT(pVCpu)
+ */
+VMMR0_INT_DECL(void) CPUMR0LoadHyperDebugState(PVMCPUCC pVCpu, bool fDr6)
+{
+ /*
+ * Save the host state and disarm all host BPs.
+ */
+ cpumR0SaveHostDebugState(pVCpu);
+ Assert(ASMGetDR7() == X86_DR7_INIT_VAL);
+
+ /*
+ * Make sure the hypervisor values are up to date.
+ */
+ CPUMRecalcHyperDRx(pVCpu, UINT8_MAX /* no loading, please */, true);
+
+ /*
+ * Activate the hypervisor state DR0-3.
+ * DR7 and DR6 (if fDr6 is true) are left to the caller.
+ */
+ ASMSetDR0(pVCpu->cpum.s.Hyper.dr[0]);
+ ASMSetDR1(pVCpu->cpum.s.Hyper.dr[1]);
+ ASMSetDR2(pVCpu->cpum.s.Hyper.dr[2]);
+ ASMSetDR3(pVCpu->cpum.s.Hyper.dr[3]);
+ if (fDr6)
+ ASMSetDR6(X86_DR6_INIT_VAL);
+
+ ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_USED_DEBUG_REGS_HYPER);
+}
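+
+/* A hedged usage sketch (simplified, not the actual HM code) of how the lazy
+ * DRx loading above pairs with the restore path further up. The caller owns
+ * DR7 (and DR6 when fDr6 is false) in both directions. */
+#if 0
+CPUMR0LoadGuestDebugState(pVCpu, true /* fDr6 */); /* before running guest code */
+/* ... execute guest code ... */
+if (CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* fDr6 */))
+ Log(("Guest/hyper DRx were live and have been saved back\n"));
+#endif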
+
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+
+/**
+ * Per-CPU callback that probes the CPU for APIC support.
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 Ignored.
+ * @param pvUser2 Ignored.
+ */
+static DECLCALLBACK(void) cpumR0MapLocalApicCpuProber(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ NOREF(pvUser1); NOREF(pvUser2);
+ int iCpu = RTMpCpuIdToSetIndex(idCpu);
+ AssertReturnVoid(iCpu >= 0 && (unsigned)iCpu < RT_ELEMENTS(g_aLApics));
+
+ /*
+ * Check for APIC support.
+ */
+ uint32_t uMaxLeaf, u32EBX, u32ECX, u32EDX;
+ ASMCpuId(0, &uMaxLeaf, &u32EBX, &u32ECX, &u32EDX);
+ if ( ( ASMIsIntelCpuEx(u32EBX, u32ECX, u32EDX)
+ || ASMIsAmdCpuEx(u32EBX, u32ECX, u32EDX)
+ || ASMIsViaCentaurCpuEx(u32EBX, u32ECX, u32EDX)
+ || ASMIsShanghaiCpuEx(u32EBX, u32ECX, u32EDX)
+ || ASMIsHygonCpuEx(u32EBX, u32ECX, u32EDX))
+ && ASMIsValidStdRange(uMaxLeaf))
+ {
+ uint32_t uDummy;
+ ASMCpuId(1, &uDummy, &u32EBX, &u32ECX, &u32EDX);
+ if ( (u32EDX & X86_CPUID_FEATURE_EDX_APIC)
+ && (u32EDX & X86_CPUID_FEATURE_EDX_MSR))
+ {
+ /*
+ * Safe to access the MSR. Read it and calc the BASE (a little complicated).
+ */
+ uint64_t u64ApicBase = ASMRdMsr(MSR_IA32_APICBASE);
+ uint64_t u64Mask = MSR_IA32_APICBASE_BASE_MIN;
+
+ /* see Intel Manual: Local APIC Status and Location: MAXPHYADDR default is bit 36 */
+ uint32_t uMaxExtLeaf;
+ ASMCpuId(0x80000000, &uMaxExtLeaf, &u32EBX, &u32ECX, &u32EDX);
+ if ( uMaxExtLeaf >= UINT32_C(0x80000008)
+ && ASMIsValidExtRange(uMaxExtLeaf))
+ {
+ uint32_t u32PhysBits;
+ ASMCpuId(0x80000008, &u32PhysBits, &u32EBX, &u32ECX, &u32EDX);
+ u32PhysBits &= 0xff;
+ u64Mask = ((UINT64_C(1) << u32PhysBits) - 1) & UINT64_C(0xfffffffffffff000);
+ }
+
+ AssertCompile(sizeof(g_aLApics[iCpu].PhysBase) == sizeof(u64ApicBase));
+ g_aLApics[iCpu].PhysBase = u64ApicBase & u64Mask;
+ g_aLApics[iCpu].fEnabled = RT_BOOL(u64ApicBase & MSR_IA32_APICBASE_EN);
+ g_aLApics[iCpu].fX2Apic = (u64ApicBase & (MSR_IA32_APICBASE_EXTD | MSR_IA32_APICBASE_EN))
+ == (MSR_IA32_APICBASE_EXTD | MSR_IA32_APICBASE_EN);
+ }
+ }
+}
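+
+/* Worked example for the mask computed above (illustrative numbers only,
+ * assuming the 36-bit MAXPHYADDR default mentioned in the comment): */
+#if 0
+uint32_t const cPhysBits = 36;
+uint64_t const uMask = ((UINT64_C(1) << cPhysBits) - 1) & UINT64_C(0xfffffffffffff000);
+/* uMask == UINT64_C(0x0000000ffffff000): keeps bits 12..35 of the APIC base
+ * MSR and strips the EN/EXTD/BSP control bits living in the low 12 bits. */
+#endif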
+
+
+
+/**
+ * Per-CPU callback that verifies our APIC expectations.
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 Ignored.
+ * @param pvUser2 Ignored.
+ */
+static DECLCALLBACK(void) cpumR0MapLocalApicCpuChecker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ NOREF(pvUser1); NOREF(pvUser2);
+
+ int iCpu = RTMpCpuIdToSetIndex(idCpu);
+ AssertReturnVoid(iCpu >= 0 && (unsigned)iCpu < RT_ELEMENTS(g_aLApics));
+ if (!g_aLApics[iCpu].fEnabled)
+ return;
+
+ /*
+ * 0x0X 82489 external APIC
+ * 0x1X Local APIC
+ * 0x2X..0xFF reserved
+ */
+ uint32_t uApicVersion;
+ if (g_aLApics[iCpu].fX2Apic)
+ uApicVersion = ApicX2RegRead32(APIC_REG_VERSION);
+ else
+ uApicVersion = ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_VERSION);
+ if ((APIC_REG_VERSION_GET_VER(uApicVersion) & 0xF0) == 0x10)
+ {
+ g_aLApics[iCpu].uVersion = uApicVersion;
+
+# if 0 /* enable if you need it. */
+ if (g_aLApics[iCpu].fX2Apic)
+ SUPR0Printf("CPUM: X2APIC %02u - ver %#010x, lint0=%#07x lint1=%#07x pc=%#07x thmr=%#07x cmci=%#07x\n",
+ iCpu, uApicVersion,
+ ApicX2RegRead32(APIC_REG_LVT_LINT0), ApicX2RegRead32(APIC_REG_LVT_LINT1),
+ ApicX2RegRead32(APIC_REG_LVT_PC), ApicX2RegRead32(APIC_REG_LVT_THMR),
+ ApicX2RegRead32(APIC_REG_LVT_CMCI));
+ else
+ {
+ SUPR0Printf("CPUM: APIC %02u at %RGp (mapped at %p) - ver %#010x, lint0=%#07x lint1=%#07x pc=%#07x thmr=%#07x cmci=%#07x\n",
+ iCpu, g_aLApics[iCpu].PhysBase, g_aLApics[iCpu].pv, uApicVersion,
+ ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_LINT0), ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_LINT1),
+ ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_PC), ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_THMR),
+ ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_CMCI));
+ if (uApicVersion & 0x80000000)
+ {
+ uint32_t uExtFeatures = ApicRegRead(g_aLApics[iCpu].pv, 0x400);
+ uint32_t cEiLvt = (uExtFeatures >> 16) & 0xff;
+ SUPR0Printf("CPUM: APIC %02u: ExtSpace available. extfeat=%08x eilvt[0..3]=%08x %08x %08x %08x\n",
+ iCpu,
+ ApicRegRead(g_aLApics[iCpu].pv, 0x400),
+ cEiLvt >= 1 ? ApicRegRead(g_aLApics[iCpu].pv, 0x500) : 0,
+ cEiLvt >= 2 ? ApicRegRead(g_aLApics[iCpu].pv, 0x510) : 0,
+ cEiLvt >= 3 ? ApicRegRead(g_aLApics[iCpu].pv, 0x520) : 0,
+ cEiLvt >= 4 ? ApicRegRead(g_aLApics[iCpu].pv, 0x530) : 0);
+ }
+ }
+# endif
+ }
+ else
+ {
+ g_aLApics[iCpu].fEnabled = false;
+ g_aLApics[iCpu].fX2Apic = false;
+ SUPR0Printf("VBox/CPUM: Unsupported APIC version %#x (iCpu=%d)\n", uApicVersion, iCpu);
+ }
+}
+
+
+/**
+ * Map the MMIO page of each local APIC in the system.
+ */
+static int cpumR0MapLocalApics(void)
+{
+ /*
+ * Check that we'll always stay within the array bounds.
+ */
+ if (RTMpGetArraySize() > RT_ELEMENTS(g_aLApics))
+ {
+ LogRel(("CPUM: Too many real CPUs/cores/threads - %u, max %u\n", RTMpGetArraySize(), RT_ELEMENTS(g_aLApics)));
+ return VERR_TOO_MANY_CPUS;
+ }
+
+ /*
+ * Create mappings for all online CPUs we think have legacy APICs.
+ */
+ int rc = RTMpOnAll(cpumR0MapLocalApicCpuProber, NULL, NULL);
+
+ for (unsigned iCpu = 0; RT_SUCCESS(rc) && iCpu < RT_ELEMENTS(g_aLApics); iCpu++)
+ {
+ if (g_aLApics[iCpu].fEnabled && !g_aLApics[iCpu].fX2Apic)
+ {
+ rc = RTR0MemObjEnterPhys(&g_aLApics[iCpu].hMemObj, g_aLApics[iCpu].PhysBase,
+ PAGE_SIZE, RTMEM_CACHE_POLICY_MMIO);
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTR0MemObjMapKernel(&g_aLApics[iCpu].hMapObj, g_aLApics[iCpu].hMemObj, (void *)-1,
+ PAGE_SIZE, RTMEM_PROT_READ | RTMEM_PROT_WRITE);
+ if (RT_SUCCESS(rc))
+ {
+ g_aLApics[iCpu].pv = RTR0MemObjAddress(g_aLApics[iCpu].hMapObj);
+ continue;
+ }
+ RTR0MemObjFree(g_aLApics[iCpu].hMemObj, true /* fFreeMappings */);
+ }
+ g_aLApics[iCpu].fEnabled = false;
+ }
+ g_aLApics[iCpu].pv = NULL;
+ }
+
+ /*
+ * Check the APICs.
+ */
+ if (RT_SUCCESS(rc))
+ rc = RTMpOnAll(cpumR0MapLocalApicCpuChecker, NULL, NULL);
+
+ if (RT_FAILURE(rc))
+ {
+ cpumR0UnmapLocalApics();
+ return rc;
+ }
+
+# ifdef LOG_ENABLED
+ /*
+ * Log the result (pretty useless, requires enabling CPUM in VBoxDrv
+ * and !VBOX_WITH_R0_LOGGING).
+ */
+ if (LogIsEnabled())
+ {
+ uint32_t cEnabled = 0;
+ uint32_t cX2Apics = 0;
+ for (unsigned iCpu = 0; iCpu < RT_ELEMENTS(g_aLApics); iCpu++)
+ if (g_aLApics[iCpu].fEnabled)
+ {
+ cEnabled++;
+ cX2Apics += g_aLApics[iCpu].fX2Apic;
+ }
+ Log(("CPUM: %u APICs, %u X2APICs\n", cEnabled, cX2Apics));
+ }
+# endif
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Unmap the Local APIC of all host CPUs.
+ */
+static void cpumR0UnmapLocalApics(void)
+{
+ for (unsigned iCpu = RT_ELEMENTS(g_aLApics); iCpu-- > 0;)
+ {
+ if (g_aLApics[iCpu].pv)
+ {
+ RTR0MemObjFree(g_aLApics[iCpu].hMapObj, true /* fFreeMappings */);
+ RTR0MemObjFree(g_aLApics[iCpu].hMemObj, true /* fFreeMappings */);
+ g_aLApics[iCpu].hMapObj = NIL_RTR0MEMOBJ;
+ g_aLApics[iCpu].hMemObj = NIL_RTR0MEMOBJ;
+ g_aLApics[iCpu].fEnabled = false;
+ g_aLApics[iCpu].fX2Apic = false;
+ g_aLApics[iCpu].pv = NULL;
+ }
+ }
+}
+
+
+/**
+ * Updates CPUMCPU::pvApicBase and CPUMCPU::fX2Apic prior to world switch.
+ *
+ * Writes the Local APIC mapping address of the current host CPU to CPUMCPU so
+ * the world switchers can access the APIC registers for the purpose of
+ * disabling and re-enabling the NMIs. Must be called with disabled preemption
+ * or disabled interrupts!
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param iHostCpuSet The CPU set index of the current host CPU.
+ */
+VMMR0_INT_DECL(void) CPUMR0SetLApic(PVMCPUCC pVCpu, uint32_t iHostCpuSet)
+{
+ Assert(iHostCpuSet <= RT_ELEMENTS(g_aLApics));
+ pVCpu->cpum.s.pvApicBase = g_aLApics[iHostCpuSet].pv;
+ pVCpu->cpum.s.fX2Apic = g_aLApics[iHostCpuSet].fX2Apic;
+// Log6(("CPUMR0SetLApic: pvApicBase=%p fX2Apic=%d\n", g_aLApics[idxCpu].pv, g_aLApics[idxCpu].fX2Apic));
+}
+
+#endif /* VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI */
+
diff --git a/src/VBox/VMM/VMMR0/CPUMR0A.asm b/src/VBox/VMM/VMMR0/CPUMR0A.asm
new file mode 100644
index 00000000..3452a815
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/CPUMR0A.asm
@@ -0,0 +1,358 @@
+ ; $Id: CPUMR0A.asm $
+;; @file
+; CPUM - Ring-0 Assembly Routines (supporting HM and IEM).
+;
+
+;
+; Copyright (C) 2006-2020 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+
+;*******************************************************************************
+;* Header Files *
+;*******************************************************************************
+%define RT_ASM_WITH_SEH64
+%include "iprt/asmdefs.mac"
+%include "VBox/asmdefs.mac"
+%include "VBox/vmm/vm.mac"
+%include "VBox/err.mac"
+%include "VBox/vmm/stam.mac"
+%include "CPUMInternal.mac"
+%include "iprt/x86.mac"
+%include "VBox/vmm/cpum.mac"
+
+
+BEGINCODE
+
+;;
+; Makes sure the EMTs have an FPU state associated with them on hosts where we're
+; allowed to use it in ring-0 too.
+;
+; This ensures that we don't have to allocate the state lazily while trying to execute
+; guest code with preemption disabled or worse.
+;
+; @cproto VMMR0_INT_DECL(void) CPUMR0RegisterVCpuThread(PVMCPU pVCpu);
+;
+BEGINPROC CPUMR0RegisterVCpuThread
+ push xBP
+ SEH64_PUSH_xBP
+ mov xBP, xSP
+ SEH64_SET_FRAME_xBP 0
+SEH64_END_PROLOGUE
+
+%ifdef VMM_R0_TOUCH_FPU
+ movdqa xmm0, xmm0 ; hope this is harmless.
+%endif
+
+.return:
+ xor eax, eax ; paranoia
+ leave
+ ret
+ENDPROC CPUMR0RegisterVCpuThread
+
+
+%ifdef VMM_R0_TOUCH_FPU
+;;
+; Touches the host FPU state.
+;
+; @uses nothing (well, maybe cr0)
+;
+ %ifndef RT_ASM_WITH_SEH64 ; workaround for yasm 1.3.0 bug (error: prologue -1 bytes, must be <256)
+ALIGNCODE(16)
+ %endif
+BEGINPROC CPUMR0TouchHostFpu
+ push xBP
+ SEH64_PUSH_xBP
+ mov xBP, xSP
+ SEH64_SET_FRAME_xBP 0
+SEH64_END_PROLOGUE
+
+ movdqa xmm0, xmm0 ; Hope this is harmless.
+
+ leave
+ ret
+ENDPROC CPUMR0TouchHostFpu
+%endif ; VMM_R0_TOUCH_FPU
+
+
+;;
+; Saves the host FPU/SSE/AVX state and restores the guest FPU/SSE/AVX state.
+;
+; @returns VINF_SUCCESS (0) or VINF_CPUM_HOST_CR0_MODIFIED. (EAX)
+; @param pCpumCpu x86:[ebp+8] gcc:rdi msc:rcx CPUMCPU pointer
+;
+; @remarks 64-bit Windows drivers shouldn't use AVX registers without saving+loading:
+; https://msdn.microsoft.com/en-us/library/windows/hardware/ff545910%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396
+; However the compiler docs have a different idea:
+; https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
+; We'll go with the former for now.
+;
+%ifndef RT_ASM_WITH_SEH64 ; workaround for yasm 1.3.0 bug (error: prologue -1 bytes, must be <256)
+ALIGNCODE(16)
+%endif
+BEGINPROC cpumR0SaveHostRestoreGuestFPUState
+ push xBP
+ SEH64_PUSH_xBP
+ mov xBP, xSP
+ SEH64_SET_FRAME_xBP 0
+SEH64_END_PROLOGUE
+
+ ;
+ ; Prologue - xAX+xDX must be free for XSAVE/XRSTOR input.
+ ;
+%ifdef RT_ARCH_AMD64
+ %ifdef RT_OS_WINDOWS
+ mov r11, rcx
+ %else
+ mov r11, rdi
+ %endif
+ %define pCpumCpu r11
+ %define pXState r10
+%else
+ push ebx
+ push esi
+ mov ebx, dword [ebp + 8]
+ %define pCpumCpu ebx
+ %define pXState esi
+%endif
+
+ pushf ; The darwin kernel can get upset or upset things if an
+ cli ; interrupt occurs while we're doing fxsave/fxrstor/cr0.
+
+ ;
+ ; Save the host state.
+ ;
+ test dword [pCpumCpu + CPUMCPU.fUseFlags], CPUM_USED_FPU_HOST
+ jnz .already_saved_host
+
+ CPUMRZ_TOUCH_FPU_CLEAR_CR0_FPU_TRAPS_SET_RC xCX, xAX, pCpumCpu ; xCX is the return value for VT-x; xAX is scratch.
+
+ CPUMR0_SAVE_HOST
+
+%ifdef VBOX_WITH_KERNEL_USING_XMM
+ jmp .load_guest
+%endif
+.already_saved_host:
+%ifdef VBOX_WITH_KERNEL_USING_XMM
+ ; If we didn't save the host state, we must save the non-volatile XMM registers.
+ mov pXState, [pCpumCpu + CPUMCPU.Host.pXStateR0]
+ stmxcsr [pXState + X86FXSTATE.MXCSR]
+ movdqa [pXState + X86FXSTATE.xmm6 ], xmm6
+ movdqa [pXState + X86FXSTATE.xmm7 ], xmm7
+ movdqa [pXState + X86FXSTATE.xmm8 ], xmm8
+ movdqa [pXState + X86FXSTATE.xmm9 ], xmm9
+ movdqa [pXState + X86FXSTATE.xmm10], xmm10
+ movdqa [pXState + X86FXSTATE.xmm11], xmm11
+ movdqa [pXState + X86FXSTATE.xmm12], xmm12
+ movdqa [pXState + X86FXSTATE.xmm13], xmm13
+ movdqa [pXState + X86FXSTATE.xmm14], xmm14
+ movdqa [pXState + X86FXSTATE.xmm15], xmm15
+
+ ;
+ ; Load the guest state.
+ ;
+.load_guest:
+%endif
+ CPUMR0_LOAD_GUEST
+
+%ifdef VBOX_WITH_KERNEL_USING_XMM
+ ; Restore the non-volatile xmm registers. ASSUMING 64-bit host.
+ mov pXState, [pCpumCpu + CPUMCPU.Host.pXStateR0]
+ movdqa xmm6, [pXState + X86FXSTATE.xmm6]
+ movdqa xmm7, [pXState + X86FXSTATE.xmm7]
+ movdqa xmm8, [pXState + X86FXSTATE.xmm8]
+ movdqa xmm9, [pXState + X86FXSTATE.xmm9]
+ movdqa xmm10, [pXState + X86FXSTATE.xmm10]
+ movdqa xmm11, [pXState + X86FXSTATE.xmm11]
+ movdqa xmm12, [pXState + X86FXSTATE.xmm12]
+ movdqa xmm13, [pXState + X86FXSTATE.xmm13]
+ movdqa xmm14, [pXState + X86FXSTATE.xmm14]
+ movdqa xmm15, [pXState + X86FXSTATE.xmm15]
+ ldmxcsr [pXState + X86FXSTATE.MXCSR]
+%endif
+
+ or dword [pCpumCpu + CPUMCPU.fUseFlags], (CPUM_USED_FPU_GUEST | CPUM_USED_FPU_SINCE_REM | CPUM_USED_FPU_HOST)
+ popf
+
+ mov eax, ecx
+.return:
+%ifdef RT_ARCH_X86
+ pop esi
+ pop ebx
+%endif
+ leave
+ ret
+ENDPROC cpumR0SaveHostRestoreGuestFPUState
+
+
+;;
+; Saves the guest FPU/SSE/AVX state and restores the host FPU/SSE/AVX state.
+;
+; @param pCpumCpu x86:[ebp+8] gcc:rdi msc:rcx CPUMCPU pointer
+;
+; @remarks 64-bit Windows drivers shouldn't use AVX registers without saving+loading:
+; https://msdn.microsoft.com/en-us/library/windows/hardware/ff545910%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396
+; However the compiler docs have a different idea:
+; https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
+; We'll go with the former for now.
+;
+%ifndef RT_ASM_WITH_SEH64 ; workaround for yasm 1.3.0 bug (error: prologue -1 bytes, must be <256)
+ALIGNCODE(16)
+%endif
+BEGINPROC cpumR0SaveGuestRestoreHostFPUState
+ push xBP
+ SEH64_PUSH_xBP
+ mov xBP, xSP
+ SEH64_SET_FRAME_xBP 0
+SEH64_END_PROLOGUE
+
+ ;
+ ; Prologue - xAX+xDX must be free for XSAVE/XRSTOR input.
+ ;
+%ifdef RT_ARCH_AMD64
+ %ifdef RT_OS_WINDOWS
+ mov r11, rcx
+ %else
+ mov r11, rdi
+ %endif
+ %define pCpumCpu r11
+ %define pXState r10
+%else
+ push ebx
+ push esi
+ mov ebx, dword [ebp + 8]
+ %define pCpumCpu ebx
+ %define pXState esi
+%endif
+ pushf ; The darwin kernel can get upset or upset things if an
+ cli ; interrupt occurs while we're doing fxsave/fxrstor/cr0.
+
+ %ifdef VBOX_WITH_KERNEL_USING_XMM
+ ;
+ ; Copy non-volatile XMM registers to the host state so we can use
+ ; them while saving the guest state (we've gotta do this anyway).
+ ;
+ mov pXState, [pCpumCpu + CPUMCPU.Host.pXStateR0]
+ stmxcsr [pXState + X86FXSTATE.MXCSR]
+ movdqa [pXState + X86FXSTATE.xmm6], xmm6
+ movdqa [pXState + X86FXSTATE.xmm7], xmm7
+ movdqa [pXState + X86FXSTATE.xmm8], xmm8
+ movdqa [pXState + X86FXSTATE.xmm9], xmm9
+ movdqa [pXState + X86FXSTATE.xmm10], xmm10
+ movdqa [pXState + X86FXSTATE.xmm11], xmm11
+ movdqa [pXState + X86FXSTATE.xmm12], xmm12
+ movdqa [pXState + X86FXSTATE.xmm13], xmm13
+ movdqa [pXState + X86FXSTATE.xmm14], xmm14
+ movdqa [pXState + X86FXSTATE.xmm15], xmm15
+ %endif
+
+ ;
+ ; Save the guest state if necessary.
+ ;
+ test dword [pCpumCpu + CPUMCPU.fUseFlags], CPUM_USED_FPU_GUEST
+ jz .load_only_host
+
+ %ifdef VBOX_WITH_KERNEL_USING_XMM
+ ; Load the guest XMM register values we already saved in HMR0VMXStartVMWrapXMM.
+ mov pXState, [pCpumCpu + CPUMCPU.Guest.pXStateR0]
+ movdqa xmm0, [pXState + X86FXSTATE.xmm0]
+ movdqa xmm1, [pXState + X86FXSTATE.xmm1]
+ movdqa xmm2, [pXState + X86FXSTATE.xmm2]
+ movdqa xmm3, [pXState + X86FXSTATE.xmm3]
+ movdqa xmm4, [pXState + X86FXSTATE.xmm4]
+ movdqa xmm5, [pXState + X86FXSTATE.xmm5]
+ movdqa xmm6, [pXState + X86FXSTATE.xmm6]
+ movdqa xmm7, [pXState + X86FXSTATE.xmm7]
+ movdqa xmm8, [pXState + X86FXSTATE.xmm8]
+ movdqa xmm9, [pXState + X86FXSTATE.xmm9]
+ movdqa xmm10, [pXState + X86FXSTATE.xmm10]
+ movdqa xmm11, [pXState + X86FXSTATE.xmm11]
+ movdqa xmm12, [pXState + X86FXSTATE.xmm12]
+ movdqa xmm13, [pXState + X86FXSTATE.xmm13]
+ movdqa xmm14, [pXState + X86FXSTATE.xmm14]
+ movdqa xmm15, [pXState + X86FXSTATE.xmm15]
+ ldmxcsr [pXState + X86FXSTATE.MXCSR]
+ %endif
+ CPUMR0_SAVE_GUEST
+
+ ;
+ ; Load the host state.
+ ;
+.load_only_host:
+ CPUMR0_LOAD_HOST
+
+ ; Restore the CR0 value we saved in cpumR0SaveHostRestoreGuestFPUState or
+ ; in cpumRZSaveHostFPUState.
+ mov xCX, [pCpumCpu + CPUMCPU.Host.cr0Fpu]
+ CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET xCX
+ and dword [pCpumCpu + CPUMCPU.fUseFlags], ~(CPUM_USED_FPU_GUEST | CPUM_USED_FPU_HOST)
+
+ popf
+%ifdef RT_ARCH_X86
+ pop esi
+ pop ebx
+%endif
+ leave
+ ret
+%undef pCpumCpu
+%undef pXState
+ENDPROC cpumR0SaveGuestRestoreHostFPUState
+
+
+%if ARCH_BITS == 32
+ %ifdef VBOX_WITH_64_BITS_GUESTS
+;;
+; Restores the host's FPU/SSE/AVX state from pCpumCpu->Host.
+;
+; @param pCpumCpu x86:[ebp+8] gcc:rdi msc:rcx CPUMCPU pointer
+;
+ %ifndef RT_ASM_WITH_SEH64 ; workaround for yasm 1.3.0 bug (error: prologue -1 bytes, must be <256)
+ALIGNCODE(16)
+ %endif
+BEGINPROC cpumR0RestoreHostFPUState
+ ;
+ ; Prologue - xAX+xDX must be free for XSAVE/XRSTOR input.
+ ;
+ push ebp
+ mov ebp, esp
+ push ebx
+ push esi
+ mov ebx, dword [ebp + 8]
+ %define pCpumCpu ebx
+ %define pXState esi
+
+ ;
+ ; Restore host CPU state.
+ ;
+ pushf ; The darwin kernel can get upset or upset things if an
+ cli ; interrupt occurs while we're doing fxsave/fxrstor/cr0.
+
+ CPUMR0_LOAD_HOST
+
+ ; Restore the CR0 value we saved in cpumR0SaveHostRestoreGuestFPUState or
+ ; in cpumRZSaveHostFPUState.
+ ;; @todo What about XCR0?
+ mov xCX, [pCpumCpu + CPUMCPU.Host.cr0Fpu]
+ CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET xCX
+
+ and dword [pCpumCpu + CPUMCPU.fUseFlags], ~CPUM_USED_FPU_HOST
+ popf
+
+ pop esi
+ pop ebx
+ leave
+ ret
+ %undef pCpumCpu
+ %undef pXState
+ENDPROC cpumR0RestoreHostFPUState
+ %endif ; VBOX_WITH_64_BITS_GUESTS
+%endif ; ARCH_BITS == 32
+
diff --git a/src/VBox/VMM/VMMR0/EMR0.cpp b/src/VBox/VMM/VMMR0/EMR0.cpp
new file mode 100644
index 00000000..8cc3b0fb
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/EMR0.cpp
@@ -0,0 +1,61 @@
+/* $Id: EMR0.cpp $ */
+/** @file
+ * EM - Host Context Ring 0.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_EM
+#include <VBox/vmm/em.h>
+#include "EMInternal.h"
+#include <VBox/vmm/vmcc.h>
+#include <VBox/vmm/gvm.h>
+#include <iprt/errcore.h>
+#include <VBox/log.h>
+#include <iprt/assert.h>
+#include <iprt/thread.h>
+
+
+
+/**
+ * Adjusts EM configuration options.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM structure.
+ */
+VMMR0_INT_DECL(int) EMR0InitVM(PGVM pGVM)
+{
+ /*
+ * Override ring-0 exit optimization settings.
+ */
+ PVMCPUCC pVCpu0 = &pGVM->aCpus[0];
+ bool fEnabledR0 = pVCpu0->em.s.fExitOptimizationEnabled
+ && pVCpu0->em.s.fExitOptimizationEnabledR0
+ && (RTThreadPreemptIsPossible() || RTThreadPreemptIsPendingTrusty());
+ bool fEnabledR0PreemptDisabled = fEnabledR0
+ && pVCpu0->em.s.fExitOptimizationEnabledR0PreemptDisabled
+ && RTThreadPreemptIsPendingTrusty();
+ for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
+ {
+ PVMCPUCC pVCpu = &pGVM->aCpus[idCpu];
+ pVCpu->em.s.fExitOptimizationEnabledR0 = fEnabledR0;
+ pVCpu->em.s.fExitOptimizationEnabledR0PreemptDisabled = fEnabledR0PreemptDisabled;
+ }
+
+ return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/VMM/VMMR0/GIMR0.cpp b/src/VBox/VMM/VMMR0/GIMR0.cpp
new file mode 100644
index 00000000..69667273
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/GIMR0.cpp
@@ -0,0 +1,111 @@
+/* $Id: GIMR0.cpp $ */
+/** @file
+ * Guest Interface Manager (GIM) - Host Context Ring-0.
+ */
+
+/*
+ * Copyright (C) 2014-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_GIM
+#include <VBox/vmm/gim.h>
+#include "GIMInternal.h"
+#include "GIMHvInternal.h"
+#include <VBox/vmm/vmcc.h>
+
+#include <VBox/err.h>
+
+
+/**
+ * Does ring-0 per-VM GIM initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) GIMR0InitVM(PVMCC pVM)
+{
+ if (!GIMIsEnabled(pVM))
+ return VINF_SUCCESS;
+
+ switch (pVM->gim.s.enmProviderId)
+ {
+ case GIMPROVIDERID_HYPERV:
+ return gimR0HvInitVM(pVM);
+
+ default:
+ break;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does ring-0 per-VM GIM termination.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) GIMR0TermVM(PVMCC pVM)
+{
+ if (!GIMIsEnabled(pVM))
+ return VINF_SUCCESS;
+
+ switch (pVM->gim.s.enmProviderId)
+ {
+ case GIMPROVIDERID_HYPERV:
+ return gimR0HvTermVM(pVM);
+
+ default:
+ break;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Updates the paravirtualized TSC supported by the GIM provider.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if the paravirt. TSC is setup and in use.
+ * @retval VERR_GIM_NOT_ENABLED if no GIM provider is configured for this VM.
+ * @retval VERR_GIM_PVTSC_NOT_AVAILABLE if the GIM provider does not support any
+ * paravirt. TSC.
+ * @retval VERR_GIM_PVTSC_NOT_IN_USE if the GIM provider supports paravirt. TSC
+ * but the guest isn't currently using it.
+ *
+ * @param pVM The cross context VM structure.
+ * @param u64Offset The computed TSC offset.
+ *
+ * @thread EMT(pVCpu)
+ */
+VMMR0_INT_DECL(int) GIMR0UpdateParavirtTsc(PVMCC pVM, uint64_t u64Offset)
+{
+ switch (pVM->gim.s.enmProviderId)
+ {
+ case GIMPROVIDERID_HYPERV:
+ return gimR0HvUpdateParavirtTsc(pVM, u64Offset);
+
+ case GIMPROVIDERID_KVM:
+ return VINF_SUCCESS;
+
+ case GIMPROVIDERID_NONE:
+ return VERR_GIM_NOT_ENABLED;
+
+ default:
+ break;
+ }
+ return VERR_GIM_PVTSC_NOT_AVAILABLE;
+}
+
diff --git a/src/VBox/VMM/VMMR0/GIMR0Hv.cpp b/src/VBox/VMM/VMMR0/GIMR0Hv.cpp
new file mode 100644
index 00000000..a4ec1d3b
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/GIMR0Hv.cpp
@@ -0,0 +1,182 @@
+/* $Id: GIMR0Hv.cpp $ */
+/** @file
+ * Guest Interface Manager (GIM), Hyper-V - Host Context Ring-0.
+ */
+
+/*
+ * Copyright (C) 2014-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_GIM
+#include <VBox/vmm/gim.h>
+#include <VBox/vmm/tm.h>
+#include "GIMInternal.h"
+#include "GIMHvInternal.h"
+#include <VBox/vmm/vmcc.h>
+
+#include <VBox/err.h>
+
+#include <iprt/spinlock.h>
+
+
+#if 0
+/**
+ * Allocates and maps one physically contiguous page. The allocated page is
+ * zeroed out.
+ *
+ * @returns IPRT status code.
+ * @param pMemObj Pointer to the ring-0 memory object.
+ * @param ppVirt Where to store the virtual address of the
+ * allocation.
+ * @param pHCPhys Where to store the physical address of the
+ * allocation.
+ */
+static int gimR0HvPageAllocZ(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys)
+{
+ AssertPtr(pMemObj);
+ AssertPtr(ppVirt);
+ AssertPtr(pHCPhys);
+
+ int rc = RTR0MemObjAllocCont(pMemObj, PAGE_SIZE, false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ return rc;
+ *ppVirt = RTR0MemObjAddress(*pMemObj);
+ *pHCPhys = RTR0MemObjGetPagePhysAddr(*pMemObj, 0 /* iPage */);
+ ASMMemZero32(*ppVirt, PAGE_SIZE);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Frees and unmaps an allocated physical page.
+ *
+ * @param pMemObj Pointer to the ring-0 memory object.
+ * @param ppVirt Where to re-initialize the virtual address of
+ * the allocation as 0.
+ * @param pHCPhys Where to re-initialize the physical address of the
+ * allocation as 0.
+ */
+static void gimR0HvPageFree(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys)
+{
+ AssertPtr(pMemObj);
+ AssertPtr(ppVirt);
+ AssertPtr(pHCPhys);
+ if (*pMemObj != NIL_RTR0MEMOBJ)
+ {
+ int rc = RTR0MemObjFree(*pMemObj, true /* fFreeMappings */);
+ AssertRC(rc);
+ *pMemObj = NIL_RTR0MEMOBJ;
+ *ppVirt = 0;
+ *pHCPhys = 0;
+ }
+}
+#endif
+
+/**
+ * Updates Hyper-V's reference TSC page.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ * @param u64Offset The computed TSC offset.
+ * @thread EMT.
+ */
+VMM_INT_DECL(int) gimR0HvUpdateParavirtTsc(PVMCC pVM, uint64_t u64Offset)
+{
+ Assert(GIMIsEnabled(pVM));
+ bool fHvTscEnabled = MSR_GIM_HV_REF_TSC_IS_ENABLED(pVM->gim.s.u.Hv.u64TscPageMsr);
+ if (RT_UNLIKELY(!fHvTscEnabled))
+ return VERR_GIM_PVTSC_NOT_ENABLED;
+
+ /** @todo this is buggy when large pages are used due to a PGM limitation, see
+ * @bugref{7532}.
+ *
+ * In any case, we do not ever update this page while the guest is
+ * running after setting it up (in ring-3, see gimR3HvEnableTscPage()) as
+ * the TSC offset is handled in the VMCS/VMCB (HM) or by trapping RDTSC
+ * (raw-mode). */
+#if 0
+ PCGIMHV pcHv = &pVM->gim.s.u.Hv;
+ PCGIMMMIO2REGION pcRegion = &pcHv->aMmio2Regions[GIM_HV_REF_TSC_PAGE_REGION_IDX];
+ PGIMHVREFTSC pRefTsc = (PGIMHVREFTSC)pcRegion->CTX_SUFF(pvPage);
+ Assert(pRefTsc);
+
+ /*
+ * Hyper-V reports the reference time in 100 nanosecond units.
+ */
+ uint64_t u64Tsc100Ns = pcHv->cTscTicksPerSecond / RT_NS_10MS;
+ int64_t i64TscOffset = (int64_t)u64Offset / u64Tsc100Ns;
+
+ /*
+ * The TSC page can be simultaneously read by other VCPUs in the guest. The
+ * spinlock is only for protecting simultaneous hypervisor writes from other
+ * EMTs.
+ */
+ RTSpinlockAcquire(pcHv->hSpinlockR0);
+ if (pRefTsc->i64TscOffset != i64TscOffset)
+ {
+ if (pRefTsc->u32TscSequence < UINT32_C(0xfffffffe))
+ ASMAtomicIncU32(&pRefTsc->u32TscSequence);
+ else
+ ASMAtomicWriteU32(&pRefTsc->u32TscSequence, 1);
+ ASMAtomicWriteS64(&pRefTsc->i64TscOffset, i64TscOffset);
+ }
+ RTSpinlockRelease(pcHv->hSpinlockR0);
+
+ Assert(pRefTsc->u32TscSequence != 0);
+ Assert(pRefTsc->u32TscSequence != UINT32_C(0xffffffff));
+#else
+ NOREF(u64Offset);
+#endif
+ return VINF_SUCCESS;
+}
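+
+/* A hedged sketch of the consumer side of the sequence protocol above (guest
+ * code, simplified; only the fields used in the #if 0 block are shown, the
+ * authoritative layout is defined by the Hyper-V TLFS): re-read whenever the
+ * sequence changes mid-read; a sequence of 0 means the page is not valid. */
+#if 0
+uint32_t uSeq;
+int64_t i64Offset;
+do
+{
+ uSeq = ASMAtomicReadU32(&pRefTsc->u32TscSequence);
+ i64Offset = ASMAtomicReadS64(&pRefTsc->i64TscOffset);
+} while (uSeq != ASMAtomicReadU32(&pRefTsc->u32TscSequence));
+if (uSeq == 0)
+{ /* Page not valid: fall back to another time source. */ }
+#endif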
+
+
+/**
+ * Does ring-0 per-VM GIM Hyper-V initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) gimR0HvInitVM(PVMCC pVM)
+{
+ AssertPtr(pVM);
+ Assert(GIMIsEnabled(pVM));
+
+ PGIMHV pHv = &pVM->gim.s.u.Hv;
+ Assert(pHv->hSpinlockR0 == NIL_RTSPINLOCK);
+
+ int rc = RTSpinlockCreate(&pHv->hSpinlockR0, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "Hyper-V");
+ return rc;
+}
+
+
+/**
+ * Does ring-0 per-VM GIM Hyper-V termination.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) gimR0HvTermVM(PVMCC pVM)
+{
+ AssertPtr(pVM);
+ Assert(GIMIsEnabled(pVM));
+
+ PGIMHV pHv = &pVM->gim.s.u.Hv;
+ RTSpinlockDestroy(pHv->hSpinlockR0);
+ pHv->hSpinlockR0 = NIL_RTSPINLOCK;
+
+ return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/VMM/VMMR0/GMMR0.cpp b/src/VBox/VMM/VMMR0/GMMR0.cpp
new file mode 100644
index 00000000..3c4f77ff
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/GMMR0.cpp
@@ -0,0 +1,5746 @@
+/* $Id: GMMR0.cpp $ */
+/** @file
+ * GMM - Global Memory Manager.
+ */
+
+/*
+ * Copyright (C) 2007-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @page pg_gmm GMM - The Global Memory Manager
+ *
+ * As the name indicates, this component is responsible for global memory
+ * management. Currently only guest RAM is allocated from the GMM, but this
+ * may change to include shadow page tables and other bits later.
+ *
+ * Guest RAM is managed as individual pages, but allocated from the host OS
+ * in chunks for reasons of portability / efficiency. To minimize the memory
+ * footprint all tracking structures must be as small as possible without
+ * unnecessary performance penalties.
+ *
+ * The allocation chunks have a fixed size, defined at compile time by the
+ * #GMM_CHUNK_SIZE \#define.
+ *
+ * Each chunk is given a unique ID. Each page also has a unique ID. The
+ * relationship between the two IDs is:
+ * @code
+ * GMM_CHUNK_SHIFT = log2(GMM_CHUNK_SIZE / PAGE_SIZE);
+ * idPage = (idChunk << GMM_CHUNK_SHIFT) | iPage;
+ * @endcode
+ * Where iPage is the index of the page within the chunk. This ID scheme
+ * permits efficient chunk and page lookup, but it relies on the chunk size
+ * to be set at compile time. The chunks are organized in an AVL tree with their
+ * IDs being the keys.
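+ *
+ * For example (an illustration only, not from the original text; it assumes a
+ * 2 MB chunk size and 4 KB pages, i.e. GMM_CHUNK_SHIFT = 9):
+ * @code
+ * idPage = 0x00012345;
+ * idChunk = idPage >> GMM_CHUNK_SHIFT; // = 0x91
+ * iPage = idPage & ((GMM_CHUNK_SIZE / PAGE_SIZE) - 1); // = 0x145
+ * @endcode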
+ *
+ * The physical address of each page in an allocation chunk is maintained by
+ * the #RTR0MEMOBJ and obtained using #RTR0MemObjGetPagePhysAddr. There is no
+ * need to duplicate this information (it would cost 8 bytes per page if we did).
+ *
+ * So what do we need to track per page? Most importantly we need to know
+ * which state the page is in:
+ * - Private - Allocated for (eventually) backing one particular VM page.
+ * - Shared - Readonly page that is used by one or more VMs and treated
+ * as COW by PGM.
+ * - Free - Not used by anyone.
+ *
+ * For the page replacement operations (sharing, defragmenting and freeing)
+ * to be somewhat efficient, private pages need to be associated with a
+ * particular page in a particular VM.
+ *
+ * Tracking the usage of shared pages is impractical and expensive, so we'll
+ * settle for a reference counting system instead.
+ *
+ * Free pages will be chained on LIFOs.
+ *
+ * On 64-bit systems we will use a 64-bit bitfield per page, while on 32-bit
+ * systems a 32-bit bitfield will have to suffice because of address space
+ * limitations. The #GMMPAGE structure shows the details.
+ *
+ *
+ * @section sec_gmm_alloc_strat Page Allocation Strategy
+ *
+ * The strategy for allocating pages has to take fragmentation and shared
+ * pages into account, or we may end up with 2000 chunks with only
+ * a few pages in each. Shared pages cannot easily be reallocated because
+ * of the inaccurate usage accounting (see above). Private pages can be
+ * reallocated by a defragmentation thread in the same manner that sharing
+ * is done.
+ *
+ * The first approach is to manage the free pages in two sets depending on
+ * whether they are mainly for the allocation of shared or private pages.
+ * In the initial implementation there will be almost no possibility for
+ * mixing shared and private pages in the same chunk (only if we're really
+ * stressed on memory), but when we implement forking of VMs and have to
+ * deal with lots of COW pages it'll start getting kind of interesting.
+ *
+ * The sets are lists of chunks with approximately the same number of
+ * free pages. Say the chunk size is 1MB, meaning 256 pages, and a set
+ * consists of 16 lists. So, the first list will contain the chunks with
+ * 1-7 free pages, the second covers 8-15, and so on. The chunks will be
+ * moved between the lists as pages are freed up or allocated.
+ *
+ *
+ * @section sec_gmm_costs Costs
+ *
+ * The per page cost in kernel space is 32 bits plus whatever RTR0MEMOBJ
+ * entails. In addition there is the chunk cost of approximately
+ * (sizeof(RTR0MEMOBJ) + sizeof(CHUNK)) / 2^CHUNK_SHIFT bytes per page.
+ *
+ * On Windows the per page #RTR0MEMOBJ cost is 32 bits on 32-bit Windows and
+ * 64 bits on 64-bit Windows (a PFN_NUMBER in the MDL), i.e. 64 bits per page
+ * on a 64-bit host. The cost on Linux is the same, there because of
+ * sizeof(struct page *).
+ *
+ *
+ * @section sec_gmm_legacy Legacy Mode for Non-Tier-1 Platforms
+ *
+ * In legacy mode the page source is locked user pages and not
+ * #RTR0MemObjAllocPhysNC, which means that a page can only be allocated
+ * by the VM that locked it. We will make no attempt at implementing
+ * page sharing on these systems, just do enough to make it all work.
+ *
+ * @note With 6.1 really dropping 32-bit support, the legacy mode is obsoleted
+ * under the assumption that there is sufficient kernel virtual address
+ * space to map all of the guest memory allocations. So, we'll be using
+ * #RTR0MemObjAllocPage on some platforms as an alternative to
+ * #RTR0MemObjAllocPhysNC.
+ *
+ *
+ * @subsection sub_gmm_locking Serializing
+ *
+ * One simple fast mutex will be employed in the initial implementation, not
+ * two as mentioned in @ref sec_pgmPhys_Serializing.
+ *
+ * @see @ref sec_pgmPhys_Serializing
+ *
+ *
+ * @section sec_gmm_overcommit Memory Over-Commitment Management
+ *
+ * The GVM will have to do the system wide memory over-commitment
+ * management. My current ideas are:
+ * - Per VM OC policy that indicates how much to initially commit
+ * to it and what to do in an out-of-memory situation.
+ * - Prevent overtaxing the host.
+ *
+ * There are some challenges here; the main ones are configurability and
+ * security. Should we for instance permit anyone to request 100% memory
+ * commitment? Who should be allowed to make runtime adjustments of the
+ * config, and how do we prevent these settings from being lost when the
+ * last VM process exits? The solution is probably to have an optional root
+ * daemon that will keep VMMR0.r0 in memory and enable the security measures.
+ *
+ *
+ *
+ * @section sec_gmm_numa NUMA
+ *
+ * NUMA considerations will be designed and implemented a bit later.
+ *
+ * The preliminary guess is that we will have to try to allocate memory as
+ * close as possible to the CPUs the VM is executing on (EMT and additional CPU
+ * threads), which means it's mostly about allocation and sharing policies.
+ * Both the scheduler and the allocator interface will have to supply some NUMA
+ * info, and we'll need a way to calculate access costs.
+ *
+ */
+
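+
+/*
+ * NB: Illustrative sketch added for this write-up, not part of the original
+ * source. It shows how the chunk/page ID scheme described in the comment
+ * above decomposes, using the GMM_CHUNKID_SHIFT and GMM_PAGEID_IDX_MASK
+ * constants that gmmR0GetPage below relies on; the helper name is
+ * hypothetical.
+ */
+#if 0 /* documentation sketch only */
+DECLINLINE(void) gmmR0SketchSplitPageId(uint32_t idPage, uint32_t *pidChunk, uint32_t *piPage)
+{
+ *pidChunk = idPage >> GMM_CHUNKID_SHIFT; /* which allocation chunk the page lives in */
+ *piPage = idPage & GMM_PAGEID_IDX_MASK; /* the page index within that chunk */
+}
+#endif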
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_GMM
+#include <VBox/rawpci.h>
+#include <VBox/vmm/gmm.h>
+#include "GMMR0Internal.h"
+#include <VBox/vmm/vmcc.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/log.h>
+#include <VBox/param.h>
+#include <VBox/err.h>
+#include <VBox/VMMDev.h>
+#include <iprt/asm.h>
+#include <iprt/avl.h>
+#ifdef VBOX_STRICT
+# include <iprt/crc.h>
+#endif
+#include <iprt/critsect.h>
+#include <iprt/list.h>
+#include <iprt/mem.h>
+#include <iprt/memobj.h>
+#include <iprt/mp.h>
+#include <iprt/semaphore.h>
+#include <iprt/spinlock.h>
+#include <iprt/string.h>
+#include <iprt/time.h>
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** @def VBOX_USE_CRIT_SECT_FOR_GIANT
+ * Use a critical section instead of a fast mutex for the giant GMM lock.
+ *
+ * @remarks This is primarily a way of avoiding the deadlock checks in the
+ * windows driver verifier. */
+#if defined(RT_OS_WINDOWS) || defined(RT_OS_DARWIN) || defined(DOXYGEN_RUNNING)
+# define VBOX_USE_CRIT_SECT_FOR_GIANT
+#endif
+
+#if (!defined(VBOX_WITH_RAM_IN_KERNEL) || defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM)) \
+ && !defined(RT_OS_DARWIN)
+/** Enable the legacy mode code (will be dropped soon). */
+# define GMM_WITH_LEGACY_MODE
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/** Pointer to set of free chunks. */
+typedef struct GMMCHUNKFREESET *PGMMCHUNKFREESET;
+
+/**
+ * The per-page tracking structure employed by the GMM.
+ *
+ * On 32-bit hosts some trickery is necessary to compress all
+ * the information into 32 bits. When the fSharedFree member is set,
+ * the 30th bit decides whether it's a free page or not.
+ *
+ * Because of the different layout on 32-bit and 64-bit hosts, macros
+ * are used to get and set some of the data.
+ */
+typedef union GMMPAGE
+{
+#if HC_ARCH_BITS == 64
+ /** Unsigned integer view. */
+ uint64_t u;
+
+ /** The common view. */
+ struct GMMPAGECOMMON
+ {
+ uint32_t uStuff1 : 32;
+ uint32_t uStuff2 : 30;
+ /** The page state. */
+ uint32_t u2State : 2;
+ } Common;
+
+ /** The view of a private page. */
+ struct GMMPAGEPRIVATE
+ {
+ /** The guest page frame number. (Max addressable: 2 ^ 44 - 16) */
+ uint32_t pfn;
+ /** The GVM handle. (64K VMs) */
+ uint32_t hGVM : 16;
+ /** Reserved. */
+ uint32_t u16Reserved : 14;
+ /** The page state. */
+ uint32_t u2State : 2;
+ } Private;
+
+ /** The view of a shared page. */
+ struct GMMPAGESHARED
+ {
+ /** The host page frame number. (Max addressable: 2 ^ 44 - 16) */
+ uint32_t pfn;
+ /** The reference count (64K VMs). */
+ uint32_t cRefs : 16;
+ /** Used for debug checksumming. */
+ uint32_t u14Checksum : 14;
+ /** The page state. */
+ uint32_t u2State : 2;
+ } Shared;
+
+ /** The view of a free page. */
+ struct GMMPAGEFREE
+ {
+ /** The index of the next page in the free list. UINT16_MAX is NIL. */
+ uint16_t iNext;
+ /** Reserved. Checksum or something? */
+ uint16_t u16Reserved0;
+ /** Reserved. Checksum or something? */
+ uint32_t u30Reserved1 : 30;
+ /** The page state. */
+ uint32_t u2State : 2;
+ } Free;
+
+#else /* 32-bit */
+ /** Unsigned integer view. */
+ uint32_t u;
+
+ /** The common view. */
+ struct GMMPAGECOMMON
+ {
+ uint32_t uStuff : 30;
+ /** The page state. */
+ uint32_t u2State : 2;
+ } Common;
+
+ /** The view of a private page. */
+ struct GMMPAGEPRIVATE
+ {
+ /** The guest page frame number. (Max addressable: 2 ^ 36) */
+ uint32_t pfn : 24;
+ /** The GVM handle. (127 VMs) */
+ uint32_t hGVM : 7;
+ /** The top page state bit, MBZ. */
+ uint32_t fZero : 1;
+ } Private;
+
+ /** The view of a shared page. */
+ struct GMMPAGESHARED
+ {
+ /** The reference count. */
+ uint32_t cRefs : 30;
+ /** The page state. */
+ uint32_t u2State : 2;
+ } Shared;
+
+ /** The view of a free page. */
+ struct GMMPAGEFREE
+ {
+ /** The index of the next page in the free list. UINT16_MAX is NIL. */
+ uint32_t iNext : 16;
+ /** Reserved. Checksum or something? */
+ uint32_t u14Reserved : 14;
+ /** The page state. */
+ uint32_t u2State : 2;
+ } Free;
+#endif
+} GMMPAGE;
+AssertCompileSize(GMMPAGE, sizeof(RTHCUINTPTR));
+/** Pointer to a GMMPAGE. */
+typedef GMMPAGE *PGMMPAGE;
+
+
+/** @name The Page States.
+ * @{ */
+/** A private page. */
+#define GMM_PAGE_STATE_PRIVATE 0
+/** A private page - alternative value used on the 32-bit implementation.
+ * This will never be used on 64-bit hosts. */
+#define GMM_PAGE_STATE_PRIVATE_32 1
+/** A shared page. */
+#define GMM_PAGE_STATE_SHARED 2
+/** A free page. */
+#define GMM_PAGE_STATE_FREE 3
+/** @} */
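+
+/*
+ * NB: Illustrative sketch added for this write-up, not part of the original
+ * source. It shows how the 64-bit private view of GMMPAGE is typically
+ * filled in; pPage, GCPhys and pGVM are hypothetical local variables.
+ */
+#if 0 /* documentation sketch only */
+ pPage->u = 0;
+ pPage->Private.pfn = GCPhys >> PAGE_SHIFT; /* the guest page frame number */
+ pPage->Private.hGVM = pGVM->hSelf; /* the VM the page is allocated for */
+ pPage->Private.u2State = GMM_PAGE_STATE_PRIVATE;
+#endif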
+
+
+/** @def GMM_PAGE_IS_PRIVATE
+ *
+ * @returns true if private, false if not.
+ * @param pPage The GMM page.
+ */
+#if HC_ARCH_BITS == 64
+# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_PRIVATE )
+#else
+# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Private.fZero == 0 )
+#endif
+
+/** @def GMM_PAGE_IS_SHARED
+ *
+ * @returns true if shared, false if not.
+ * @param pPage The GMM page.
+ */
+#define GMM_PAGE_IS_SHARED(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_SHARED )
+
+/** @def GMM_PAGE_IS_FREE
+ *
+ * @returns true if free, false if not.
+ * @param pPage The GMM page.
+ */
+#define GMM_PAGE_IS_FREE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_FREE )
+
+/** @def GMM_PAGE_PFN_LAST
+ * The last valid guest pfn range.
+ * @remark Some of the values outside the range have special meaning,
+ * see GMM_PAGE_PFN_UNSHAREABLE.
+ */
+#if HC_ARCH_BITS == 64
+# define GMM_PAGE_PFN_LAST UINT32_C(0xfffffff0)
+#else
+# define GMM_PAGE_PFN_LAST UINT32_C(0x00fffff0)
+#endif
+AssertCompile(GMM_PAGE_PFN_LAST == (GMM_GCPHYS_LAST >> PAGE_SHIFT));
+
+/** @def GMM_PAGE_PFN_UNSHAREABLE
+ * Indicates that this page isn't used for normal guest memory and thus isn't shareable.
+ */
+#if HC_ARCH_BITS == 64
+# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0xfffffff1)
+#else
+# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0x00fffff1)
+#endif
+AssertCompile(GMM_PAGE_PFN_UNSHAREABLE == (GMM_GCPHYS_UNSHAREABLE >> PAGE_SHIFT));
+
+
+/**
+ * A GMM allocation chunk ring-3 mapping record.
+ *
+ * This should really be associated with a session and not a VM, but
+ * it's simpler to associate it with a VM and clean up when the VM
+ * object is destroyed.
+ */
+typedef struct GMMCHUNKMAP
+{
+ /** The mapping object. */
+ RTR0MEMOBJ hMapObj;
+ /** The VM owning the mapping. */
+ PGVM pGVM;
+} GMMCHUNKMAP;
+/** Pointer to a GMM allocation chunk mapping. */
+typedef struct GMMCHUNKMAP *PGMMCHUNKMAP;
+
+
+/**
+ * A GMM allocation chunk.
+ */
+typedef struct GMMCHUNK
+{
+ /** The AVL node core.
+ * The Key is the chunk ID. (Giant mtx.) */
+ AVLU32NODECORE Core;
+ /** The memory object.
+ * Either from RTR0MemObjAllocPhysNC or RTR0MemObjLockUser depending on
+ * what the host can dish up with. (Chunk mtx protects mapping accesses
+ * and related frees.) */
+ RTR0MEMOBJ hMemObj;
+#if defined(VBOX_WITH_RAM_IN_KERNEL) && !defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM)
+ /** Pointer to the kernel mapping. */
+ uint8_t *pbMapping;
+#endif
+ /** Pointer to the next chunk in the free list. (Giant mtx.) */
+ PGMMCHUNK pFreeNext;
+ /** Pointer to the previous chunk in the free list. (Giant mtx.) */
+ PGMMCHUNK pFreePrev;
+ /** Pointer to the free set this chunk belongs to. NULL for
+ * chunks with no free pages. (Giant mtx.) */
+ PGMMCHUNKFREESET pSet;
+ /** List node in the chunk list (GMM::ChunkList). (Giant mtx.) */
+ RTLISTNODE ListNode;
+ /** Pointer to an array of mappings. (Chunk mtx.) */
+ PGMMCHUNKMAP paMappingsX;
+ /** The number of mappings. (Chunk mtx.) */
+ uint16_t cMappingsX;
+ /** The index of the chunk mutex this chunk is using. UINT8_MAX if nobody is
+ * mapping or freeing anything. (Giant mtx.) */
+ uint8_t volatile iChunkMtx;
+ /** GMM_CHUNK_FLAGS_XXX. (Giant mtx.) */
+ uint8_t fFlags;
+ /** The head of the list of free pages. UINT16_MAX is the NIL value.
+ * (Giant mtx.) */
+ uint16_t iFreeHead;
+ /** The number of free pages. (Giant mtx.) */
+ uint16_t cFree;
+ /** The GVM handle of the VM that first allocated pages from this chunk; this
+ * is used as a preference when there are several chunks to choose from.
+ * When in bound memory mode this isn't a preference any longer. (Giant
+ * mtx.) */
+ uint16_t hGVM;
+ /** The ID of the NUMA node the memory mostly resides on. (Reserved for
+ * future use.) (Giant mtx.) */
+ uint16_t idNumaNode;
+ /** The number of private pages. (Giant mtx.) */
+ uint16_t cPrivate;
+ /** The number of shared pages. (Giant mtx.) */
+ uint16_t cShared;
+ /** The pages. (Giant mtx.) */
+ GMMPAGE aPages[GMM_CHUNK_SIZE >> PAGE_SHIFT];
+} GMMCHUNK;
+
+/** Indicates that the NUMA properties of the memory are unknown. */
+#define GMM_CHUNK_NUMA_ID_UNKNOWN UINT16_C(0xfffe)
+
+/** @name GMM_CHUNK_FLAGS_XXX - chunk flags.
+ * @{ */
+/** Indicates that the chunk is a large page (2MB). */
+#define GMM_CHUNK_FLAGS_LARGE_PAGE UINT16_C(0x0001)
+#ifdef GMM_WITH_LEGACY_MODE
+/** Indicates that the chunk was locked rather than allocated directly. */
+# define GMM_CHUNK_FLAGS_SEEDED UINT16_C(0x0002)
+#endif
+/** @} */
+
+
+/**
+ * An allocation chunk TLB entry.
+ */
+typedef struct GMMCHUNKTLBE
+{
+ /** The chunk id. */
+ uint32_t idChunk;
+ /** Pointer to the chunk. */
+ PGMMCHUNK pChunk;
+} GMMCHUNKTLBE;
+/** Pointer to an allocation chunk TLB entry. */
+typedef GMMCHUNKTLBE *PGMMCHUNKTLBE;
+
+
+/** The number of entries in the allocation chunk TLB. */
+#define GMM_CHUNKTLB_ENTRIES 32
+/** Gets the TLB entry index for the given Chunk ID. */
+#define GMM_CHUNKTLB_IDX(idChunk) ( (idChunk) & (GMM_CHUNKTLB_ENTRIES - 1) )
+
+/**
+ * An allocation chunk TLB.
+ */
+typedef struct GMMCHUNKTLB
+{
+ /** The TLB entries. */
+ GMMCHUNKTLBE aEntries[GMM_CHUNKTLB_ENTRIES];
+} GMMCHUNKTLB;
+/** Pointer to an allocation chunk TLB. */
+typedef GMMCHUNKTLB *PGMMCHUNKTLB;
+
+
+/**
+ * The GMM instance data.
+ */
+typedef struct GMM
+{
+ /** Magic / eye catcher. GMM_MAGIC */
+ uint32_t u32Magic;
+ /** The number of threads waiting on the mutex. */
+ uint32_t cMtxContenders;
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ /** The critical section protecting the GMM.
+ * More fine grained locking can be implemented later if necessary. */
+ RTCRITSECT GiantCritSect;
+#else
+ /** The fast mutex protecting the GMM.
+ * More fine grained locking can be implemented later if necessary. */
+ RTSEMFASTMUTEX hMtx;
+#endif
+#ifdef VBOX_STRICT
+ /** The current mutex owner. */
+ RTNATIVETHREAD hMtxOwner;
+#endif
+ /** Spinlock protecting the AVL tree.
+ * @todo Make this a read-write spinlock as we should allow concurrent
+ * lookups. */
+ RTSPINLOCK hSpinLockTree;
+ /** The chunk tree.
+ * Protected by hSpinLockTree. */
+ PAVLU32NODECORE pChunks;
+ /** Chunk freeing generation - incremented whenever a chunk is freed. Used
+ * for validating the per-VM chunk TLB entries. Valid range is 1 to 2^62
+ * (exclusive), though higher numbers may temporarily occur while
+ * invalidating the individual TLBs during wrap-around processing. */
+ uint64_t volatile idFreeGeneration;
+ /** The chunk TLB.
+ * Protected by hSpinLockTree. */
+ GMMCHUNKTLB ChunkTLB;
+ /** The private free set. */
+ GMMCHUNKFREESET PrivateX;
+ /** The shared free set. */
+ GMMCHUNKFREESET Shared;
+
+ /** Shared module tree (global).
+ * @todo separate trees for distinctly different guest OSes. */
+ PAVLLU32NODECORE pGlobalSharedModuleTree;
+ /** Sharable modules (count of nodes in pGlobalSharedModuleTree). */
+ uint32_t cShareableModules;
+
+ /** The chunk list. For simplifying the cleanup process and avoiding tree
+ * traversal. */
+ RTLISTANCHOR ChunkList;
+
+ /** The maximum number of pages we're allowed to allocate.
+ * @gcfgm{GMM/MaxPages,64-bit, Direct.}
+ * @gcfgm{GMM/PctPages,32-bit, Relative to the number of host pages.} */
+ uint64_t cMaxPages;
+ /** The number of pages that have been reserved.
+ * The deal is that cReservedPages - cOverCommittedPages <= cMaxPages. */
+ uint64_t cReservedPages;
+ /** The number of pages that we have over-committed in reservations. */
+ uint64_t cOverCommittedPages;
+ /** The number of actually allocated (committed if you like) pages. */
+ uint64_t cAllocatedPages;
+ /** The number of pages that are shared. A subset of cAllocatedPages. */
+ uint64_t cSharedPages;
+ /** The number of pages that are actually shared between VMs. */
+ uint64_t cDuplicatePages;
+ /** The number of shared pages that have been left behind by
+ * VMs not doing proper cleanups. */
+ uint64_t cLeftBehindSharedPages;
+ /** The number of allocation chunks.
+ * (The number of pages we've allocated from the host can be derived from this.) */
+ uint32_t cChunks;
+ /** The number of current ballooned pages. */
+ uint64_t cBalloonedPages;
+
+#ifndef GMM_WITH_LEGACY_MODE
+# ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
+ /** Whether #RTR0MemObjAllocPhysNC works. */
+ bool fHasWorkingAllocPhysNC;
+# else
+ bool fPadding;
+# endif
+#else
+ /** The legacy allocation mode indicator.
+ * This is determined at initialization time. */
+ bool fLegacyAllocationMode;
+#endif
+ /** The bound memory mode indicator.
+ * When set, the memory will be bound to a specific VM and never
+ * shared. This is always set if fLegacyAllocationMode is set.
+ * (Also determined at initialization time.) */
+ bool fBoundMemoryMode;
+ /** The number of registered VMs. */
+ uint16_t cRegisteredVMs;
+
+ /** The number of freed chunks ever. This is used as a list generation to
+ * avoid restarting the cleanup scanning when the list wasn't modified. */
+ uint32_t cFreedChunks;
+ /** The previous allocated Chunk ID.
+ * Used as a hint to avoid scanning the whole bitmap. */
+ uint32_t idChunkPrev;
+ /** Chunk ID allocation bitmap.
+ * Bits of allocated IDs are set, free ones are clear.
+ * The NIL id (0) is marked allocated. */
+ uint32_t bmChunkId[(GMM_CHUNKID_LAST + 1 + 31) / 32];
+
+ /** The index of the next mutex to use. */
+ uint32_t iNextChunkMtx;
+ /** Chunk locks for reducing lock contention without having to allocate
+ * one lock per chunk. */
+ struct
+ {
+ /** The mutex */
+ RTSEMFASTMUTEX hMtx;
+ /** The number of threads currently using this mutex. */
+ uint32_t volatile cUsers;
+ } aChunkMtx[64];
+} GMM;
+/** Pointer to the GMM instance. */
+typedef GMM *PGMM;
+
+/** The value of GMM::u32Magic (Katsuhiro Otomo). */
+#define GMM_MAGIC UINT32_C(0x19540414)
+
+
+/**
+ * GMM chunk mutex state.
+ *
+ * This is returned by gmmR0ChunkMutexAcquire and is used by the other
+ * gmmR0ChunkMutex* methods.
+ */
+typedef struct GMMR0CHUNKMTXSTATE
+{
+ PGMM pGMM;
+ /** The index of the chunk mutex. */
+ uint8_t iChunkMtx;
+ /** The relevant flags (GMMR0CHUNK_MTX_XXX). */
+ uint8_t fFlags;
+} GMMR0CHUNKMTXSTATE;
+/** Pointer to a chunk mutex state. */
+typedef GMMR0CHUNKMTXSTATE *PGMMR0CHUNKMTXSTATE;
+
+/** @name GMMR0CHUNK_MTX_XXX
+ * @{ */
+#define GMMR0CHUNK_MTX_INVALID UINT32_C(0)
+#define GMMR0CHUNK_MTX_KEEP_GIANT UINT32_C(1)
+#define GMMR0CHUNK_MTX_RETAKE_GIANT UINT32_C(2)
+#define GMMR0CHUNK_MTX_DROP_GIANT UINT32_C(3)
+#define GMMR0CHUNK_MTX_END UINT32_C(4)
+/** @} */
+
+
+/** The maximum number of shared modules per-vm. */
+#define GMM_MAX_SHARED_PER_VM_MODULES 2048
+/** The maximum number of shared modules GMM is allowed to track. */
+#define GMM_MAX_SHARED_GLOBAL_MODULES 16834
+
+
+/**
+ * Argument packet for gmmR0SharedModuleCleanup.
+ */
+typedef struct GMMR0SHMODPERVMDTORARGS
+{
+ PGVM pGVM;
+ PGMM pGMM;
+} GMMR0SHMODPERVMDTORARGS;
+
+/**
+ * Argument packet for gmmR0CheckSharedModule.
+ */
+typedef struct GMMCHECKSHAREDMODULEINFO
+{
+ PGVM pGVM;
+ VMCPUID idCpu;
+} GMMCHECKSHAREDMODULEINFO;
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** Pointer to the GMM instance data. */
+static PGMM g_pGMM = NULL;
+
+/** Macro for obtaining and validating the g_pGMM pointer.
+ *
+ * On failure it will return from the invoking function with the specified
+ * return value.
+ *
+ * @param pGMM The name of the pGMM variable.
+ * @param rc The return value on failure. Use VERR_GMM_INSTANCE for VBox
+ * status codes.
+ */
+#define GMM_GET_VALID_INSTANCE(pGMM, rc) \
+ do { \
+ (pGMM) = g_pGMM; \
+ AssertPtrReturn((pGMM), (rc)); \
+ AssertMsgReturn((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic), (rc)); \
+ } while (0)
+
+/** Macro for obtaining and validating the g_pGMM pointer, void function
+ * variant.
+ *
+ * On failure it will return from the invoking function.
+ *
+ * @param pGMM The name of the pGMM variable.
+ */
+#define GMM_GET_VALID_INSTANCE_VOID(pGMM) \
+ do { \
+ (pGMM) = g_pGMM; \
+ AssertPtrReturnVoid((pGMM)); \
+ AssertMsgReturnVoid((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic)); \
+ } while (0)
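+
+/*
+ * NB: Illustrative sketch added for this write-up, not part of the original
+ * source. It shows the typical use of the instance macros above; the function
+ * name is hypothetical, real callers such as GMMR0CleanupVM and
+ * GMMR0InitialReservation below follow the same pattern.
+ */
+#if 0 /* documentation sketch only */
+static int gmmR0SketchUser(void)
+{
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE); /* bails out with VERR_GMM_INSTANCE if g_pGMM is bad */
+ /* ... typically followed by gmmR0MutexAcquire(pGMM) and the actual work ... */
+ return VINF_SUCCESS;
+}
+#endif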
+
+
+/** @def GMM_CHECK_SANITY_UPON_ENTERING
+ * Checks the sanity of the GMM instance data before making changes.
+ *
+ * This macro is a stub by default and must be enabled manually in the code.
+ *
+ * @returns true if sane, false if not.
+ * @param pGMM The name of the pGMM variable.
+ */
+#if defined(VBOX_STRICT) && defined(GMMR0_WITH_SANITY_CHECK) && 0
+# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
+#else
+# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (true)
+#endif
+
+/** @def GMM_CHECK_SANITY_UPON_LEAVING
+ * Checks the sanity of the GMM instance data after making changes.
+ *
+ * This macro is a stub by default and must be enabled manually in the code.
+ *
+ * @returns true if sane, false if not.
+ * @param pGMM The name of the pGMM variable.
+ */
+#if defined(VBOX_STRICT) && defined(GMMR0_WITH_SANITY_CHECK) && 0
+# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
+#else
+# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (true)
+#endif
+
+/** @def GMM_CHECK_SANITY_IN_LOOPS
+ * Checks the sanity of the GMM instance in the allocation loops.
+ *
+ * This macro is a stub by default and must be enabled manually in the code.
+ *
+ * @returns true if sane, false if not.
+ * @param pGMM The name of the pGMM variable.
+ */
+#if defined(VBOX_STRICT) && defined(GMMR0_WITH_SANITY_CHECK) && 0
+# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
+#else
+# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (true)
+#endif
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM);
+static bool gmmR0CleanupVMScanChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
+DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk);
+DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet);
+DECLINLINE(void) gmmR0SelectSetAndLinkChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
+#ifdef GMMR0_WITH_SANITY_CHECK
+static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo);
+#endif
+static bool gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem);
+DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage);
+DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage);
+static int gmmR0UnmapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
+#ifdef VBOX_WITH_PAGE_SHARING
+static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM);
+# ifdef VBOX_STRICT
+static uint32_t gmmR0StrictPageChecksum(PGMM pGMM, PGVM pGVM, uint32_t idPage);
+# endif
+#endif
+
+
+
+/**
+ * Initializes the GMM component.
+ *
+ * This is called when the VMMR0.r0 module is loaded and protected by the
+ * loader semaphore.
+ *
+ * @returns VBox status code.
+ */
+GMMR0DECL(int) GMMR0Init(void)
+{
+ LogFlow(("GMMInit:\n"));
+
+ /*
+ * Allocate the instance data and the locks.
+ */
+ PGMM pGMM = (PGMM)RTMemAllocZ(sizeof(*pGMM));
+ if (!pGMM)
+ return VERR_NO_MEMORY;
+
+ pGMM->u32Magic = GMM_MAGIC;
+ for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
+ pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
+ RTListInit(&pGMM->ChunkList);
+ ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
+
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ int rc = RTCritSectInit(&pGMM->GiantCritSect);
+#else
+ int rc = RTSemFastMutexCreate(&pGMM->hMtx);
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ unsigned iMtx;
+ for (iMtx = 0; iMtx < RT_ELEMENTS(pGMM->aChunkMtx); iMtx++)
+ {
+ rc = RTSemFastMutexCreate(&pGMM->aChunkMtx[iMtx].hMtx);
+ if (RT_FAILURE(rc))
+ break;
+ }
+ pGMM->hSpinLockTree = NIL_RTSPINLOCK;
+ if (RT_SUCCESS(rc))
+ rc = RTSpinlockCreate(&pGMM->hSpinLockTree, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "gmm-chunk-tree");
+ if (RT_SUCCESS(rc))
+ {
+#ifndef GMM_WITH_LEGACY_MODE
+ /*
+ * Figure out how we're going to allocate stuff (only applicable to
+ * hosts with linear physical memory mappings).
+ */
+ pGMM->fBoundMemoryMode = false;
+# ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
+ pGMM->fHasWorkingAllocPhysNC = false;
+
+ RTR0MEMOBJ hMemObj;
+ rc = RTR0MemObjAllocPhysNC(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTR0MemObjFree(hMemObj, true);
+ AssertRC(rc);
+ pGMM->fHasWorkingAllocPhysNC = true;
+ }
+ else if (rc != VERR_NOT_SUPPORTED)
+ SUPR0Printf("GMMR0Init: Warning! RTR0MemObjAllocPhysNC(, %u, NIL_RTHCPHYS) -> %d!\n", GMM_CHUNK_SIZE, rc);
+# endif
+#else /* GMM_WITH_LEGACY_MODE */
+ /*
+ * Check and see if RTR0MemObjAllocPhysNC works.
+ */
+# if 0 /* later, see @bugref{3170}. */
+ RTR0MEMOBJ MemObj;
+ rc = RTR0MemObjAllocPhysNC(&MemObj, _64K, NIL_RTHCPHYS);
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTR0MemObjFree(MemObj, true);
+ AssertRC(rc);
+ }
+ else if (rc == VERR_NOT_SUPPORTED)
+ pGMM->fLegacyAllocationMode = pGMM->fBoundMemoryMode = true;
+ else
+ SUPR0Printf("GMMR0Init: RTR0MemObjAllocPhysNC(,64K,Any) -> %d!\n", rc);
+# else
+# if defined(RT_OS_WINDOWS) || (defined(RT_OS_SOLARIS) && ARCH_BITS == 64) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
+ pGMM->fLegacyAllocationMode = false;
+# if ARCH_BITS == 32
+ /* Don't reuse possibly partial chunks because of the virtual
+ address space limitation. */
+ pGMM->fBoundMemoryMode = true;
+# else
+ pGMM->fBoundMemoryMode = false;
+# endif
+# else
+ pGMM->fLegacyAllocationMode = true;
+ pGMM->fBoundMemoryMode = true;
+# endif
+# endif
+#endif /* GMM_WITH_LEGACY_MODE */
+
+ /*
+ * Query system page count and guess a reasonable cMaxPages value.
+ */
+ pGMM->cMaxPages = UINT32_MAX; /** @todo IPRT function for query ram size and such. */
+
+ /*
+ * The idFreeGeneration value should be set so we actually trigger the
+ * wrap-around invalidation handling during a typical test run.
+ */
+ pGMM->idFreeGeneration = UINT64_MAX / 4 - 128;
+
+ g_pGMM = pGMM;
+#ifdef GMM_WITH_LEGACY_MODE
+ LogFlow(("GMMInit: pGMM=%p fLegacyAllocationMode=%RTbool fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fLegacyAllocationMode, pGMM->fBoundMemoryMode));
+#elif defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM)
+ LogFlow(("GMMInit: pGMM=%p fBoundMemoryMode=%RTbool fHasWorkingAllocPhysNC=%RTbool\n", pGMM, pGMM->fBoundMemoryMode, pGMM->fHasWorkingAllocPhysNC));
+#else
+ LogFlow(("GMMInit: pGMM=%p fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fBoundMemoryMode));
+#endif
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Bail out.
+ */
+ RTSpinlockDestroy(pGMM->hSpinLockTree);
+ while (iMtx-- > 0)
+ RTSemFastMutexDestroy(pGMM->aChunkMtx[iMtx].hMtx);
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ RTCritSectDelete(&pGMM->GiantCritSect);
+#else
+ RTSemFastMutexDestroy(pGMM->hMtx);
+#endif
+ }
+
+ pGMM->u32Magic = 0;
+ RTMemFree(pGMM);
+ SUPR0Printf("GMMR0Init: failed! rc=%d\n", rc);
+ return rc;
+}
+
+
+/**
+ * Terminates the GMM component.
+ */
+GMMR0DECL(void) GMMR0Term(void)
+{
+ LogFlow(("GMMTerm:\n"));
+
+ /*
+ * Take care / be paranoid...
+ */
+ PGMM pGMM = g_pGMM;
+ if (!VALID_PTR(pGMM))
+ return;
+ if (pGMM->u32Magic != GMM_MAGIC)
+ {
+ SUPR0Printf("GMMR0Term: u32Magic=%#x\n", pGMM->u32Magic);
+ return;
+ }
+
+ /*
+ * Undo what init did and free all the resources we've acquired.
+ */
+ /* Destroy the fundamentals. */
+ g_pGMM = NULL;
+ pGMM->u32Magic = ~GMM_MAGIC;
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ RTCritSectDelete(&pGMM->GiantCritSect);
+#else
+ RTSemFastMutexDestroy(pGMM->hMtx);
+ pGMM->hMtx = NIL_RTSEMFASTMUTEX;
+#endif
+ RTSpinlockDestroy(pGMM->hSpinLockTree);
+ pGMM->hSpinLockTree = NIL_RTSPINLOCK;
+
+ /* Free any chunks still hanging around. */
+ RTAvlU32Destroy(&pGMM->pChunks, gmmR0TermDestroyChunk, pGMM);
+
+ /* Destroy the chunk locks. */
+ for (unsigned iMtx = 0; iMtx < RT_ELEMENTS(pGMM->aChunkMtx); iMtx++)
+ {
+ Assert(pGMM->aChunkMtx[iMtx].cUsers == 0);
+ RTSemFastMutexDestroy(pGMM->aChunkMtx[iMtx].hMtx);
+ pGMM->aChunkMtx[iMtx].hMtx = NIL_RTSEMFASTMUTEX;
+ }
+
+ /* Finally the instance data itself. */
+ RTMemFree(pGMM);
+ LogFlow(("GMMTerm: done\n"));
+}
+
+
+/**
+ * RTAvlU32Destroy callback.
+ *
+ * @returns 0
+ * @param pNode The node to destroy.
+ * @param pvGMM The GMM handle.
+ */
+static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM)
+{
+ PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
+
+ if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
+ SUPR0Printf("GMMR0Term: %RKv/%#x: cFree=%d cPrivate=%d cShared=%d cMappings=%d\n", pChunk,
+ pChunk->Core.Key, pChunk->cFree, pChunk->cPrivate, pChunk->cShared, pChunk->cMappingsX);
+
+ int rc = RTR0MemObjFree(pChunk->hMemObj, true /* fFreeMappings */);
+ if (RT_FAILURE(rc))
+ {
+ SUPR0Printf("GMMR0Term: %RKv/%#x: RTR0MemObjFree(%RKv,true) -> %d (cMappings=%d)\n", pChunk,
+ pChunk->Core.Key, pChunk->hMemObj, rc, pChunk->cMappingsX);
+ AssertRC(rc);
+ }
+ pChunk->hMemObj = NIL_RTR0MEMOBJ;
+
+ RTMemFree(pChunk->paMappingsX);
+ pChunk->paMappingsX = NULL;
+
+ RTMemFree(pChunk);
+ NOREF(pvGMM);
+ return 0;
+}
+
+
+/**
+ * Initializes the per-VM data for the GMM.
+ *
+ * This is called from within the GVMM lock (from GVMMR0CreateVM)
+ * and should only initialize the data members so GMMR0CleanupVM
+ * can deal with them. We reserve no memory or anything here,
+ * that's done later in GMMR0InitVM.
+ *
+ * @param pGVM Pointer to the Global VM structure.
+ */
+GMMR0DECL(int) GMMR0InitPerVMData(PGVM pGVM)
+{
+ AssertCompile(RT_SIZEOFMEMB(GVM,gmm.s) <= RT_SIZEOFMEMB(GVM,gmm.padding));
+
+ pGVM->gmm.s.Stats.enmPolicy = GMMOCPOLICY_INVALID;
+ pGVM->gmm.s.Stats.enmPriority = GMMPRIORITY_INVALID;
+ pGVM->gmm.s.Stats.fMayAllocate = false;
+
+ pGVM->gmm.s.hChunkTlbSpinLock = NIL_RTSPINLOCK;
+ int rc = RTSpinlockCreate(&pGVM->gmm.s.hChunkTlbSpinLock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "per-vm-chunk-tlb");
+ AssertRCReturn(rc, rc);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Acquires the GMM giant lock.
+ *
+ * @returns Assert status code from RTSemFastMutexRequest.
+ * @param pGMM Pointer to the GMM instance.
+ */
+static int gmmR0MutexAcquire(PGMM pGMM)
+{
+ ASMAtomicIncU32(&pGMM->cMtxContenders);
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ int rc = RTCritSectEnter(&pGMM->GiantCritSect);
+#else
+ int rc = RTSemFastMutexRequest(pGMM->hMtx);
+#endif
+ ASMAtomicDecU32(&pGMM->cMtxContenders);
+ AssertRC(rc);
+#ifdef VBOX_STRICT
+ pGMM->hMtxOwner = RTThreadNativeSelf();
+#endif
+ return rc;
+}
+
+
+/**
+ * Releases the GMM giant lock.
+ *
+ * @returns Assert status code from RTSemFastMutexRequest.
+ * @param pGMM Pointer to the GMM instance.
+ */
+static int gmmR0MutexRelease(PGMM pGMM)
+{
+#ifdef VBOX_STRICT
+ pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
+#endif
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ int rc = RTCritSectLeave(&pGMM->GiantCritSect);
+#else
+ int rc = RTSemFastMutexRelease(pGMM->hMtx);
+ AssertRC(rc);
+#endif
+ return rc;
+}
+
+
+/**
+ * Yields the GMM giant lock if there is contention and a certain minimum time
+ * has elapsed since we took it.
+ *
+ * @returns @c true if the mutex was yielded, @c false if not.
+ * @param pGMM Pointer to the GMM instance.
+ * @param puLockNanoTS Where the lock acquisition time stamp is kept
+ * (in/out).
+ */
+static bool gmmR0MutexYield(PGMM pGMM, uint64_t *puLockNanoTS)
+{
+ /*
+ * If nobody is contending the mutex, don't bother checking the time.
+ */
+ if (ASMAtomicReadU32(&pGMM->cMtxContenders) == 0)
+ return false;
+
+ /*
+ * Don't yield if we haven't executed for at least 2 milliseconds.
+ */
+ uint64_t uNanoNow = RTTimeSystemNanoTS();
+ if (uNanoNow - *puLockNanoTS < UINT32_C(2000000))
+ return false;
+
+ /*
+ * Yield the mutex.
+ */
+#ifdef VBOX_STRICT
+ pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
+#endif
+ ASMAtomicIncU32(&pGMM->cMtxContenders);
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ int rc1 = RTCritSectLeave(&pGMM->GiantCritSect); AssertRC(rc1);
+#else
+ int rc1 = RTSemFastMutexRelease(pGMM->hMtx); AssertRC(rc1);
+#endif
+
+ RTThreadYield();
+
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ int rc2 = RTCritSectEnter(&pGMM->GiantCritSect); AssertRC(rc2);
+#else
+ int rc2 = RTSemFastMutexRequest(pGMM->hMtx); AssertRC(rc2);
+#endif
+ *puLockNanoTS = RTTimeSystemNanoTS();
+ ASMAtomicDecU32(&pGMM->cMtxContenders);
+#ifdef VBOX_STRICT
+ pGMM->hMtxOwner = RTThreadNativeSelf();
+#endif
+
+ return true;
+}
+
+
+/**
+ * Acquires a chunk lock.
+ *
+ * The caller must own the giant lock.
+ *
+ * @returns Assert status code from RTSemFastMutexRequest.
+ * @param pMtxState The chunk mutex state info. (Avoids
+ * passing the same flags and stuff around
+ * for subsequent release and drop-giant
+ * calls.)
+ * @param pGMM Pointer to the GMM instance.
+ * @param pChunk Pointer to the chunk.
+ * @param fFlags Flags regarding the giant lock, GMMR0CHUNK_MTX_XXX.
+ */
+static int gmmR0ChunkMutexAcquire(PGMMR0CHUNKMTXSTATE pMtxState, PGMM pGMM, PGMMCHUNK pChunk, uint32_t fFlags)
+{
+ Assert(fFlags > GMMR0CHUNK_MTX_INVALID && fFlags < GMMR0CHUNK_MTX_END);
+ Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
+
+ pMtxState->pGMM = pGMM;
+ pMtxState->fFlags = (uint8_t)fFlags;
+
+ /*
+ * Get the lock index and reference the lock.
+ */
+ Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
+ uint32_t iChunkMtx = pChunk->iChunkMtx;
+ if (iChunkMtx == UINT8_MAX)
+ {
+ iChunkMtx = pGMM->iNextChunkMtx++;
+ iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
+
+ /* Try get an unused one... */
+ if (pGMM->aChunkMtx[iChunkMtx].cUsers)
+ {
+ iChunkMtx = pGMM->iNextChunkMtx++;
+ iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
+ if (pGMM->aChunkMtx[iChunkMtx].cUsers)
+ {
+ iChunkMtx = pGMM->iNextChunkMtx++;
+ iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
+ if (pGMM->aChunkMtx[iChunkMtx].cUsers)
+ {
+ iChunkMtx = pGMM->iNextChunkMtx++;
+ iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
+ }
+ }
+ }
+
+ pChunk->iChunkMtx = iChunkMtx;
+ }
+ AssertCompile(RT_ELEMENTS(pGMM->aChunkMtx) < UINT8_MAX);
+ pMtxState->iChunkMtx = (uint8_t)iChunkMtx;
+ ASMAtomicIncU32(&pGMM->aChunkMtx[iChunkMtx].cUsers);
+
+ /*
+ * Drop the giant?
+ */
+ if (fFlags != GMMR0CHUNK_MTX_KEEP_GIANT)
+ {
+ /** @todo GMM life cycle cleanup (we may race someone
+ * destroying and cleaning up GMM)? */
+ gmmR0MutexRelease(pGMM);
+ }
+
+ /*
+ * Take the chunk mutex.
+ */
+ int rc = RTSemFastMutexRequest(pGMM->aChunkMtx[iChunkMtx].hMtx);
+ AssertRC(rc);
+ return rc;
+}
+
+
+/**
+ * Releases the chunk mutex and reacquires the giant lock if requested.
+ *
+ * @returns Assert status code from RTSemFastMutexRelease.
+ * @param pMtxState Pointer to the chunk mutex state.
+ * @param pChunk Pointer to the chunk if it's still
+ * alive, NULL if it isn't. This is used to deassociate
+ * the chunk from the mutex on the way out so a new one
+ * can be selected next time, thus avoiding contented
+ * mutexes.
+ */
+static int gmmR0ChunkMutexRelease(PGMMR0CHUNKMTXSTATE pMtxState, PGMMCHUNK pChunk)
+{
+ PGMM pGMM = pMtxState->pGMM;
+
+ /*
+ * Release the chunk mutex and reacquire the giant if requested.
+ */
+ int rc = RTSemFastMutexRelease(pGMM->aChunkMtx[pMtxState->iChunkMtx].hMtx);
+ AssertRC(rc);
+ if (pMtxState->fFlags == GMMR0CHUNK_MTX_RETAKE_GIANT)
+ rc = gmmR0MutexAcquire(pGMM);
+ else
+ Assert((pMtxState->fFlags != GMMR0CHUNK_MTX_DROP_GIANT) == (pGMM->hMtxOwner == RTThreadNativeSelf()));
+
+ /*
+ * Drop the chunk mutex user reference and deassociate it from the chunk
+ * when possible.
+ */
+ if ( ASMAtomicDecU32(&pGMM->aChunkMtx[pMtxState->iChunkMtx].cUsers) == 0
+ && pChunk
+ && RT_SUCCESS(rc) )
+ {
+ if (pMtxState->fFlags != GMMR0CHUNK_MTX_DROP_GIANT)
+ pChunk->iChunkMtx = UINT8_MAX;
+ else
+ {
+ rc = gmmR0MutexAcquire(pGMM);
+ if (RT_SUCCESS(rc))
+ {
+ if (pGMM->aChunkMtx[pMtxState->iChunkMtx].cUsers == 0)
+ pChunk->iChunkMtx = UINT8_MAX;
+ rc = gmmR0MutexRelease(pGMM);
+ }
+ }
+ }
+
+ pMtxState->pGMM = NULL;
+ return rc;
+}
+
+
+/**
+ * Drops the giant GMM lock we kept in gmmR0ChunkMutexAcquire while keeping the
+ * chunk locked.
+ *
+ * This only works if gmmR0ChunkMutexAcquire was called with
+ * GMMR0CHUNK_MTX_KEEP_GIANT. gmmR0ChunkMutexRelease will retake the giant
+ * mutex, i.e. behave as if GMMR0CHUNK_MTX_RETAKE_GIANT was used.
+ *
+ * @returns VBox status code (assuming success is ok).
+ * @param pMtxState Pointer to the chunk mutex state.
+ */
+static int gmmR0ChunkMutexDropGiant(PGMMR0CHUNKMTXSTATE pMtxState)
+{
+ AssertReturn(pMtxState->fFlags == GMMR0CHUNK_MTX_KEEP_GIANT, VERR_GMM_MTX_FLAGS);
+ Assert(pMtxState->pGMM->hMtxOwner == RTThreadNativeSelf());
+ pMtxState->fFlags = GMMR0CHUNK_MTX_RETAKE_GIANT;
+ /** @todo GMM life cycle cleanup (we may race someone
+ * destroying and cleaning up GMM)? */
+ return gmmR0MutexRelease(pMtxState->pGMM);
+}
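+
+/*
+ * NB: Illustrative sketch added for this write-up, not part of the original
+ * source. It shows the acquire / drop-giant / release pattern implemented by
+ * the three chunk mutex functions above; gmmR0CleanupVMScanChunk below does
+ * the same on a real chunk. The function name is hypothetical.
+ */
+#if 0 /* documentation sketch only */
+static void gmmR0SketchChunkMtxUser(PGMM pGMM, PGMMCHUNK pChunk)
+{
+ /* The caller owns the giant GMM lock at this point. */
+ GMMR0CHUNKMTXSTATE MtxState;
+ gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
+ /* ... quick work under both the giant and the chunk mutex ... */
+ gmmR0ChunkMutexDropGiant(&MtxState); /* the release below will retake the giant */
+ /* ... slow work, e.g. freeing a ring-3 mapping of the chunk ... */
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+}
+#endif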
+
+
+/**
+ * For experimenting with NUMA affinity and such.
+ *
+ * @returns The current NUMA Node ID.
+ */
+static uint16_t gmmR0GetCurrentNumaNodeId(void)
+{
+#if 1
+ return GMM_CHUNK_NUMA_ID_UNKNOWN;
+#else
+ return RTMpCpuId() / 16;
+#endif
+}
+
+
+
+/**
+ * Cleans up when a VM is terminating.
+ *
+ * @param pGVM Pointer to the Global VM structure.
+ */
+GMMR0DECL(void) GMMR0CleanupVM(PGVM pGVM)
+{
+ LogFlow(("GMMR0CleanupVM: pGVM=%p:{.hSelf=%#x}\n", pGVM, pGVM->hSelf));
+
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE_VOID(pGMM);
+
+#ifdef VBOX_WITH_PAGE_SHARING
+ /*
+ * Clean up all registered shared modules first.
+ */
+ gmmR0SharedModuleCleanup(pGMM, pGVM);
+#endif
+
+ gmmR0MutexAcquire(pGMM);
+ uint64_t uLockNanoTS = RTTimeSystemNanoTS();
+ GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
+
+ /*
+ * The policy is 'INVALID' until the initial reservation
+ * request has been serviced.
+ */
+ if ( pGVM->gmm.s.Stats.enmPolicy > GMMOCPOLICY_INVALID
+ && pGVM->gmm.s.Stats.enmPolicy < GMMOCPOLICY_END)
+ {
+ /*
+ * If it's the last VM around, we can skip walking all the chunks looking
+ * for the pages owned by this VM and instead flush the whole shebang.
+ *
+ * This takes care of the eventuality that a VM has left shared page
+ * references behind (shouldn't happen of course, but you never know).
+ */
+ Assert(pGMM->cRegisteredVMs);
+ pGMM->cRegisteredVMs--;
+
+ /*
+ * Walk the entire pool looking for pages that belong to this VM
+ * and leftover mappings. (This'll only catch private pages,
+ * shared pages will be 'left behind'.)
+ */
+ /** @todo r=bird: This scanning+freeing could be optimized in bound mode! */
+ uint64_t cPrivatePages = pGVM->gmm.s.Stats.cPrivatePages; /* save */
+
+ unsigned iCountDown = 64;
+ bool fRedoFromStart;
+ PGMMCHUNK pChunk;
+ do
+ {
+ fRedoFromStart = false;
+ RTListForEachReverse(&pGMM->ChunkList, pChunk, GMMCHUNK, ListNode)
+ {
+ uint32_t const cFreeChunksOld = pGMM->cFreedChunks;
+ if ( ( !pGMM->fBoundMemoryMode
+ || pChunk->hGVM == pGVM->hSelf)
+ && gmmR0CleanupVMScanChunk(pGMM, pGVM, pChunk))
+ {
+ /* We left the giant mutex, so reset the yield counters. */
+ uLockNanoTS = RTTimeSystemNanoTS();
+ iCountDown = 64;
+ }
+ else
+ {
+ /* Didn't leave it, so do normal yielding. */
+ if (!iCountDown)
+ gmmR0MutexYield(pGMM, &uLockNanoTS);
+ else
+ iCountDown--;
+ }
+ if (pGMM->cFreedChunks != cFreeChunksOld)
+ {
+ fRedoFromStart = true;
+ break;
+ }
+ }
+ } while (fRedoFromStart);
+
+ if (pGVM->gmm.s.Stats.cPrivatePages)
+ SUPR0Printf("GMMR0CleanupVM: hGVM=%#x has %#x private pages that cannot be found!\n", pGVM->hSelf, pGVM->gmm.s.Stats.cPrivatePages);
+
+ pGMM->cAllocatedPages -= cPrivatePages;
+
+ /*
+ * Free empty chunks.
+ */
+ PGMMCHUNKFREESET pPrivateSet = pGMM->fBoundMemoryMode ? &pGVM->gmm.s.Private : &pGMM->PrivateX;
+ do
+ {
+ fRedoFromStart = false;
+ iCountDown = 10240;
+ pChunk = pPrivateSet->apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST];
+ while (pChunk)
+ {
+ PGMMCHUNK pNext = pChunk->pFreeNext;
+ Assert(pChunk->cFree == GMM_CHUNK_NUM_PAGES);
+ if ( !pGMM->fBoundMemoryMode
+ || pChunk->hGVM == pGVM->hSelf)
+ {
+ uint64_t const idGenerationOld = pPrivateSet->idGeneration;
+ if (gmmR0FreeChunk(pGMM, pGVM, pChunk, true /*fRelaxedSem*/))
+ {
+ /* We've left the giant mutex, restart? (+1 for our unlink) */
+ fRedoFromStart = pPrivateSet->idGeneration != idGenerationOld + 1;
+ if (fRedoFromStart)
+ break;
+ uLockNanoTS = RTTimeSystemNanoTS();
+ iCountDown = 10240;
+ }
+ }
+
+ /* Advance and maybe yield the lock. */
+ pChunk = pNext;
+ if (--iCountDown == 0)
+ {
+ uint64_t const idGenerationOld = pPrivateSet->idGeneration;
+ fRedoFromStart = gmmR0MutexYield(pGMM, &uLockNanoTS)
+ && pPrivateSet->idGeneration != idGenerationOld;
+ if (fRedoFromStart)
+ break;
+ iCountDown = 10240;
+ }
+ }
+ } while (fRedoFromStart);
+
+ /*
+ * Account for shared pages that weren't freed.
+ */
+ if (pGVM->gmm.s.Stats.cSharedPages)
+ {
+ Assert(pGMM->cSharedPages >= pGVM->gmm.s.Stats.cSharedPages);
+ SUPR0Printf("GMMR0CleanupVM: hGVM=%#x left %#x shared pages behind!\n", pGVM->hSelf, pGVM->gmm.s.Stats.cSharedPages);
+ pGMM->cLeftBehindSharedPages += pGVM->gmm.s.Stats.cSharedPages;
+ }
+
+ /*
+ * Clean up balloon statistics in case the VM process crashed.
+ */
+ Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.Stats.cBalloonedPages);
+ pGMM->cBalloonedPages -= pGVM->gmm.s.Stats.cBalloonedPages;
+
+ /*
+ * Update the over-commitment management statistics.
+ */
+ pGMM->cReservedPages -= pGVM->gmm.s.Stats.Reserved.cBasePages
+ + pGVM->gmm.s.Stats.Reserved.cFixedPages
+ + pGVM->gmm.s.Stats.Reserved.cShadowPages;
+ switch (pGVM->gmm.s.Stats.enmPolicy)
+ {
+ case GMMOCPOLICY_NO_OC:
+ break;
+ default:
+ /** @todo Update GMM->cOverCommittedPages */
+ break;
+ }
+ }
+
+ /* zap the GVM data. */
+ pGVM->gmm.s.Stats.enmPolicy = GMMOCPOLICY_INVALID;
+ pGVM->gmm.s.Stats.enmPriority = GMMPRIORITY_INVALID;
+ pGVM->gmm.s.Stats.fMayAllocate = false;
+
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ gmmR0MutexRelease(pGMM);
+
+ /*
+ * Destroy the spinlock.
+ */
+ RTSPINLOCK hSpinlock = NIL_RTSPINLOCK;
+ ASMAtomicXchgHandle(&pGVM->gmm.s.hChunkTlbSpinLock, NIL_RTSPINLOCK, &hSpinlock);
+ RTSpinlockDestroy(hSpinlock);
+
+ LogFlow(("GMMR0CleanupVM: returns\n"));
+}
+
+
+/**
+ * Scan one chunk for private pages belonging to the specified VM.
+ *
+ * @note This function may drop the giant mutex!
+ *
+ * @returns @c true if we've temporarily dropped the giant mutex, @c false if
+ * we didn't.
+ * @param pGMM Pointer to the GMM instance.
+ * @param pGVM The global VM handle.
+ * @param pChunk The chunk to scan.
+ */
+static bool gmmR0CleanupVMScanChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
+{
+ Assert(!pGMM->fBoundMemoryMode || pChunk->hGVM == pGVM->hSelf);
+
+ /*
+ * Look for pages belonging to the VM.
+ * (Perform some internal checks while we're scanning.)
+ */
+#ifndef VBOX_STRICT
+ if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
+#endif
+ {
+ unsigned cPrivate = 0;
+ unsigned cShared = 0;
+ unsigned cFree = 0;
+
+ gmmR0UnlinkChunk(pChunk); /* avoiding cFreePages updates. */
+
+ uint16_t hGVM = pGVM->hSelf;
+ unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
+ while (iPage-- > 0)
+ if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
+ {
+ if (pChunk->aPages[iPage].Private.hGVM == hGVM)
+ {
+ /*
+ * Free the page.
+ *
+ * The reason for not using gmmR0FreePrivatePage here is that we
+ * must *not* cause the chunk to be freed from under us - we're in
+ * an AVL tree walk here.
+ */
+ pChunk->aPages[iPage].u = 0;
+ pChunk->aPages[iPage].Free.iNext = pChunk->iFreeHead;
+ pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
+ pChunk->iFreeHead = iPage;
+ pChunk->cPrivate--;
+ pChunk->cFree++;
+ pGVM->gmm.s.Stats.cPrivatePages--;
+ cFree++;
+ }
+ else
+ cPrivate++;
+ }
+ else if (GMM_PAGE_IS_FREE(&pChunk->aPages[iPage]))
+ cFree++;
+ else
+ cShared++;
+
+ gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
+
+ /*
+ * Did it add up?
+ */
+ if (RT_UNLIKELY( pChunk->cFree != cFree
+ || pChunk->cPrivate != cPrivate
+ || pChunk->cShared != cShared))
+ {
+ SUPR0Printf("gmmR0CleanupVMScanChunk: Chunk %RKv/%#x has bogus stats - free=%d/%d private=%d/%d shared=%d/%d\n",
+ pChunk, pChunk->Core.Key, pChunk->cFree, cFree, pChunk->cPrivate, cPrivate, pChunk->cShared, cShared);
+ pChunk->cFree = cFree;
+ pChunk->cPrivate = cPrivate;
+ pChunk->cShared = cShared;
+ }
+ }
+
+ /*
+ * If not in bound memory mode, we should reset the hGVM field
+ * if it has our handle in it.
+ */
+ if (pChunk->hGVM == pGVM->hSelf)
+ {
+ if (!g_pGMM->fBoundMemoryMode)
+ pChunk->hGVM = NIL_GVM_HANDLE;
+ else if (pChunk->cFree != GMM_CHUNK_NUM_PAGES)
+ {
+ SUPR0Printf("gmmR0CleanupVMScanChunk: %RKv/%#x: cFree=%#x - it should be 0 in bound mode!\n",
+ pChunk, pChunk->Core.Key, pChunk->cFree);
+ AssertMsgFailed(("%p/%#x: cFree=%#x - it should be 0 in bound mode!\n", pChunk, pChunk->Core.Key, pChunk->cFree));
+
+ gmmR0UnlinkChunk(pChunk);
+ pChunk->cFree = GMM_CHUNK_NUM_PAGES;
+ gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
+ }
+ }
+
+ /*
+ * Look for a mapping belonging to the terminating VM.
+ */
+ GMMR0CHUNKMTXSTATE MtxState;
+ gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
+ unsigned cMappings = pChunk->cMappingsX;
+ for (unsigned i = 0; i < cMappings; i++)
+ if (pChunk->paMappingsX[i].pGVM == pGVM)
+ {
+ gmmR0ChunkMutexDropGiant(&MtxState);
+
+ RTR0MEMOBJ hMemObj = pChunk->paMappingsX[i].hMapObj;
+
+ cMappings--;
+ if (i < cMappings)
+ pChunk->paMappingsX[i] = pChunk->paMappingsX[cMappings];
+ pChunk->paMappingsX[cMappings].pGVM = NULL;
+ pChunk->paMappingsX[cMappings].hMapObj = NIL_RTR0MEMOBJ;
+ Assert(pChunk->cMappingsX - 1U == cMappings);
+ pChunk->cMappingsX = cMappings;
+
+ int rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings (NA) */);
+ if (RT_FAILURE(rc))
+ {
+ SUPR0Printf("gmmR0CleanupVMScanChunk: %RKv/%#x: mapping #%x: RTR0MemObjFree(%RKv,false) -> %d\n",
+ pChunk, pChunk->Core.Key, i, hMemObj, rc);
+ AssertRC(rc);
+ }
+
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ return true;
+ }
+
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ return false;
+}
+
+
+/**
+ * The initial resource reservations.
+ *
+ * This will make memory reservations according to policy and priority. If there aren't
+ * sufficient resources available to sustain the VM this function will fail and all
+ * future allocation requests will fail as well.
+ *
+ * These are just the initial reservations made very early during the VM creation
+ * process and will be adjusted later in the GMMR0UpdateReservation call after the
+ * ring-3 init has completed.
+ *
+ * @returns VBox status code.
+ * @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
+ * @retval VERR_GMM_
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id - must be zero.
+ * @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
+ * This does not include MMIO2 and similar.
+ * @param cShadowPages The number of pages that may be allocated for shadow paging structures.
+ * @param cFixedPages The number of pages that may be allocated for fixed objects like the
+ * hyper heap, MMIO2 and similar.
+ * @param enmPolicy The OC policy to use on this VM.
+ * @param enmPriority The priority in an out-of-memory situation.
+ *
+ * @thread The creator thread / EMT(0).
+ */
+GMMR0DECL(int) GMMR0InitialReservation(PGVM pGVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages,
+ uint32_t cFixedPages, GMMOCPOLICY enmPolicy, GMMPRIORITY enmPriority)
+{
+ LogFlow(("GMMR0InitialReservation: pGVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x enmPolicy=%d enmPriority=%d\n",
+ pGVM, cBasePages, cShadowPages, cFixedPages, enmPolicy, enmPriority));
+
+ /*
+ * Validate, get basics and take the semaphore.
+ */
+ AssertReturn(idCpu == 0, VERR_INVALID_CPU_ID);
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
+ AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
+ AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
+ AssertReturn(enmPolicy > GMMOCPOLICY_INVALID && enmPolicy < GMMOCPOLICY_END, VERR_INVALID_PARAMETER);
+ AssertReturn(enmPriority > GMMPRIORITY_INVALID && enmPriority < GMMPRIORITY_END, VERR_INVALID_PARAMETER);
+
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ if ( !pGVM->gmm.s.Stats.Reserved.cBasePages
+ && !pGVM->gmm.s.Stats.Reserved.cFixedPages
+ && !pGVM->gmm.s.Stats.Reserved.cShadowPages)
+ {
+ /*
+ * Check if we can accommodate this.
+ */
+ /* ... later ... */
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Update the records.
+ */
+ pGVM->gmm.s.Stats.Reserved.cBasePages = cBasePages;
+ pGVM->gmm.s.Stats.Reserved.cFixedPages = cFixedPages;
+ pGVM->gmm.s.Stats.Reserved.cShadowPages = cShadowPages;
+ pGVM->gmm.s.Stats.enmPolicy = enmPolicy;
+ pGVM->gmm.s.Stats.enmPriority = enmPriority;
+ pGVM->gmm.s.Stats.fMayAllocate = true;
+
+ pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
+ pGMM->cRegisteredVMs++;
+ }
+ }
+ else
+ rc = VERR_WRONG_ORDER;
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0InitialReservation: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0InitialReservation.
+ *
+ * @returns see GMMR0InitialReservation.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0InitialReservationReq(PGVM pGVM, VMCPUID idCpu, PGMMINITIALRESERVATIONREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GMMR0InitialReservation(pGVM, idCpu, pReq->cBasePages, pReq->cShadowPages,
+ pReq->cFixedPages, pReq->enmPolicy, pReq->enmPriority);
+}
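+
+/*
+ * NB: Illustrative sketch added for this write-up, not part of the original
+ * source. It shows the rough shape of the request packet the wrapper above
+ * expects; the numbers are made up and the ring-3 dispatch path is omitted.
+ */
+#if 0 /* documentation sketch only */
+ GMMINITIALRESERVATIONREQ Req;
+ Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+ Req.Hdr.cbReq = sizeof(Req);
+ Req.cBasePages = 0x40000; /* 1 GiB of base RAM and ROMs (hypothetical) */
+ Req.cShadowPages = 0x400;
+ Req.cFixedPages = 0x800;
+ Req.enmPolicy = GMMOCPOLICY_NO_OC;
+ Req.enmPriority = GMMPRIORITY_NORMAL;
+ /* ... handed to GMMR0InitialReservationReq() via the VMMR0 request path ... */
+#endif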
+
+
+/**
+ * This updates the memory reservation with the additional MMIO2 and ROM pages.
+ *
+ * @returns VBox status code.
+ * @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
+ * This does not include MMIO2 and similar.
+ * @param cShadowPages The number of pages that may be allocated for shadow paging structures.
+ * @param cFixedPages The number of pages that may be allocated for fixed objects like the
+ * hyper heap, MMIO2 and similar.
+ *
+ * @thread EMT(idCpu)
+ */
+GMMR0DECL(int) GMMR0UpdateReservation(PGVM pGVM, VMCPUID idCpu, uint64_t cBasePages,
+ uint32_t cShadowPages, uint32_t cFixedPages)
+{
+ LogFlow(("GMMR0UpdateReservation: pGVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x\n",
+ pGVM, cBasePages, cShadowPages, cFixedPages));
+
+ /*
+ * Validate, get basics and take the semaphore.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
+ AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
+ AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
+
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ if ( pGVM->gmm.s.Stats.Reserved.cBasePages
+ && pGVM->gmm.s.Stats.Reserved.cFixedPages
+ && pGVM->gmm.s.Stats.Reserved.cShadowPages)
+ {
+ /*
+ * Check if we can accommodate this.
+ */
+ /* ... later ... */
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Update the records.
+ */
+ pGMM->cReservedPages -= pGVM->gmm.s.Stats.Reserved.cBasePages
+ + pGVM->gmm.s.Stats.Reserved.cFixedPages
+ + pGVM->gmm.s.Stats.Reserved.cShadowPages;
+ pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
+
+ pGVM->gmm.s.Stats.Reserved.cBasePages = cBasePages;
+ pGVM->gmm.s.Stats.Reserved.cFixedPages = cFixedPages;
+ pGVM->gmm.s.Stats.Reserved.cShadowPages = cShadowPages;
+ }
+ }
+ else
+ rc = VERR_WRONG_ORDER;
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0UpdateReservation: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0UpdateReservation.
+ *
+ * @returns see GMMR0UpdateReservation.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0UpdateReservationReq(PGVM pGVM, VMCPUID idCpu, PGMMUPDATERESERVATIONREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GMMR0UpdateReservation(pGVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages);
+}
+
+#ifdef GMMR0_WITH_SANITY_CHECK
+
+/**
+ * Performs sanity checks on a free set.
+ *
+ * @returns Error count.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param pSet Pointer to the set.
+ * @param pszSetName The set name.
+ * @param pszFunction The function from which it was called.
+ * @param uLineNo The line number.
+ */
+static uint32_t gmmR0SanityCheckSet(PGMM pGMM, PGMMCHUNKFREESET pSet, const char *pszSetName,
+ const char *pszFunction, unsigned uLineNo)
+{
+ uint32_t cErrors = 0;
+
+ /*
+ * Count the free pages in all the chunks and match it against pSet->cFreePages.
+ */
+ uint32_t cPages = 0;
+ for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
+ {
+ for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
+ {
+ /** @todo check that the chunk is hashed into the right set. */
+ cPages += pCur->cFree;
+ }
+ }
+ if (RT_UNLIKELY(cPages != pSet->cFreePages))
+ {
+ SUPR0Printf("GMM insanity: found %#x pages in the %s set, expected %#x. (%s, line %u)\n",
+ cPages, pszSetName, pSet->cFreePages, pszFunction, uLineNo);
+ cErrors++;
+ }
+
+ return cErrors;
+}
+
+
+/**
+ * Performs some sanity checks on the GMM while owning lock.
+ *
+ * @returns Error count.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param pszFunction The function from which it is called.
+ * @param uLineNo The line number.
+ */
+static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo)
+{
+ uint32_t cErrors = 0;
+
+ cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->PrivateX, "private", pszFunction, uLineNo);
+ cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Shared, "shared", pszFunction, uLineNo);
+ /** @todo add more sanity checks. */
+
+ return cErrors;
+}
+
+#endif /* GMMR0_WITH_SANITY_CHECK */
+
+/**
+ * Looks up a chunk in the tree and fills in the TLB entry for it.
+ *
+ * This is not expected to fail and will bitch if it does.
+ *
+ * @returns Pointer to the allocation chunk, NULL if not found.
+ * @param pGMM Pointer to the GMM instance.
+ * @param idChunk The ID of the chunk to find.
+ * @param pTlbe Pointer to the TLB entry.
+ *
+ * @note Caller owns spinlock.
+ */
+static PGMMCHUNK gmmR0GetChunkSlow(PGMM pGMM, uint32_t idChunk, PGMMCHUNKTLBE pTlbe)
+{
+ PGMMCHUNK pChunk = (PGMMCHUNK)RTAvlU32Get(&pGMM->pChunks, idChunk);
+ AssertMsgReturn(pChunk, ("Chunk %#x not found!\n", idChunk), NULL);
+ pTlbe->idChunk = idChunk;
+ pTlbe->pChunk = pChunk;
+ return pChunk;
+}
+
+
+/**
+ * Finds an allocation chunk, spin-locked.
+ *
+ * This is not expected to fail and will bitch if it does.
+ *
+ * @returns Pointer to the allocation chunk, NULL if not found.
+ * @param pGMM Pointer to the GMM instance.
+ * @param idChunk The ID of the chunk to find.
+ */
+DECLINLINE(PGMMCHUNK) gmmR0GetChunkLocked(PGMM pGMM, uint32_t idChunk)
+{
+ /*
+ * Do a TLB lookup, branch if not in the TLB.
+ */
+ PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(idChunk)];
+ PGMMCHUNK pChunk = pTlbe->pChunk;
+ if ( pChunk == NULL
+ || pTlbe->idChunk != idChunk)
+ pChunk = gmmR0GetChunkSlow(pGMM, idChunk, pTlbe);
+ return pChunk;
+}
+
+
+/**
+ * Finds an allocation chunk.
+ *
+ * This is not expected to fail and will bitch if it does.
+ *
+ * @returns Pointer to the allocation chunk, NULL if not found.
+ * @param pGMM Pointer to the GMM instance.
+ * @param idChunk The ID of the chunk to find.
+ */
+DECLINLINE(PGMMCHUNK) gmmR0GetChunk(PGMM pGMM, uint32_t idChunk)
+{
+ RTSpinlockAcquire(pGMM->hSpinLockTree);
+ PGMMCHUNK pChunk = gmmR0GetChunkLocked(pGMM, idChunk);
+ RTSpinlockRelease(pGMM->hSpinLockTree);
+ return pChunk;
+}
+
+
+/**
+ * Finds a page.
+ *
+ * This is not expected to fail and will bitch if it does.
+ *
+ * @returns Pointer to the page, NULL if not found.
+ * @param pGMM Pointer to the GMM instance.
+ * @param idPage The ID of the page to find.
+ */
+DECLINLINE(PGMMPAGE) gmmR0GetPage(PGMM pGMM, uint32_t idPage)
+{
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+ if (RT_LIKELY(pChunk))
+ return &pChunk->aPages[idPage & GMM_PAGEID_IDX_MASK];
+ return NULL;
+}
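+
+/*
+ * Illustration only: how a GMM page ID decomposes.  The upper bits are the
+ * chunk ID used for the tree/TLB lookup above, while the lower bits index the
+ * page within that chunk, mirroring how gmmR0AllocatePage composes the ID
+ * further down:
+ *
+ *     uint32_t const idChunk = idPage >> GMM_CHUNKID_SHIFT;    // key into pGMM->pChunks
+ *     uint32_t const iPage   = idPage &  GMM_PAGEID_IDX_MASK;  // index into pChunk->aPages
+ *     // and conversely: idPage == (idChunk << GMM_CHUNKID_SHIFT) | iPage
+ */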
+
+
+#if 0 /* unused */
+/**
+ * Gets the host physical address for a page given by its ID.
+ *
+ * @returns The host physical address or NIL_RTHCPHYS.
+ * @param pGMM Pointer to the GMM instance.
+ * @param idPage The ID of the page to find.
+ */
+DECLINLINE(RTHCPHYS) gmmR0GetPageHCPhys(PGMM pGMM, uint32_t idPage)
+{
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+ if (RT_LIKELY(pChunk))
+ return RTR0MemObjGetPagePhysAddr(pChunk->hMemObj, idPage & GMM_PAGEID_IDX_MASK);
+ return NIL_RTHCPHYS;
+}
+#endif /* unused */
+
+
+/**
+ * Selects the appropriate free list given the number of free pages.
+ *
+ * @returns Free list index.
+ * @param cFree The number of free pages in the chunk.
+ */
+DECLINLINE(unsigned) gmmR0SelectFreeSetList(unsigned cFree)
+{
+ unsigned iList = cFree >> GMM_CHUNK_FREE_SET_SHIFT;
+ AssertMsg(iList < RT_SIZEOFMEMB(GMMCHUNKFREESET, apLists) / RT_SIZEOFMEMB(GMMCHUNKFREESET, apLists[0]),
+ ("%d (%u)\n", iList, cFree));
+ return iList;
+}
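+
+/*
+ * Illustration only: the free lists bucket chunks by the number of free pages
+ * they hold, i.e. iList = cFree >> GMM_CHUNK_FREE_SET_SHIFT.  Assuming purely
+ * for illustration that the shift is 4, the buckets would be:
+ *
+ *     cFree  0..15  -> apLists[0]
+ *     cFree 16..31  -> apLists[1]
+ *     cFree 32..47  -> apLists[2]
+ *     ...           -> ...
+ *
+ * with the last list (GMM_CHUNK_FREE_SET_UNUSED_LIST) ending up holding the
+ * completely free chunks, which the empty-chunk allocators further down rely
+ * on.
+ */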
+
+
+/**
+ * Unlinks the chunk from the free list it's currently on (if any).
+ *
+ * @param pChunk The allocation chunk.
+ */
+DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk)
+{
+ PGMMCHUNKFREESET pSet = pChunk->pSet;
+ if (RT_LIKELY(pSet))
+ {
+ pSet->cFreePages -= pChunk->cFree;
+ pSet->idGeneration++;
+
+ PGMMCHUNK pPrev = pChunk->pFreePrev;
+ PGMMCHUNK pNext = pChunk->pFreeNext;
+ if (pPrev)
+ pPrev->pFreeNext = pNext;
+ else
+ pSet->apLists[gmmR0SelectFreeSetList(pChunk->cFree)] = pNext;
+ if (pNext)
+ pNext->pFreePrev = pPrev;
+
+ pChunk->pSet = NULL;
+ pChunk->pFreeNext = NULL;
+ pChunk->pFreePrev = NULL;
+ }
+ else
+ {
+ Assert(!pChunk->pFreeNext);
+ Assert(!pChunk->pFreePrev);
+ Assert(!pChunk->cFree);
+ }
+}
+
+
+/**
+ * Links the chunk onto the appropriate free list in the specified free set.
+ *
+ * If the chunk has no free entries, it is not linked into any list.
+ *
+ * @param pChunk The allocation chunk.
+ * @param pSet The free set.
+ */
+DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet)
+{
+ Assert(!pChunk->pSet);
+ Assert(!pChunk->pFreeNext);
+ Assert(!pChunk->pFreePrev);
+
+ if (pChunk->cFree > 0)
+ {
+ pChunk->pSet = pSet;
+ pChunk->pFreePrev = NULL;
+ unsigned const iList = gmmR0SelectFreeSetList(pChunk->cFree);
+ pChunk->pFreeNext = pSet->apLists[iList];
+ if (pChunk->pFreeNext)
+ pChunk->pFreeNext->pFreePrev = pChunk;
+ pSet->apLists[iList] = pChunk;
+
+ pSet->cFreePages += pChunk->cFree;
+ pSet->idGeneration++;
+ }
+}
+
+
+/**
+ * Selects the appropriate free set for the chunk (based on bound memory mode
+ * and whether it holds shared pages) and links it onto the corresponding
+ * free list.
+ *
+ * If the chunk has no free entries, it is not linked into any list.
+ *
+ * @param   pGMM        Pointer to the GMM instance.
+ * @param   pGVM        Pointer to the kernel-only VM instance data.
+ * @param pChunk The allocation chunk.
+ */
+DECLINLINE(void) gmmR0SelectSetAndLinkChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
+{
+ PGMMCHUNKFREESET pSet;
+ if (pGMM->fBoundMemoryMode)
+ pSet = &pGVM->gmm.s.Private;
+ else if (pChunk->cShared)
+ pSet = &pGMM->Shared;
+ else
+ pSet = &pGMM->PrivateX;
+ gmmR0LinkChunk(pChunk, pSet);
+}
+
+
+/**
+ * Frees a Chunk ID.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param idChunk The Chunk ID to free.
+ */
+static void gmmR0FreeChunkId(PGMM pGMM, uint32_t idChunk)
+{
+ AssertReturnVoid(idChunk != NIL_GMM_CHUNKID);
+ AssertMsg(ASMBitTest(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk));
+ ASMAtomicBitClear(&pGMM->bmChunkId[0], idChunk);
+}
+
+
+/**
+ * Allocates a new Chunk ID.
+ *
+ * @returns The Chunk ID.
+ * @param pGMM Pointer to the GMM instance.
+ */
+static uint32_t gmmR0AllocateChunkId(PGMM pGMM)
+{
+ AssertCompile(!((GMM_CHUNKID_LAST + 1) & 31)); /* must be a multiple of 32 */
+ AssertCompile(NIL_GMM_CHUNKID == 0);
+
+ /*
+ * Try the next sequential one.
+ */
+ int32_t idChunk = ++pGMM->idChunkPrev;
+#if 0 /** @todo enable this code */
+ if ( idChunk <= GMM_CHUNKID_LAST
+ && idChunk > NIL_GMM_CHUNKID
+ && !ASMAtomicBitTestAndSet(&pVMM->bmChunkId[0], idChunk))
+ return idChunk;
+#endif
+
+ /*
+ * Scan sequentially from the last one.
+ */
+ if ( (uint32_t)idChunk < GMM_CHUNKID_LAST
+ && idChunk > NIL_GMM_CHUNKID)
+ {
+ idChunk = ASMBitNextClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1, idChunk - 1);
+ if (idChunk > NIL_GMM_CHUNKID)
+ {
+ AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
+ return pGMM->idChunkPrev = idChunk;
+ }
+ }
+
+ /*
+ * Ok, scan from the start.
+ * We're not racing anyone, so there is no need to expect failures or have restart loops.
+ */
+ idChunk = ASMBitFirstClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1);
+ AssertMsgReturn(idChunk > NIL_GMM_CHUNKID, ("%#x\n", idChunk), NIL_GVM_HANDLE);
+ AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
+
+ return pGMM->idChunkPrev = idChunk;
+}
+
+
+/**
+ * Allocates one private page.
+ *
+ * Worker for gmmR0AllocatePages.
+ *
+ * @param pChunk The chunk to allocate it from.
+ * @param hGVM The GVM handle of the VM requesting memory.
+ * @param pPageDesc The page descriptor.
+ */
+static void gmmR0AllocatePage(PGMMCHUNK pChunk, uint32_t hGVM, PGMMPAGEDESC pPageDesc)
+{
+ /* update the chunk stats. */
+ if (pChunk->hGVM == NIL_GVM_HANDLE)
+ pChunk->hGVM = hGVM;
+ Assert(pChunk->cFree);
+ pChunk->cFree--;
+ pChunk->cPrivate++;
+
+ /* unlink the first free page. */
+ const uint32_t iPage = pChunk->iFreeHead;
+ AssertReleaseMsg(iPage < RT_ELEMENTS(pChunk->aPages), ("%d\n", iPage));
+ PGMMPAGE pPage = &pChunk->aPages[iPage];
+ Assert(GMM_PAGE_IS_FREE(pPage));
+ pChunk->iFreeHead = pPage->Free.iNext;
+ Log3(("A pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x iNext=%#x\n",
+ pPage, iPage, (pChunk->Core.Key << GMM_CHUNKID_SHIFT) | iPage,
+ pPage->Common.u2State, pChunk->iFreeHead, pPage->Free.iNext));
+
+ /* make the page private. */
+ pPage->u = 0;
+ AssertCompile(GMM_PAGE_STATE_PRIVATE == 0);
+ pPage->Private.hGVM = hGVM;
+ AssertCompile(NIL_RTHCPHYS >= GMM_GCPHYS_LAST);
+ AssertCompile(GMM_GCPHYS_UNSHAREABLE >= GMM_GCPHYS_LAST);
+ if (pPageDesc->HCPhysGCPhys <= GMM_GCPHYS_LAST)
+ pPage->Private.pfn = pPageDesc->HCPhysGCPhys >> PAGE_SHIFT;
+ else
+ pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE; /* unshareable / unassigned - same thing. */
+
+ /* update the page descriptor. */
+ pPageDesc->HCPhysGCPhys = RTR0MemObjGetPagePhysAddr(pChunk->hMemObj, iPage);
+ Assert(pPageDesc->HCPhysGCPhys != NIL_RTHCPHYS);
+ pPageDesc->idPage = (pChunk->Core.Key << GMM_CHUNKID_SHIFT) | iPage;
+ pPageDesc->idSharedPage = NIL_GMM_PAGEID;
+}
+
+
+/**
+ * Picks the free pages from a chunk.
+ *
+ * @returns The new page descriptor table index.
+ * @param pChunk The chunk.
+ * @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
+ * affinity.
+ * @param iPage The current page descriptor table index.
+ * @param cPages The total number of pages to allocate.
+ * @param   paPages     The page descriptor table (input + output).
+ */
+static uint32_t gmmR0AllocatePagesFromChunk(PGMMCHUNK pChunk, uint16_t const hGVM, uint32_t iPage, uint32_t cPages,
+ PGMMPAGEDESC paPages)
+{
+ PGMMCHUNKFREESET pSet = pChunk->pSet; Assert(pSet);
+ gmmR0UnlinkChunk(pChunk);
+
+ for (; pChunk->cFree && iPage < cPages; iPage++)
+ gmmR0AllocatePage(pChunk, hGVM, &paPages[iPage]);
+
+ gmmR0LinkChunk(pChunk, pSet);
+ return iPage;
+}
+
+
+/**
+ * Registers a new chunk of memory.
+ *
+ * This is called by gmmR0AllocateChunkNew, GMMR0AllocateLargePage and GMMR0SeedChunk.
+ *
+ * @returns VBox status code. On success, the giant GMM lock will be held, the
+ * caller must release it (ugly).
+ * @param pGMM Pointer to the GMM instance.
+ * @param pSet Pointer to the set.
+ * @param hMemObj The memory object for the chunk.
+ * @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
+ * affinity.
+ * @param fChunkFlags The chunk flags, GMM_CHUNK_FLAGS_XXX.
+ * @param ppChunk Chunk address (out). Optional.
+ *
+ * @remarks The caller must not own the giant GMM mutex.
+ * The giant GMM mutex will be acquired and returned acquired in
+ * the success path. On failure, no locks will be held.
+ */
+static int gmmR0RegisterChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, RTR0MEMOBJ hMemObj, uint16_t hGVM, uint16_t fChunkFlags,
+ PGMMCHUNK *ppChunk)
+{
+ Assert(pGMM->hMtxOwner != RTThreadNativeSelf());
+ Assert(hGVM != NIL_GVM_HANDLE || pGMM->fBoundMemoryMode);
+#ifdef GMM_WITH_LEGACY_MODE
+ Assert(fChunkFlags == 0 || fChunkFlags == GMM_CHUNK_FLAGS_LARGE_PAGE || fChunkFlags == GMM_CHUNK_FLAGS_SEEDED);
+#else
+ Assert(fChunkFlags == 0 || fChunkFlags == GMM_CHUNK_FLAGS_LARGE_PAGE);
+#endif
+
+#if defined(VBOX_WITH_RAM_IN_KERNEL) && !defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM)
+ /*
+ * Get a ring-0 mapping of the object.
+ */
+# ifdef GMM_WITH_LEGACY_MODE
+ uint8_t *pbMapping = !(fChunkFlags & GMM_CHUNK_FLAGS_SEEDED) ? (uint8_t *)RTR0MemObjAddress(hMemObj) : NULL;
+# else
+ uint8_t *pbMapping = (uint8_t *)RTR0MemObjAddress(hMemObj);
+# endif
+ if (!pbMapping)
+ {
+ RTR0MEMOBJ hMapObj;
+ int rc = RTR0MemObjMapKernel(&hMapObj, hMemObj, (void *)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE);
+ if (RT_SUCCESS(rc))
+ pbMapping = (uint8_t *)RTR0MemObjAddress(hMapObj);
+ else
+ return rc;
+ AssertPtr(pbMapping);
+ }
+#endif
+
+ /*
+ * Allocate a chunk.
+ */
+ int rc;
+ PGMMCHUNK pChunk = (PGMMCHUNK)RTMemAllocZ(sizeof(*pChunk));
+ if (pChunk)
+ {
+ /*
+ * Initialize it.
+ */
+ pChunk->hMemObj = hMemObj;
+#if defined(VBOX_WITH_RAM_IN_KERNEL) && !defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM)
+ pChunk->pbMapping = pbMapping;
+#endif
+ pChunk->cFree = GMM_CHUNK_NUM_PAGES;
+ pChunk->hGVM = hGVM;
+ /*pChunk->iFreeHead = 0;*/
+ pChunk->idNumaNode = gmmR0GetCurrentNumaNodeId();
+ pChunk->iChunkMtx = UINT8_MAX;
+ pChunk->fFlags = fChunkFlags;
+ for (unsigned iPage = 0; iPage < RT_ELEMENTS(pChunk->aPages) - 1; iPage++)
+ {
+ pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
+ pChunk->aPages[iPage].Free.iNext = iPage + 1;
+ }
+ pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.u2State = GMM_PAGE_STATE_FREE;
+ pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.iNext = UINT16_MAX;
+
+ /*
+ * Allocate a Chunk ID and insert it into the tree.
+ * This has to be done behind the mutex of course.
+ */
+ rc = gmmR0MutexAcquire(pGMM);
+ if (RT_SUCCESS(rc))
+ {
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ pChunk->Core.Key = gmmR0AllocateChunkId(pGMM);
+ if ( pChunk->Core.Key != NIL_GMM_CHUNKID
+ && pChunk->Core.Key <= GMM_CHUNKID_LAST)
+ {
+ RTSpinlockAcquire(pGMM->hSpinLockTree);
+ if (RTAvlU32Insert(&pGMM->pChunks, &pChunk->Core))
+ {
+ pGMM->cChunks++;
+ RTListAppend(&pGMM->ChunkList, &pChunk->ListNode);
+ RTSpinlockRelease(pGMM->hSpinLockTree);
+
+ gmmR0LinkChunk(pChunk, pSet);
+
+ LogFlow(("gmmR0RegisterChunk: pChunk=%p id=%#x cChunks=%d\n", pChunk, pChunk->Core.Key, pGMM->cChunks));
+
+ if (ppChunk)
+ *ppChunk = pChunk;
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ return VINF_SUCCESS;
+ }
+ RTSpinlockRelease(pGMM->hSpinLockTree);
+ }
+
+ /* bail out */
+ rc = VERR_GMM_CHUNK_INSERT;
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+ gmmR0MutexRelease(pGMM);
+ }
+
+ RTMemFree(pChunk);
+ }
+ else
+ rc = VERR_NO_MEMORY;
+ return rc;
+}
+
+
+/**
+ * Allocates a new chunk, immediately picks the requested pages from it, and
+ * adds what's remaining to the specified free set.
+ *
+ * @note This will leave the giant mutex while allocating the new chunk!
+ *
+ * @returns VBox status code.
+ * @param pGMM Pointer to the GMM instance data.
+ * @param   pGVM        Pointer to the kernel-only VM instance data.
+ * @param pSet Pointer to the free set.
+ * @param cPages The number of pages requested.
+ * @param paPages The page descriptor table (input + output).
+ * @param piPage The pointer to the page descriptor table index variable.
+ * This will be updated.
+ */
+static int gmmR0AllocateChunkNew(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet, uint32_t cPages,
+ PGMMPAGEDESC paPages, uint32_t *piPage)
+{
+ gmmR0MutexRelease(pGMM);
+
+ RTR0MEMOBJ hMemObj;
+#ifndef GMM_WITH_LEGACY_MODE
+ int rc;
+# ifdef VBOX_WITH_LINEAR_HOST_PHYS_MEM
+ if (pGMM->fHasWorkingAllocPhysNC)
+ rc = RTR0MemObjAllocPhysNC(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
+ else
+# endif
+ rc = RTR0MemObjAllocPage(&hMemObj, GMM_CHUNK_SIZE, false /*fExecutable*/);
+#else
+ int rc = RTR0MemObjAllocPhysNC(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ /** @todo Duplicate gmmR0RegisterChunk here so we can avoid chaining up the
+ * free pages first and then unchaining them right afterwards. Instead
+ * do as much work as possible without holding the giant lock. */
+ PGMMCHUNK pChunk;
+ rc = gmmR0RegisterChunk(pGMM, pSet, hMemObj, pGVM->hSelf, 0 /*fChunkFlags*/, &pChunk);
+ if (RT_SUCCESS(rc))
+ {
+ *piPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, *piPage, cPages, paPages);
+ return VINF_SUCCESS;
+ }
+
+ /* bail out */
+ RTR0MemObjFree(hMemObj, true /* fFreeMappings */);
+ }
+
+ int rc2 = gmmR0MutexAcquire(pGMM);
+ AssertRCReturn(rc2, RT_FAILURE(rc) ? rc : rc2);
+ return rc;
+}
+
+
+/**
+ * As a last resort we'll pick any page we can get.
+ *
+ * @returns The new page descriptor table index.
+ * @param pSet The set to pick from.
+ * @param pGVM Pointer to the global VM structure.
+ * @param iPage The current page descriptor table index.
+ * @param cPages The total number of pages to allocate.
+ * @param   paPages     The page descriptor table (input + output).
+ */
+static uint32_t gmmR0AllocatePagesIndiscriminately(PGMMCHUNKFREESET pSet, PGVM pGVM,
+ uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
+{
+ unsigned iList = RT_ELEMENTS(pSet->apLists);
+ while (iList-- > 0)
+ {
+ PGMMCHUNK pChunk = pSet->apLists[iList];
+ while (pChunk)
+ {
+ PGMMCHUNK pNext = pChunk->pFreeNext;
+
+ iPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, iPage, cPages, paPages);
+ if (iPage >= cPages)
+ return iPage;
+
+ pChunk = pNext;
+ }
+ }
+ return iPage;
+}
+
+
+/**
+ * Pick pages from empty chunks on the same NUMA node.
+ *
+ * @returns The new page descriptor table index.
+ * @param pSet The set to pick from.
+ * @param pGVM Pointer to the global VM structure.
+ * @param iPage The current page descriptor table index.
+ * @param cPages The total number of pages to allocate.
+ * @param   paPages     The page descriptor table (input + output).
+ */
+static uint32_t gmmR0AllocatePagesFromEmptyChunksOnSameNode(PGMMCHUNKFREESET pSet, PGVM pGVM,
+ uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
+{
+ PGMMCHUNK pChunk = pSet->apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST];
+ if (pChunk)
+ {
+ uint16_t const idNumaNode = gmmR0GetCurrentNumaNodeId();
+ while (pChunk)
+ {
+ PGMMCHUNK pNext = pChunk->pFreeNext;
+
+ if (pChunk->idNumaNode == idNumaNode)
+ {
+ pChunk->hGVM = pGVM->hSelf;
+ iPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, iPage, cPages, paPages);
+ if (iPage >= cPages)
+ {
+ pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
+ return iPage;
+ }
+ }
+
+ pChunk = pNext;
+ }
+ }
+ return iPage;
+}
+
+
+/**
+ * Pick pages from non-empty chunks on the same NUMA node.
+ *
+ * @returns The new page descriptor table index.
+ * @param pSet The set to pick from.
+ * @param pGVM Pointer to the global VM structure.
+ * @param iPage The current page descriptor table index.
+ * @param cPages The total number of pages to allocate.
+ * @param   paPages     The page descriptor table (input + output).
+ */
+static uint32_t gmmR0AllocatePagesFromSameNode(PGMMCHUNKFREESET pSet, PGVM pGVM,
+ uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
+{
+ /** @todo start by picking from chunks with about the right size first? */
+ uint16_t const idNumaNode = gmmR0GetCurrentNumaNodeId();
+ unsigned iList = GMM_CHUNK_FREE_SET_UNUSED_LIST;
+ while (iList-- > 0)
+ {
+ PGMMCHUNK pChunk = pSet->apLists[iList];
+ while (pChunk)
+ {
+ PGMMCHUNK pNext = pChunk->pFreeNext;
+
+ if (pChunk->idNumaNode == idNumaNode)
+ {
+ iPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, iPage, cPages, paPages);
+ if (iPage >= cPages)
+ {
+ pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
+ return iPage;
+ }
+ }
+
+ pChunk = pNext;
+ }
+ }
+ return iPage;
+}
+
+
+/**
+ * Pick pages that are in chunks already associated with the VM.
+ *
+ * @returns The new page descriptor table index.
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the global VM structure.
+ * @param pSet The set to pick from.
+ * @param iPage The current page descriptor table index.
+ * @param cPages The total number of pages to allocate.
+ * @param   paPages     The page descriptor table (input + output).
+ */
+static uint32_t gmmR0AllocatePagesAssociatedWithVM(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet,
+ uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
+{
+ uint16_t const hGVM = pGVM->hSelf;
+
+ /* Hint. */
+ if (pGVM->gmm.s.idLastChunkHint != NIL_GMM_CHUNKID)
+ {
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pGVM->gmm.s.idLastChunkHint);
+ if (pChunk && pChunk->cFree)
+ {
+ iPage = gmmR0AllocatePagesFromChunk(pChunk, hGVM, iPage, cPages, paPages);
+ if (iPage >= cPages)
+ return iPage;
+ }
+ }
+
+ /* Scan. */
+ for (unsigned iList = 0; iList < RT_ELEMENTS(pSet->apLists); iList++)
+ {
+ PGMMCHUNK pChunk = pSet->apLists[iList];
+ while (pChunk)
+ {
+ PGMMCHUNK pNext = pChunk->pFreeNext;
+
+ if (pChunk->hGVM == hGVM)
+ {
+ iPage = gmmR0AllocatePagesFromChunk(pChunk, hGVM, iPage, cPages, paPages);
+ if (iPage >= cPages)
+ {
+ pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
+ return iPage;
+ }
+ }
+
+ pChunk = pNext;
+ }
+ }
+ return iPage;
+}
+
+
+
+/**
+ * Pick pages in bound memory mode.
+ *
+ * @returns The new page descriptor table index.
+ * @param pGVM Pointer to the global VM structure.
+ * @param iPage The current page descriptor table index.
+ * @param cPages The total number of pages to allocate.
+ * @param   paPages     The page descriptor table (input + output).
+ */
+static uint32_t gmmR0AllocatePagesInBoundMode(PGVM pGVM, uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
+{
+ for (unsigned iList = 0; iList < RT_ELEMENTS(pGVM->gmm.s.Private.apLists); iList++)
+ {
+ PGMMCHUNK pChunk = pGVM->gmm.s.Private.apLists[iList];
+ while (pChunk)
+ {
+ Assert(pChunk->hGVM == pGVM->hSelf);
+ PGMMCHUNK pNext = pChunk->pFreeNext;
+ iPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, iPage, cPages, paPages);
+ if (iPage >= cPages)
+ return iPage;
+ pChunk = pNext;
+ }
+ }
+ return iPage;
+}
+
+
+/**
+ * Checks if we should start picking pages from chunks of other VMs because
+ * we're getting close to the system memory or reserved limit.
+ *
+ * @returns @c true if we should, @c false if we should first try to allocate
+ *          more chunks.
+ * @param   pGVM    Pointer to the global VM structure.
+ */
+static bool gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLimits(PGVM pGVM)
+{
+ /*
+     * Don't allocate a new chunk if we're getting too close to the reservation limit.
+ */
+ uint64_t cPgReserved = pGVM->gmm.s.Stats.Reserved.cBasePages
+ + pGVM->gmm.s.Stats.Reserved.cFixedPages
+ - pGVM->gmm.s.Stats.cBalloonedPages
+ /** @todo what about shared pages? */;
+ uint64_t cPgAllocated = pGVM->gmm.s.Stats.Allocated.cBasePages
+ + pGVM->gmm.s.Stats.Allocated.cFixedPages;
+ uint64_t cPgDelta = cPgReserved - cPgAllocated;
+ if (cPgDelta < GMM_CHUNK_NUM_PAGES * 4)
+ return true;
+ /** @todo make the threshold configurable, also test the code to see if
+     *        this ever kicks in (we might be reserving too much or something). */
+
+ /*
+ * Check how close we're to the max memory limit and how many fragments
+ * there are?...
+ */
+ /** @todo */
+
+ return false;
+}
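+
+/*
+ * Illustration only: a worked example of the threshold above.  GMM_CHUNK_SIZE
+ * is asserted to be _2M further down, so with the usual 4 KiB host page size
+ * GMM_CHUNK_NUM_PAGES would be 512 and the check fires once the unallocated
+ * part of the reservation drops below 4 * 512 = 2048 pages (about 8 MiB):
+ *
+ *     cPgDelta = (Reserved.cBasePages + Reserved.cFixedPages - cBalloonedPages)
+ *              - (Allocated.cBasePages + Allocated.cFixedPages);
+ *     if (cPgDelta < 4 * GMM_CHUNK_NUM_PAGES)  // < ~8 MiB of headroom left
+ *         // prefer picking from other VMs' non-full chunks over growing
+ */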
+
+
+/**
+ * Checks if we should start picking pages from chunks of other VMs because
+ * there are a lot of free pages around.
+ *
+ * @returns @c true if we should, @c false if we should first try to allocate
+ *          more chunks.
+ * @param   pGMM    Pointer to the GMM instance.
+ */
+static bool gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLotsFree(PGMM pGMM)
+{
+ /*
+ * Setting the limit at 16 chunks (32 MB) at the moment.
+ */
+ if (pGMM->PrivateX.cFreePages >= GMM_CHUNK_NUM_PAGES * 16)
+ return true;
+ return false;
+}
+
+
+/**
+ * Common worker for GMMR0AllocateHandyPages and GMMR0AllocatePages.
+ *
+ * @returns VBox status code:
+ * @retval VINF_SUCCESS on success.
+ * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk or
+ * gmmR0AllocateMoreChunks is necessary.
+ * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
+ * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
+ * that is we're trying to allocate more than we've reserved.
+ *
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the VM.
+ * @param cPages The number of pages to allocate.
+ * @param paPages Pointer to the page descriptors. See GMMPAGEDESC for
+ * details on what is expected on input.
+ * @param enmAccount The account to charge.
+ *
+ * @remarks Caller must own the giant GMM lock.
+ */
+static int gmmR0AllocatePagesNew(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
+{
+ Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
+
+ /*
+ * Check allocation limits.
+ */
+ if (RT_UNLIKELY(pGMM->cAllocatedPages + cPages > pGMM->cMaxPages))
+ return VERR_GMM_HIT_GLOBAL_LIMIT;
+
+ switch (enmAccount)
+ {
+ case GMMACCOUNT_BASE:
+ if (RT_UNLIKELY( pGVM->gmm.s.Stats.Allocated.cBasePages + pGVM->gmm.s.Stats.cBalloonedPages + cPages
+ > pGVM->gmm.s.Stats.Reserved.cBasePages))
+ {
+ Log(("gmmR0AllocatePages:Base: Reserved=%#llx Allocated+Ballooned+Requested=%#llx+%#llx+%#x!\n",
+ pGVM->gmm.s.Stats.Reserved.cBasePages, pGVM->gmm.s.Stats.Allocated.cBasePages,
+ pGVM->gmm.s.Stats.cBalloonedPages, cPages));
+ return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
+ }
+ break;
+ case GMMACCOUNT_SHADOW:
+ if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cShadowPages + cPages > pGVM->gmm.s.Stats.Reserved.cShadowPages))
+ {
+ Log(("gmmR0AllocatePages:Shadow: Reserved=%#x Allocated+Requested=%#x+%#x!\n",
+ pGVM->gmm.s.Stats.Reserved.cShadowPages, pGVM->gmm.s.Stats.Allocated.cShadowPages, cPages));
+ return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
+ }
+ break;
+ case GMMACCOUNT_FIXED:
+ if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cFixedPages + cPages > pGVM->gmm.s.Stats.Reserved.cFixedPages))
+ {
+ Log(("gmmR0AllocatePages:Fixed: Reserved=%#x Allocated+Requested=%#x+%#x!\n",
+ pGVM->gmm.s.Stats.Reserved.cFixedPages, pGVM->gmm.s.Stats.Allocated.cFixedPages, cPages));
+ return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
+ }
+ break;
+ default:
+ AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
+ }
+
+#ifdef GMM_WITH_LEGACY_MODE
+ /*
+ * If we're in legacy memory mode, it's easy to figure if we have
+ * sufficient number of pages up-front.
+ */
+ if ( pGMM->fLegacyAllocationMode
+ && pGVM->gmm.s.Private.cFreePages < cPages)
+ {
+ Assert(pGMM->fBoundMemoryMode);
+ return VERR_GMM_SEED_ME;
+ }
+#endif
+
+ /*
+ * Update the accounts before we proceed because we might be leaving the
+ * protection of the global mutex and thus run the risk of permitting
+ * too much memory to be allocated.
+ */
+ switch (enmAccount)
+ {
+ case GMMACCOUNT_BASE: pGVM->gmm.s.Stats.Allocated.cBasePages += cPages; break;
+ case GMMACCOUNT_SHADOW: pGVM->gmm.s.Stats.Allocated.cShadowPages += cPages; break;
+ case GMMACCOUNT_FIXED: pGVM->gmm.s.Stats.Allocated.cFixedPages += cPages; break;
+ default: AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
+ }
+ pGVM->gmm.s.Stats.cPrivatePages += cPages;
+ pGMM->cAllocatedPages += cPages;
+
+#ifdef GMM_WITH_LEGACY_MODE
+ /*
+ * Part two of it's-easy-in-legacy-memory-mode.
+ */
+ if (pGMM->fLegacyAllocationMode)
+ {
+ uint32_t iPage = gmmR0AllocatePagesInBoundMode(pGVM, 0, cPages, paPages);
+ AssertReleaseReturn(iPage == cPages, VERR_GMM_ALLOC_PAGES_IPE);
+ return VINF_SUCCESS;
+ }
+#endif
+
+ /*
+ * Bound mode is also relatively straightforward.
+ */
+ uint32_t iPage = 0;
+ int rc = VINF_SUCCESS;
+ if (pGMM->fBoundMemoryMode)
+ {
+ iPage = gmmR0AllocatePagesInBoundMode(pGVM, iPage, cPages, paPages);
+ if (iPage < cPages)
+ do
+ rc = gmmR0AllocateChunkNew(pGMM, pGVM, &pGVM->gmm.s.Private, cPages, paPages, &iPage);
+ while (iPage < cPages && RT_SUCCESS(rc));
+ }
+ /*
+     * Shared mode is trickier as we should try to achieve the same locality as
+ * in bound mode, but smartly make use of non-full chunks allocated by
+ * other VMs if we're low on memory.
+ */
+ else
+ {
+ /* Pick the most optimal pages first. */
+ iPage = gmmR0AllocatePagesAssociatedWithVM(pGMM, pGVM, &pGMM->PrivateX, iPage, cPages, paPages);
+ if (iPage < cPages)
+ {
+ /* Maybe we should try getting pages from chunks "belonging" to
+ other VMs before allocating more chunks? */
+ bool fTriedOnSameAlready = false;
+ if (gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLimits(pGVM))
+ {
+ iPage = gmmR0AllocatePagesFromSameNode(&pGMM->PrivateX, pGVM, iPage, cPages, paPages);
+ fTriedOnSameAlready = true;
+ }
+
+ /* Allocate memory from empty chunks. */
+ if (iPage < cPages)
+ iPage = gmmR0AllocatePagesFromEmptyChunksOnSameNode(&pGMM->PrivateX, pGVM, iPage, cPages, paPages);
+
+ /* Grab empty shared chunks. */
+ if (iPage < cPages)
+ iPage = gmmR0AllocatePagesFromEmptyChunksOnSameNode(&pGMM->Shared, pGVM, iPage, cPages, paPages);
+
+            /* If there are a lot of free pages spread around, try not to waste
+               system memory on more chunks. (Should trigger defragmentation.) */
+ if ( !fTriedOnSameAlready
+ && gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLotsFree(pGMM))
+ {
+ iPage = gmmR0AllocatePagesFromSameNode(&pGMM->PrivateX, pGVM, iPage, cPages, paPages);
+ if (iPage < cPages)
+ iPage = gmmR0AllocatePagesIndiscriminately(&pGMM->PrivateX, pGVM, iPage, cPages, paPages);
+ }
+
+ /*
+             * Ok, try to allocate new chunks.
+ */
+ if (iPage < cPages)
+ {
+ do
+ rc = gmmR0AllocateChunkNew(pGMM, pGVM, &pGMM->PrivateX, cPages, paPages, &iPage);
+ while (iPage < cPages && RT_SUCCESS(rc));
+
+ /* If the host is out of memory, take whatever we can get. */
+ if ( (rc == VERR_NO_MEMORY || rc == VERR_NO_PHYS_MEMORY)
+ && pGMM->PrivateX.cFreePages + pGMM->Shared.cFreePages >= cPages - iPage)
+ {
+ iPage = gmmR0AllocatePagesIndiscriminately(&pGMM->PrivateX, pGVM, iPage, cPages, paPages);
+ if (iPage < cPages)
+ iPage = gmmR0AllocatePagesIndiscriminately(&pGMM->Shared, pGVM, iPage, cPages, paPages);
+ AssertRelease(iPage == cPages);
+ rc = VINF_SUCCESS;
+ }
+ }
+ }
+ }
+
+ /*
+ * Clean up on failure. Since this is bound to be a low-memory condition
+ * we will give back any empty chunks that might be hanging around.
+ */
+ if (RT_FAILURE(rc))
+ {
+ /* Update the statistics. */
+ pGVM->gmm.s.Stats.cPrivatePages -= cPages;
+ pGMM->cAllocatedPages -= cPages - iPage;
+ switch (enmAccount)
+ {
+ case GMMACCOUNT_BASE: pGVM->gmm.s.Stats.Allocated.cBasePages -= cPages; break;
+ case GMMACCOUNT_SHADOW: pGVM->gmm.s.Stats.Allocated.cShadowPages -= cPages; break;
+ case GMMACCOUNT_FIXED: pGVM->gmm.s.Stats.Allocated.cFixedPages -= cPages; break;
+ default: AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
+ }
+
+ /* Release the pages. */
+ while (iPage-- > 0)
+ {
+ uint32_t idPage = paPages[iPage].idPage;
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
+ if (RT_LIKELY(pPage))
+ {
+ Assert(GMM_PAGE_IS_PRIVATE(pPage));
+ Assert(pPage->Private.hGVM == pGVM->hSelf);
+ gmmR0FreePrivatePage(pGMM, pGVM, idPage, pPage);
+ }
+ else
+ AssertMsgFailed(("idPage=%#x\n", idPage));
+
+ paPages[iPage].idPage = NIL_GMM_PAGEID;
+ paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
+ paPages[iPage].HCPhysGCPhys = NIL_RTHCPHYS;
+ }
+
+ /* Free empty chunks. */
+ /** @todo */
+
+ /* return the fail status on failure */
+ return rc;
+ }
+ return VINF_SUCCESS;
+}
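+
+/*
+ * Recap of gmmR0AllocatePagesNew in shared (non-bound) mode, for orientation
+ * only; each source is tried in turn while iPage is still short of cPages:
+ *
+ *     1. Chunks already associated with this VM (idLastChunkHint, then a scan).
+ *     2. If close to the reservation limit: non-empty same-NUMA-node chunks,
+ *        regardless of which VM they are associated with.
+ *     3. Empty chunks on the same NUMA node, first from the private set and
+ *        then from the shared set.
+ *     4. If there are lots of free pages globally: same-node chunks, then any
+ *        private chunk at all, to favour reuse over growth.
+ *     5. Brand new chunks via gmmR0AllocateChunkNew (temporarily drops the
+ *        giant lock).
+ *     6. If the host is out of memory: whatever pages remain, indiscriminately.
+ */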
+
+
+/**
+ * Updates the previous allocations and allocates more pages.
+ *
+ * The handy pages are always taken from the 'base' memory account.
+ * The allocated pages are not cleared and will contain random garbage.
+ *
+ * @returns VBox status code:
+ * @retval VINF_SUCCESS on success.
+ * @retval VERR_NOT_OWNER if the caller is not an EMT.
+ * @retval VERR_GMM_PAGE_NOT_FOUND if one of the pages to update wasn't found.
+ * @retval VERR_GMM_PAGE_NOT_PRIVATE if one of the pages to update wasn't a
+ * private page.
+ * @retval VERR_GMM_PAGE_NOT_SHARED if one of the pages to update wasn't a
+ * shared page.
+ * @retval VERR_GMM_NOT_PAGE_OWNER if one of the pages to be updated wasn't
+ * owned by the VM.
+ * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
+ * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
+ * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
+ * that is we're trying to allocate more than we've reserved.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param cPagesToUpdate The number of pages to update (starting from the head).
+ * @param cPagesToAlloc The number of pages to allocate (starting from the head).
+ * @param paPages The array of page descriptors.
+ * See GMMPAGEDESC for details on what is expected on input.
+ * @thread EMT(idCpu)
+ */
+GMMR0DECL(int) GMMR0AllocateHandyPages(PGVM pGVM, VMCPUID idCpu, uint32_t cPagesToUpdate,
+ uint32_t cPagesToAlloc, PGMMPAGEDESC paPages)
+{
+ LogFlow(("GMMR0AllocateHandyPages: pGVM=%p cPagesToUpdate=%#x cPagesToAlloc=%#x paPages=%p\n",
+ pGVM, cPagesToUpdate, cPagesToAlloc, paPages));
+
+ /*
+ * Validate, get basics and take the semaphore.
+ * (This is a relatively busy path, so make predictions where possible.)
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
+ AssertMsgReturn( (cPagesToUpdate && cPagesToUpdate < 1024)
+ || (cPagesToAlloc && cPagesToAlloc < 1024),
+ ("cPagesToUpdate=%#x cPagesToAlloc=%#x\n", cPagesToUpdate, cPagesToAlloc),
+ VERR_INVALID_PARAMETER);
+
+ unsigned iPage = 0;
+ for (; iPage < cPagesToUpdate; iPage++)
+ {
+ AssertMsgReturn( ( paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
+ && !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK))
+ || paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
+ || paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE,
+ ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys),
+ VERR_INVALID_PARAMETER);
+ AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
+ /*|| paPages[iPage].idPage == NIL_GMM_PAGEID*/,
+ ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
+ AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
+ /*|| paPages[iPage].idSharedPage == NIL_GMM_PAGEID*/,
+ ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
+ }
+
+ for (; iPage < cPagesToAlloc; iPage++)
+ {
+ AssertMsgReturn(paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS, ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys), VERR_INVALID_PARAMETER);
+ AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
+ AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
+ }
+
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ /* No allocations before the initial reservation has been made! */
+ if (RT_LIKELY( pGVM->gmm.s.Stats.Reserved.cBasePages
+ && pGVM->gmm.s.Stats.Reserved.cFixedPages
+ && pGVM->gmm.s.Stats.Reserved.cShadowPages))
+ {
+ /*
+ * Perform the updates.
+ * Stop on the first error.
+ */
+ for (iPage = 0; iPage < cPagesToUpdate; iPage++)
+ {
+ if (paPages[iPage].idPage != NIL_GMM_PAGEID)
+ {
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idPage);
+ if (RT_LIKELY(pPage))
+ {
+ if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
+ {
+ if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
+ {
+ AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
+ if (RT_LIKELY(paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST))
+ pPage->Private.pfn = paPages[iPage].HCPhysGCPhys >> PAGE_SHIFT;
+ else if (paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE)
+ pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE;
+ /* else: NIL_RTHCPHYS nothing */
+
+ paPages[iPage].idPage = NIL_GMM_PAGEID;
+ paPages[iPage].HCPhysGCPhys = NIL_RTHCPHYS;
+ }
+ else
+ {
+ Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not owner! hGVM=%#x hSelf=%#x\n",
+ iPage, paPages[iPage].idPage, pPage->Private.hGVM, pGVM->hSelf));
+ rc = VERR_GMM_NOT_PAGE_OWNER;
+ break;
+ }
+ }
+ else
+ {
+ Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not private! %.*Rhxs (type %d)\n", iPage, paPages[iPage].idPage, sizeof(*pPage), pPage, pPage->Common.u2State));
+ rc = VERR_GMM_PAGE_NOT_PRIVATE;
+ break;
+ }
+ }
+ else
+ {
+ Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (private)\n", iPage, paPages[iPage].idPage));
+ rc = VERR_GMM_PAGE_NOT_FOUND;
+ break;
+ }
+ }
+
+ if (paPages[iPage].idSharedPage != NIL_GMM_PAGEID)
+ {
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idSharedPage);
+ if (RT_LIKELY(pPage))
+ {
+ if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
+ {
+ AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
+ Assert(pPage->Shared.cRefs);
+ Assert(pGVM->gmm.s.Stats.cSharedPages);
+ Assert(pGVM->gmm.s.Stats.Allocated.cBasePages);
+
+ Log(("GMMR0AllocateHandyPages: free shared page %x cRefs=%d\n", paPages[iPage].idSharedPage, pPage->Shared.cRefs));
+ pGVM->gmm.s.Stats.cSharedPages--;
+ pGVM->gmm.s.Stats.Allocated.cBasePages--;
+ if (!--pPage->Shared.cRefs)
+ gmmR0FreeSharedPage(pGMM, pGVM, paPages[iPage].idSharedPage, pPage);
+ else
+ {
+ Assert(pGMM->cDuplicatePages);
+ pGMM->cDuplicatePages--;
+ }
+
+ paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
+ }
+ else
+ {
+ Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not shared!\n", iPage, paPages[iPage].idSharedPage));
+ rc = VERR_GMM_PAGE_NOT_SHARED;
+ break;
+ }
+ }
+ else
+ {
+ Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (shared)\n", iPage, paPages[iPage].idSharedPage));
+ rc = VERR_GMM_PAGE_NOT_FOUND;
+ break;
+ }
+ }
+ } /* for each page to update */
+
+ if (RT_SUCCESS(rc) && cPagesToAlloc > 0)
+ {
+#if defined(VBOX_STRICT) && 0 /** @todo re-test this later. Appeared to be a PGM init bug. */
+ for (iPage = 0; iPage < cPagesToAlloc; iPage++)
+ {
+ Assert(paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS);
+ Assert(paPages[iPage].idPage == NIL_GMM_PAGEID);
+ Assert(paPages[iPage].idSharedPage == NIL_GMM_PAGEID);
+ }
+#endif
+
+ /*
+ * Join paths with GMMR0AllocatePages for the allocation.
+                 * Note! gmmR0AllocatePagesNew may leave the protection of the mutex!
+ */
+ rc = gmmR0AllocatePagesNew(pGMM, pGVM, cPagesToAlloc, paPages, GMMACCOUNT_BASE);
+ }
+ }
+ else
+ rc = VERR_WRONG_ORDER;
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0AllocateHandyPages: returns %Rrc\n", rc));
+ return rc;
+}
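+
+/*
+ * Illustration only: a hedged sketch of the paPages contract enforced above.
+ * Update entries name an existing page and its new guest physical address
+ * (and optionally a shared page reference to drop), while allocation entries
+ * must be completely NIL'ed out and are filled in on return.  The identifiers
+ * on the right-hand side are hypothetical caller values.
+ *
+ *     // entry to update: re-anchor an existing private page
+ *     paPages[i].idPage       = idExistingPage;       // page owned by this VM
+ *     paPages[i].idSharedPage = NIL_GMM_PAGEID;       // or a shared page to dereference
+ *     paPages[i].HCPhysGCPhys = GCPhysNew;            // page aligned, <= GMM_GCPHYS_LAST,
+ *                                                     // or NIL_RTHCPHYS / GMM_GCPHYS_UNSHAREABLE
+ *
+ *     // entry to allocate: everything NIL on input
+ *     paPages[j].idPage       = NIL_GMM_PAGEID;
+ *     paPages[j].idSharedPage = NIL_GMM_PAGEID;
+ *     paPages[j].HCPhysGCPhys = NIL_RTHCPHYS;
+ *     // on success the GMM fills in .idPage and the host physical address
+ */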
+
+
+/**
+ * Allocate one or more pages.
+ *
+ * This is typically used for ROMs and MMIO2 (VRAM) during VM creation.
+ * The allocated pages are not cleared and will contain random garbage.
+ *
+ * @returns VBox status code:
+ * @retval VINF_SUCCESS on success.
+ * @retval VERR_NOT_OWNER if the caller is not an EMT.
+ * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
+ * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
+ * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
+ * that is we're trying to allocate more than we've reserved.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param cPages The number of pages to allocate.
+ * @param paPages Pointer to the page descriptors.
+ * See GMMPAGEDESC for details on what is expected on
+ * input.
+ * @param enmAccount The account to charge.
+ *
+ * @thread EMT.
+ */
+GMMR0DECL(int) GMMR0AllocatePages(PGVM pGVM, VMCPUID idCpu, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
+{
+ LogFlow(("GMMR0AllocatePages: pGVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pGVM, cPages, paPages, enmAccount));
+
+ /*
+ * Validate, get basics and take the semaphore.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
+ AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
+ AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
+
+ for (unsigned iPage = 0; iPage < cPages; iPage++)
+ {
+ AssertMsgReturn( paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
+ || paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE
+ || ( enmAccount == GMMACCOUNT_BASE
+ && paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
+ && !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK)),
+ ("#%#x: %RHp enmAccount=%d\n", iPage, paPages[iPage].HCPhysGCPhys, enmAccount),
+ VERR_INVALID_PARAMETER);
+ AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
+ AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
+ }
+
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+
+ /* No allocations before the initial reservation has been made! */
+ if (RT_LIKELY( pGVM->gmm.s.Stats.Reserved.cBasePages
+ && pGVM->gmm.s.Stats.Reserved.cFixedPages
+ && pGVM->gmm.s.Stats.Reserved.cShadowPages))
+ rc = gmmR0AllocatePagesNew(pGMM, pGVM, cPages, paPages, enmAccount);
+ else
+ rc = VERR_WRONG_ORDER;
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0AllocatePages.
+ *
+ * @returns see GMMR0AllocatePages.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0AllocatePagesReq(PGVM pGVM, VMCPUID idCpu, PGMMALLOCATEPAGESREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0]),
+ ("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0])),
+ VERR_INVALID_PARAMETER);
+ AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF_DYN(GMMALLOCATEPAGESREQ, aPages[pReq->cPages]),
+ ("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF_DYN(GMMALLOCATEPAGESREQ, aPages[pReq->cPages])),
+ VERR_INVALID_PARAMETER);
+
+ return GMMR0AllocatePages(pGVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
+}
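+
+/*
+ * Illustration only: a sketch, under the size checks above, of how a caller
+ * might build the variable length GMMALLOCATEPAGESREQ.  Allocating the
+ * request buffer and dispatching it to ring-0 follow the usual VMM request
+ * conventions and are only hinted at here.
+ *
+ *     uint32_t const       cbReq = RT_UOFFSETOF_DYN(GMMALLOCATEPAGESREQ, aPages[cPages]);
+ *     PGMMALLOCATEPAGESREQ pReq  = (PGMMALLOCATEPAGESREQ)RTMemAllocZ(cbReq); // hypothetical buffer
+ *     pReq->Hdr.cbReq  = cbReq;          // must equal the dynamic size exactly
+ *     pReq->cPages     = cPages;
+ *     pReq->enmAccount = GMMACCOUNT_BASE;
+ *     for (uint32_t i = 0; i < cPages; i++)
+ *     {
+ *         pReq->aPages[i].HCPhysGCPhys = NIL_RTHCPHYS;   // or a GC phys for base pages
+ *         pReq->aPages[i].idPage       = NIL_GMM_PAGEID;
+ *         pReq->aPages[i].idSharedPage = NIL_GMM_PAGEID;
+ *     }
+ */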
+
+
+/**
+ * Allocate a large page to represent guest RAM.
+ *
+ * The allocated pages are not cleared and will contain random garbage.
+ *
+ * @returns VBox status code:
+ * @retval VINF_SUCCESS on success.
+ * @retval VERR_NOT_OWNER if the caller is not an EMT.
+ * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
+ * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
+ * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
+ * that is we're trying to allocate more than we've reserved.
+ * @returns see GMMR0AllocatePages.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param cbPage Large page size.
+ * @param pIdPage Where to return the GMM page ID of the page.
+ * @param pHCPhys Where to return the host physical address of the page.
+ */
+GMMR0DECL(int) GMMR0AllocateLargePage(PGVM pGVM, VMCPUID idCpu, uint32_t cbPage, uint32_t *pIdPage, RTHCPHYS *pHCPhys)
+{
+ LogFlow(("GMMR0AllocateLargePage: pGVM=%p cbPage=%x\n", pGVM, cbPage));
+
+ AssertReturn(cbPage == GMM_CHUNK_SIZE, VERR_INVALID_PARAMETER);
+ AssertPtrReturn(pIdPage, VERR_INVALID_PARAMETER);
+ AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
+
+ /*
+ * Validate, get basics and take the semaphore.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+#ifdef GMM_WITH_LEGACY_MODE
+ // /* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
+ // if (pGMM->fLegacyAllocationMode)
+ // return VERR_NOT_SUPPORTED;
+#endif
+
+ *pHCPhys = NIL_RTHCPHYS;
+ *pIdPage = NIL_GMM_PAGEID;
+
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
+ if (RT_UNLIKELY( pGVM->gmm.s.Stats.Allocated.cBasePages + pGVM->gmm.s.Stats.cBalloonedPages + cPages
+ > pGVM->gmm.s.Stats.Reserved.cBasePages))
+ {
+ Log(("GMMR0AllocateLargePage: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
+ pGVM->gmm.s.Stats.Reserved.cBasePages, pGVM->gmm.s.Stats.Allocated.cBasePages, cPages));
+ gmmR0MutexRelease(pGMM);
+ return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
+ }
+
+ /*
+ * Allocate a new large page chunk.
+ *
+ * Note! We leave the giant GMM lock temporarily as the allocation might
+ * take a long time. gmmR0RegisterChunk will retake it (ugly).
+ */
+ AssertCompile(GMM_CHUNK_SIZE == _2M);
+ gmmR0MutexRelease(pGMM);
+
+ RTR0MEMOBJ hMemObj;
+ rc = RTR0MemObjAllocPhysEx(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS, GMM_CHUNK_SIZE);
+ if (RT_SUCCESS(rc))
+ {
+ PGMMCHUNKFREESET pSet = pGMM->fBoundMemoryMode ? &pGVM->gmm.s.Private : &pGMM->PrivateX;
+ PGMMCHUNK pChunk;
+ rc = gmmR0RegisterChunk(pGMM, pSet, hMemObj, pGVM->hSelf, GMM_CHUNK_FLAGS_LARGE_PAGE, &pChunk);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Allocate all the pages in the chunk.
+ */
+ /* Unlink the new chunk from the free list. */
+ gmmR0UnlinkChunk(pChunk);
+
+ /** @todo rewrite this to skip the looping. */
+ /* Allocate all pages. */
+ GMMPAGEDESC PageDesc;
+ gmmR0AllocatePage(pChunk, pGVM->hSelf, &PageDesc);
+
+ /* Return the first page as we'll use the whole chunk as one big page. */
+ *pIdPage = PageDesc.idPage;
+ *pHCPhys = PageDesc.HCPhysGCPhys;
+
+ for (unsigned i = 1; i < cPages; i++)
+ gmmR0AllocatePage(pChunk, pGVM->hSelf, &PageDesc);
+
+ /* Update accounting. */
+ pGVM->gmm.s.Stats.Allocated.cBasePages += cPages;
+ pGVM->gmm.s.Stats.cPrivatePages += cPages;
+ pGMM->cAllocatedPages += cPages;
+
+ gmmR0LinkChunk(pChunk, pSet);
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0AllocateLargePage: returns VINF_SUCCESS\n"));
+ return VINF_SUCCESS;
+ }
+ RTR0MemObjFree(hMemObj, true /* fFreeMappings */);
+ }
+ }
+ else
+ {
+ gmmR0MutexRelease(pGMM);
+ rc = VERR_GMM_IS_NOT_SANE;
+ }
+
+ LogFlow(("GMMR0AllocateLargePage: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Free a large page.
+ *
+ * @returns VBox status code:
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param idPage The large page id.
+ */
+GMMR0DECL(int) GMMR0FreeLargePage(PGVM pGVM, VMCPUID idCpu, uint32_t idPage)
+{
+ LogFlow(("GMMR0FreeLargePage: pGVM=%p idPage=%x\n", pGVM, idPage));
+
+ /*
+ * Validate, get basics and take the semaphore.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+#ifdef GMM_WITH_LEGACY_MODE
+ // /* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
+ // if (pGMM->fLegacyAllocationMode)
+ // return VERR_NOT_SUPPORTED;
+#endif
+
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
+
+ if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cBasePages < cPages))
+ {
+ Log(("GMMR0FreeLargePage: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Stats.Allocated.cBasePages, cPages));
+ gmmR0MutexRelease(pGMM);
+ return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
+ }
+
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
+ if (RT_LIKELY( pPage
+ && GMM_PAGE_IS_PRIVATE(pPage)))
+ {
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+ Assert(pChunk);
+ Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
+ Assert(pChunk->cPrivate > 0);
+
+ /* Release the memory immediately. */
+ gmmR0FreeChunk(pGMM, NULL, pChunk, false /*fRelaxedSem*/); /** @todo this can be relaxed too! */
+
+ /* Update accounting. */
+ pGVM->gmm.s.Stats.Allocated.cBasePages -= cPages;
+ pGVM->gmm.s.Stats.cPrivatePages -= cPages;
+ pGMM->cAllocatedPages -= cPages;
+ }
+ else
+ rc = VERR_GMM_PAGE_NOT_FOUND;
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0FreeLargePage: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0FreeLargePage.
+ *
+ * @returns see GMMR0FreeLargePage.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0FreeLargePageReq(PGVM pGVM, VMCPUID idCpu, PGMMFREELARGEPAGEREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMFREEPAGESREQ),
+ ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMFREEPAGESREQ)),
+ VERR_INVALID_PARAMETER);
+
+ return GMMR0FreeLargePage(pGVM, idCpu, pReq->idPage);
+}
+
+
+/**
+ * @callback_method_impl{FNGVMMR0ENUMCALLBACK,
+ * Used by gmmR0FreeChunkFlushPerVmTlbs().}
+ */
+static DECLCALLBACK(int) gmmR0InvalidatePerVmChunkTlbCallback(PGVM pGVM, void *pvUser)
+{
+ RT_NOREF(pvUser);
+ if (pGVM->gmm.s.hChunkTlbSpinLock != NIL_RTSPINLOCK)
+ {
+ RTSpinlockAcquire(pGVM->gmm.s.hChunkTlbSpinLock);
+ uintptr_t i = RT_ELEMENTS(pGVM->gmm.s.aChunkTlbEntries);
+ while (i-- > 0)
+ {
+ pGVM->gmm.s.aChunkTlbEntries[i].idGeneration = UINT64_MAX;
+ pGVM->gmm.s.aChunkTlbEntries[i].pChunk = NULL;
+ }
+ RTSpinlockRelease(pGVM->gmm.s.hChunkTlbSpinLock);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Called by gmmR0FreeChunk when we reach the threshold for wrapping around the
+ * free generation ID value.
+ *
+ * This is done at 2^62 - 1, which allows us to drop all locks, as it will
+ * take a while before 12 exa (2 305 843 009 213 693 952) calls to
+ * gmmR0FreeChunk can be made and cause a real wrap-around.  We do two
+ * invalidation passes and reset the generation ID between them.  This
+ * makes sure there are no false positives.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ */
+static void gmmR0FreeChunkFlushPerVmTlbs(PGMM pGMM)
+{
+ /*
+ * First invalidation pass.
+ */
+ int rc = GVMMR0EnumVMs(gmmR0InvalidatePerVmChunkTlbCallback, NULL);
+ AssertRCSuccess(rc);
+
+ /*
+ * Reset the generation number.
+ */
+ RTSpinlockAcquire(pGMM->hSpinLockTree);
+ ASMAtomicWriteU64(&pGMM->idFreeGeneration, 1);
+ RTSpinlockRelease(pGMM->hSpinLockTree);
+
+ /*
+ * Second invalidation pass.
+ */
+ rc = GVMMR0EnumVMs(gmmR0InvalidatePerVmChunkTlbCallback, NULL);
+ AssertRCSuccess(rc);
+}
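+
+/*
+ * Note on the two passes above (an inference, not normative): the per-VM
+ * chunk TLB entries cache a free-generation value (see the callback setting
+ * idGeneration to UINT64_MAX), so simply resetting pGMM->idFreeGeneration to
+ * 1 could make a stale entry appear current again.  Invalidating every
+ * per-VM TLB both before and after the reset means any entry refilled while
+ * the reset is in flight gets wiped by the second pass, which is what rules
+ * out false positives.
+ */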
+
+
+/**
+ * Frees a chunk, giving it back to the host OS.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param pGVM This is set when called from GMMR0CleanupVM so we can
+ * unmap and free the chunk in one go.
+ * @param pChunk The chunk to free.
+ * @param fRelaxedSem Whether we can release the semaphore while doing the
+ * freeing (@c true) or not.
+ */
+static bool gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem)
+{
+ Assert(pChunk->Core.Key != NIL_GMM_CHUNKID);
+
+ GMMR0CHUNKMTXSTATE MtxState;
+ gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
+
+ /*
+     * Cleanup hack! Unmap the chunk from the caller's address space.
+ * This shouldn't happen, so screw lock contention...
+ */
+ if ( pChunk->cMappingsX
+#ifdef GMM_WITH_LEGACY_MODE
+ && (!pGMM->fLegacyAllocationMode || (pChunk->fFlags & GMM_CHUNK_FLAGS_LARGE_PAGE))
+#endif
+ && pGVM)
+ gmmR0UnmapChunkLocked(pGMM, pGVM, pChunk);
+
+ /*
+ * If there are current mappings of the chunk, then request the
+ * VMs to unmap them. Reposition the chunk in the free list so
+ * it won't be a likely candidate for allocations.
+ */
+ if (pChunk->cMappingsX)
+ {
+ /** @todo R0 -> VM request */
+ /* The chunk can be mapped by more than one VM if fBoundMemoryMode is false! */
+ Log(("gmmR0FreeChunk: chunk still has %d mappings; don't free!\n", pChunk->cMappingsX));
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ return false;
+ }
+
+
+ /*
+ * Save and trash the handle.
+ */
+ RTR0MEMOBJ const hMemObj = pChunk->hMemObj;
+ pChunk->hMemObj = NIL_RTR0MEMOBJ;
+
+ /*
+ * Unlink it from everywhere.
+ */
+ gmmR0UnlinkChunk(pChunk);
+
+ RTSpinlockAcquire(pGMM->hSpinLockTree);
+
+ RTListNodeRemove(&pChunk->ListNode);
+
+ PAVLU32NODECORE pCore = RTAvlU32Remove(&pGMM->pChunks, pChunk->Core.Key);
+ Assert(pCore == &pChunk->Core); NOREF(pCore);
+
+ PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(pChunk->Core.Key)];
+ if (pTlbe->pChunk == pChunk)
+ {
+ pTlbe->idChunk = NIL_GMM_CHUNKID;
+ pTlbe->pChunk = NULL;
+ }
+
+ Assert(pGMM->cChunks > 0);
+ pGMM->cChunks--;
+
+ uint64_t const idFreeGeneration = ASMAtomicIncU64(&pGMM->idFreeGeneration);
+
+ RTSpinlockRelease(pGMM->hSpinLockTree);
+
+ /*
+ * Free the Chunk ID before dropping the locks and freeing the rest.
+ */
+ gmmR0FreeChunkId(pGMM, pChunk->Core.Key);
+ pChunk->Core.Key = NIL_GMM_CHUNKID;
+
+ pGMM->cFreedChunks++;
+
+ gmmR0ChunkMutexRelease(&MtxState, NULL);
+ if (fRelaxedSem)
+ gmmR0MutexRelease(pGMM);
+
+ if (idFreeGeneration == UINT64_MAX / 4)
+ gmmR0FreeChunkFlushPerVmTlbs(pGMM);
+
+ RTMemFree(pChunk->paMappingsX);
+ pChunk->paMappingsX = NULL;
+
+ RTMemFree(pChunk);
+
+#if defined(VBOX_WITH_RAM_IN_KERNEL) && !defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM)
+ int rc = RTR0MemObjFree(hMemObj, true /* fFreeMappings */);
+#else
+ int rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
+#endif
+ AssertLogRelRC(rc);
+
+ if (fRelaxedSem)
+ gmmR0MutexAcquire(pGMM);
+ return fRelaxedSem;
+}
+
+
+/**
+ * Free page worker.
+ *
+ * The caller does all the statistics decrementing; we do all the incrementing.
+ *
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the GVM instance.
+ * @param pChunk Pointer to the chunk this page belongs to.
+ * @param idPage The Page ID.
+ * @param pPage Pointer to the page.
+ */
+static void gmmR0FreePageWorker(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, uint32_t idPage, PGMMPAGE pPage)
+{
+ Log3(("F pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x\n",
+ pPage, pPage - &pChunk->aPages[0], idPage, pPage->Common.u2State, pChunk->iFreeHead)); NOREF(idPage);
+
+ /*
+ * Put the page on the free list.
+ */
+ pPage->u = 0;
+ pPage->Free.u2State = GMM_PAGE_STATE_FREE;
+ Assert(pChunk->iFreeHead < RT_ELEMENTS(pChunk->aPages) || pChunk->iFreeHead == UINT16_MAX);
+ pPage->Free.iNext = pChunk->iFreeHead;
+ pChunk->iFreeHead = pPage - &pChunk->aPages[0];
+
+ /*
+ * Update statistics (the cShared/cPrivate stats are up to date already),
+ * and relink the chunk if necessary.
+ */
+ unsigned const cFree = pChunk->cFree;
+ if ( !cFree
+ || gmmR0SelectFreeSetList(cFree) != gmmR0SelectFreeSetList(cFree + 1))
+ {
+ gmmR0UnlinkChunk(pChunk);
+ pChunk->cFree++;
+ gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
+ }
+ else
+ {
+ pChunk->cFree = cFree + 1;
+ pChunk->pSet->cFreePages++;
+ }
+
+ /*
+ * If the chunk becomes empty, consider giving memory back to the host OS.
+ *
+     * The current strategy is to try to give it back if there are other chunks
+ * in this free list, meaning if there are at least 240 free pages in this
+ * category. Note that since there are probably mappings of the chunk,
+ * it won't be freed up instantly, which probably screws up this logic
+ * a bit...
+ */
+ /** @todo Do this on the way out. */
+ if (RT_LIKELY( pChunk->cFree != GMM_CHUNK_NUM_PAGES
+ || pChunk->pFreeNext == NULL
+ || pChunk->pFreePrev == NULL /** @todo this is probably misfiring, see reset... */))
+ { /* likely */ }
+#ifdef GMM_WITH_LEGACY_MODE
+ else if (RT_LIKELY(pGMM->fLegacyAllocationMode && !(pChunk->fFlags & GMM_CHUNK_FLAGS_LARGE_PAGE)))
+ { /* likely */ }
+#endif
+ else
+ gmmR0FreeChunk(pGMM, NULL, pChunk, false);
+
+}
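+
+/*
+ * Illustration only: when the worker above relinks.  Assuming purely for
+ * illustration that GMM_CHUNK_FREE_SET_SHIFT is 4:
+ *
+ *     cFree  0 ->  1 : chunk was on no free list, so it must be linked   (relink)
+ *     cFree 15 -> 16 : bucket changes from apLists[0] to apLists[1]      (relink)
+ *     cFree  5 ->  6 : same bucket, only the counters are bumped         (cheap path)
+ *
+ * The common case therefore costs two increments instead of a full
+ * unlink/link round trip.
+ */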
+
+
+/**
+ * Frees a shared page, the page is known to exist and be valid and such.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param pGVM Pointer to the GVM instance.
+ * @param idPage The page id.
+ * @param pPage The page structure.
+ */
+DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage)
+{
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+ Assert(pChunk);
+ Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
+ Assert(pChunk->cShared > 0);
+ Assert(pGMM->cSharedPages > 0);
+ Assert(pGMM->cAllocatedPages > 0);
+ Assert(!pPage->Shared.cRefs);
+
+ pChunk->cShared--;
+ pGMM->cAllocatedPages--;
+ pGMM->cSharedPages--;
+ gmmR0FreePageWorker(pGMM, pGVM, pChunk, idPage, pPage);
+}
+
+
+/**
+ * Frees a private page, the page is known to exist and be valid and such.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param pGVM Pointer to the GVM instance.
+ * @param idPage The page id.
+ * @param pPage The page structure.
+ */
+DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage)
+{
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+ Assert(pChunk);
+ Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
+ Assert(pChunk->cPrivate > 0);
+ Assert(pGMM->cAllocatedPages > 0);
+
+ pChunk->cPrivate--;
+ pGMM->cAllocatedPages--;
+ gmmR0FreePageWorker(pGMM, pGVM, pChunk, idPage, pPage);
+}
+
+
+/**
+ * Common worker for GMMR0FreePages and GMMR0BalloonedPages.
+ *
+ * @returns VBox status code:
+ * @retval xxx
+ *
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the VM.
+ * @param cPages The number of pages to free.
+ * @param paPages Pointer to the page descriptors.
+ * @param enmAccount The account this relates to.
+ */
+static int gmmR0FreePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
+{
+ /*
+ * Check that the request isn't impossible wrt to the account status.
+ */
+ switch (enmAccount)
+ {
+ case GMMACCOUNT_BASE:
+ if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cBasePages < cPages))
+ {
+ Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Stats.Allocated.cBasePages, cPages));
+ return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
+ }
+ break;
+ case GMMACCOUNT_SHADOW:
+ if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cShadowPages < cPages))
+ {
+ Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Stats.Allocated.cShadowPages, cPages));
+ return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
+ }
+ break;
+ case GMMACCOUNT_FIXED:
+ if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cFixedPages < cPages))
+ {
+ Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Stats.Allocated.cFixedPages, cPages));
+ return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
+ }
+ break;
+ default:
+ AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
+ }
+
+ /*
+ * Walk the descriptors and free the pages.
+ *
+ * Statistics (except the account) are being updated as we go along,
+ * unlike the alloc code. Also, stop on the first error.
+ */
+ int rc = VINF_SUCCESS;
+ uint32_t iPage;
+ for (iPage = 0; iPage < cPages; iPage++)
+ {
+ uint32_t idPage = paPages[iPage].idPage;
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
+ if (RT_LIKELY(pPage))
+ {
+ if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
+ {
+ if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
+ {
+ Assert(pGVM->gmm.s.Stats.cPrivatePages);
+ pGVM->gmm.s.Stats.cPrivatePages--;
+ gmmR0FreePrivatePage(pGMM, pGVM, idPage, pPage);
+ }
+ else
+ {
+ Log(("gmmR0AllocatePages: #%#x/%#x: not owner! hGVM=%#x hSelf=%#x\n", iPage, idPage,
+ pPage->Private.hGVM, pGVM->hSelf));
+ rc = VERR_GMM_NOT_PAGE_OWNER;
+ break;
+ }
+ }
+ else if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
+ {
+ Assert(pGVM->gmm.s.Stats.cSharedPages);
+ Assert(pPage->Shared.cRefs);
+#if defined(VBOX_WITH_PAGE_SHARING) && defined(VBOX_STRICT) && HC_ARCH_BITS == 64
+ if (pPage->Shared.u14Checksum)
+ {
+ uint32_t uChecksum = gmmR0StrictPageChecksum(pGMM, pGVM, idPage);
+ uChecksum &= UINT32_C(0x00003fff);
+ AssertMsg(!uChecksum || uChecksum == pPage->Shared.u14Checksum,
+ ("%#x vs %#x - idPage=%#x\n", uChecksum, pPage->Shared.u14Checksum, idPage));
+ }
+#endif
+ pGVM->gmm.s.Stats.cSharedPages--;
+ if (!--pPage->Shared.cRefs)
+ gmmR0FreeSharedPage(pGMM, pGVM, idPage, pPage);
+ else
+ {
+ Assert(pGMM->cDuplicatePages);
+ pGMM->cDuplicatePages--;
+ }
+ }
+ else
+ {
+ Log(("gmmR0AllocatePages: #%#x/%#x: already free!\n", iPage, idPage));
+ rc = VERR_GMM_PAGE_ALREADY_FREE;
+ break;
+ }
+ }
+ else
+ {
+ Log(("gmmR0AllocatePages: #%#x/%#x: not found!\n", iPage, idPage));
+ rc = VERR_GMM_PAGE_NOT_FOUND;
+ break;
+ }
+ paPages[iPage].idPage = NIL_GMM_PAGEID;
+ }
+
+ /*
+ * Update the account.
+ */
+ switch (enmAccount)
+ {
+ case GMMACCOUNT_BASE: pGVM->gmm.s.Stats.Allocated.cBasePages -= iPage; break;
+ case GMMACCOUNT_SHADOW: pGVM->gmm.s.Stats.Allocated.cShadowPages -= iPage; break;
+ case GMMACCOUNT_FIXED: pGVM->gmm.s.Stats.Allocated.cFixedPages -= iPage; break;
+ default:
+ AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
+ }
+
+ /*
+ * Any threshold stuff to be done here?
+ */
+
+ return rc;
+}
+
+
+/**
+ * Free one or more pages.
+ *
+ * This is typically used at reset time or power off.
+ *
+ * @returns VBox status code:
+ * @retval xxx
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param cPages The number of pages to free.
+ * @param paPages Pointer to the page descriptors containing the page IDs
+ * for each page.
+ * @param enmAccount The account this relates to.
+ * @thread EMT.
+ */
+GMMR0DECL(int) GMMR0FreePages(PGVM pGVM, VMCPUID idCpu, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
+{
+ LogFlow(("GMMR0FreePages: pGVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pGVM, cPages, paPages, enmAccount));
+
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
+ AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
+ AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
+
+ for (unsigned iPage = 0; iPage < cPages; iPage++)
+ AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
+ /*|| paPages[iPage].idPage == NIL_GMM_PAGEID*/,
+ ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
+
+ /*
+ * Take the semaphore and call the worker function.
+ */
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ rc = gmmR0FreePages(pGMM, pGVM, cPages, paPages, enmAccount);
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0FreePages: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0FreePages.
+ *
+ * @returns see GMMR0FreePages.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0FreePagesReq(PGVM pGVM, VMCPUID idCpu, PGMMFREEPAGESREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0]),
+ ("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0])),
+ VERR_INVALID_PARAMETER);
+ AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF_DYN(GMMFREEPAGESREQ, aPages[pReq->cPages]),
+ ("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF_DYN(GMMFREEPAGESREQ, aPages[pReq->cPages])),
+ VERR_INVALID_PARAMETER);
+
+ return GMMR0FreePages(pGVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
+}
+
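+/*
+ * Illustrative sketch, not part of the original source: building a
+ * GMMFREEPAGESREQ for the wrapper above. Only the fields validated by
+ * GMMR0FreePagesReq (Hdr.cbReq, cPages, aPages[], enmAccount) are touched;
+ * the helper name and its caller are hypothetical.
+ */
+#if 0 /* example only, not compiled */
+static int exampleBuildFreePagesReq(uint32_t const *paidPages, uint32_t cPages, PGMMFREEPAGESREQ *ppReq)
+{
+    /* The request is variable sized: header plus one descriptor per page. */
+    uint32_t const   cbReq = RT_UOFFSETOF_DYN(GMMFREEPAGESREQ, aPages[cPages]);
+    PGMMFREEPAGESREQ pReq  = (PGMMFREEPAGESREQ)RTMemAllocZ(cbReq);
+    if (!pReq)
+        return VERR_NO_MEMORY;
+
+    pReq->Hdr.cbReq  = cbReq;                    /* checked against RT_UOFFSETOF_DYN by the wrapper */
+    pReq->enmAccount = GMMACCOUNT_BASE;
+    pReq->cPages     = cPages;
+    for (uint32_t i = 0; i < cPages; i++)
+        pReq->aPages[i].idPage = paidPages[i];   /* page IDs handed out by a prior allocation */
+
+    *ppReq = pReq;   /* the caller passes this to GMMR0FreePagesReq and then frees it */
+    return VINF_SUCCESS;
+}
+#endif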
+
+/**
+ * Report back on a memory ballooning request.
+ *
+ * The request may or may not have been initiated by the GMM. If it was initiated
+ * by the GMM it is important that this function is called even if no pages were
+ * ballooned.
+ *
+ * @returns VBox status code:
+ * @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH
+ * @retval VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH
+ * @retval VERR_GMM_OVERCOMMITTED_TRY_AGAIN_IN_A_BIT - reset condition
+ * indicating that we won't necessarily have sufficient RAM to boot
+ * the VM again and that it should pause until this changes (we'll try to
+ * balloon some other VM). (For standard deflate we have little choice
+ * but to hope the VM won't use the memory that was returned to it.)
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param enmAction Inflate/deflate/reset.
+ * @param cBalloonedPages The number of pages that was ballooned.
+ *
+ * @thread EMT(idCpu)
+ */
+GMMR0DECL(int) GMMR0BalloonedPages(PGVM pGVM, VMCPUID idCpu, GMMBALLOONACTION enmAction, uint32_t cBalloonedPages)
+{
+ LogFlow(("GMMR0BalloonedPages: pGVM=%p enmAction=%d cBalloonedPages=%#x\n",
+ pGVM, enmAction, cBalloonedPages));
+
+ AssertMsgReturn(cBalloonedPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cBalloonedPages), VERR_INVALID_PARAMETER);
+
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Take the semaphore and do some more validations.
+ */
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ switch (enmAction)
+ {
+ case GMMBALLOONACTION_INFLATE:
+ {
+ if (RT_LIKELY(pGVM->gmm.s.Stats.Allocated.cBasePages + pGVM->gmm.s.Stats.cBalloonedPages + cBalloonedPages
+ <= pGVM->gmm.s.Stats.Reserved.cBasePages))
+ {
+ /*
+ * Record the ballooned memory.
+ */
+ pGMM->cBalloonedPages += cBalloonedPages;
+ if (pGVM->gmm.s.Stats.cReqBalloonedPages)
+ {
+ /* Code path never taken. Might be interesting in the future to request ballooned memory from guests in low memory conditions. */
+ AssertFailed();
+
+ pGVM->gmm.s.Stats.cBalloonedPages += cBalloonedPages;
+ pGVM->gmm.s.Stats.cReqActuallyBalloonedPages += cBalloonedPages;
+ Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx Req=%#llx Actual=%#llx (pending)\n",
+ cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.Stats.cBalloonedPages,
+ pGVM->gmm.s.Stats.cReqBalloonedPages, pGVM->gmm.s.Stats.cReqActuallyBalloonedPages));
+ }
+ else
+ {
+ pGVM->gmm.s.Stats.cBalloonedPages += cBalloonedPages;
+ Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx (user)\n",
+ cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.Stats.cBalloonedPages));
+ }
+ }
+ else
+ {
+ Log(("GMMR0BalloonedPages: cBasePages=%#llx Total=%#llx cBalloonedPages=%#llx Reserved=%#llx\n",
+ pGVM->gmm.s.Stats.Allocated.cBasePages, pGVM->gmm.s.Stats.cBalloonedPages, cBalloonedPages,
+ pGVM->gmm.s.Stats.Reserved.cBasePages));
+ rc = VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
+ }
+ break;
+ }
+
+ case GMMBALLOONACTION_DEFLATE:
+ {
+ /* Deflate. */
+ if (pGVM->gmm.s.Stats.cBalloonedPages >= cBalloonedPages)
+ {
+ /*
+ * Record the ballooned memory.
+ */
+ Assert(pGMM->cBalloonedPages >= cBalloonedPages);
+ pGMM->cBalloonedPages -= cBalloonedPages;
+ pGVM->gmm.s.Stats.cBalloonedPages -= cBalloonedPages;
+ if (pGVM->gmm.s.Stats.cReqDeflatePages)
+ {
+ AssertFailed(); /* This path is for later. */
+ Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx Req=%#llx\n",
+ cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.Stats.cBalloonedPages, pGVM->gmm.s.Stats.cReqDeflatePages));
+
+ /*
+ * Anything we need to do here now when the request has been completed?
+ */
+ pGVM->gmm.s.Stats.cReqDeflatePages = 0;
+ }
+ else
+ Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx (user)\n",
+ cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.Stats.cBalloonedPages));
+ }
+ else
+ {
+ Log(("GMMR0BalloonedPages: Total=%#llx cBalloonedPages=%#llx\n", pGVM->gmm.s.Stats.cBalloonedPages, cBalloonedPages));
+ rc = VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH;
+ }
+ break;
+ }
+
+ case GMMBALLOONACTION_RESET:
+ {
+ /* Reset to an empty balloon. */
+ Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.Stats.cBalloonedPages);
+
+ pGMM->cBalloonedPages -= pGVM->gmm.s.Stats.cBalloonedPages;
+ pGVM->gmm.s.Stats.cBalloonedPages = 0;
+ break;
+ }
+
+ default:
+ rc = VERR_INVALID_PARAMETER;
+ break;
+ }
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0BalloonedPages: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0BalloonedPages.
+ *
+ * @returns see GMMR0BalloonedPages.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0BalloonedPagesReq(PGVM pGVM, VMCPUID idCpu, PGMMBALLOONEDPAGESREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMBALLOONEDPAGESREQ),
+ ("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMBALLOONEDPAGESREQ)),
+ VERR_INVALID_PARAMETER);
+
+ return GMMR0BalloonedPages(pGVM, idCpu, pReq->enmAction, pReq->cBalloonedPages);
+}
+
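+/*
+ * Illustrative sketch, not part of the original source: reporting an inflate
+ * of cPages through the wrapper above. Only the fields forwarded by
+ * GMMR0BalloonedPagesReq are filled in; the helper name is hypothetical.
+ */
+#if 0 /* example only, not compiled */
+static int exampleReportBalloonInflate(PGVM pGVM, VMCPUID idCpu, uint32_t cPages)
+{
+    GMMBALLOONEDPAGESREQ Req;
+    RT_ZERO(Req);
+    Req.Hdr.cbReq       = sizeof(Req);              /* size check in the wrapper */
+    Req.enmAction       = GMMBALLOONACTION_INFLATE; /* or _DEFLATE / _RESET      */
+    Req.cBalloonedPages = cPages;
+    return GMMR0BalloonedPagesReq(pGVM, idCpu, &Req);
+}
+#endif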
+
+/**
+ * Return memory statistics for the hypervisor
+ *
+ * @returns VBox status code.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0QueryHypervisorMemoryStatsReq(PGMMMEMSTATSREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
+ ("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
+ VERR_INVALID_PARAMETER);
+
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ pReq->cAllocPages = pGMM->cAllocatedPages;
+ pReq->cFreePages = (pGMM->cChunks << (GMM_CHUNK_SHIFT - PAGE_SHIFT)) - pGMM->cAllocatedPages;
+ pReq->cBalloonedPages = pGMM->cBalloonedPages;
+ pReq->cMaxPages = pGMM->cMaxPages;
+ pReq->cSharedPages = pGMM->cDuplicatePages;
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+
+ return VINF_SUCCESS;
+}
+
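+/*
+ * Illustrative sketch, not part of the original source: querying the global
+ * statistics filled in above. Field names match the assignments in
+ * GMMR0QueryHypervisorMemoryStatsReq; the helper name is hypothetical and
+ * the counters are assumed to be 64-bit, matching pGMM's page counters.
+ */
+#if 0 /* example only, not compiled */
+static void exampleLogHypervisorMemStats(void)
+{
+    GMMMEMSTATSREQ Req;
+    RT_ZERO(Req);
+    Req.Hdr.cbReq = sizeof(Req);
+    if (RT_SUCCESS(GMMR0QueryHypervisorMemoryStatsReq(&Req)))
+        Log(("GMM: alloc=%RU64 free=%RU64 ballooned=%RU64 shared-dups=%RU64 max=%RU64 (pages)\n",
+             Req.cAllocPages, Req.cFreePages, Req.cBalloonedPages, Req.cSharedPages, Req.cMaxPages));
+}
+#endif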
+
+/**
+ * Return memory statistics for the VM
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu Cpu id.
+ * @param pReq Pointer to the request packet.
+ *
+ * @thread EMT(idCpu)
+ */
+GMMR0DECL(int) GMMR0QueryMemoryStatsReq(PGVM pGVM, VMCPUID idCpu, PGMMMEMSTATSREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
+ ("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
+ VERR_INVALID_PARAMETER);
+
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Take the semaphore and do some more validations.
+ */
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ pReq->cAllocPages = pGVM->gmm.s.Stats.Allocated.cBasePages;
+ pReq->cBalloonedPages = pGVM->gmm.s.Stats.cBalloonedPages;
+ pReq->cMaxPages = pGVM->gmm.s.Stats.Reserved.cBasePages;
+ pReq->cFreePages = pReq->cMaxPages - pReq->cAllocPages;
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR3QueryVMMemoryStats: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Worker for gmmR0UnmapChunk and gmmr0FreeChunk.
+ *
+ * Don't call this in legacy allocation mode!
+ *
+ * @returns VBox status code.
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the Global VM structure.
+ * @param pChunk Pointer to the chunk to be unmapped.
+ */
+static int gmmR0UnmapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
+{
+ RT_NOREF_PV(pGMM);
+#ifdef GMM_WITH_LEGACY_MODE
+ Assert(!pGMM->fLegacyAllocationMode || (pChunk->fFlags & GMM_CHUNK_FLAGS_LARGE_PAGE));
+#endif
+
+ /*
+ * Find the mapping and try unmapping it.
+ */
+ uint32_t cMappings = pChunk->cMappingsX;
+ for (uint32_t i = 0; i < cMappings; i++)
+ {
+ Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
+ if (pChunk->paMappingsX[i].pGVM == pGVM)
+ {
+ /* unmap */
+ int rc = RTR0MemObjFree(pChunk->paMappingsX[i].hMapObj, false /* fFreeMappings (NA) */);
+ if (RT_SUCCESS(rc))
+ {
+ /* update the record. */
+ cMappings--;
+ if (i < cMappings)
+ pChunk->paMappingsX[i] = pChunk->paMappingsX[cMappings];
+ pChunk->paMappingsX[cMappings].hMapObj = NIL_RTR0MEMOBJ;
+ pChunk->paMappingsX[cMappings].pGVM = NULL;
+ Assert(pChunk->cMappingsX - 1U == cMappings);
+ pChunk->cMappingsX = cMappings;
+ }
+
+ return rc;
+ }
+ }
+
+ Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
+ return VERR_GMM_CHUNK_NOT_MAPPED;
+}
+
+
+/**
+ * Unmaps a chunk previously mapped into the address space of the current process.
+ *
+ * @returns VBox status code.
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the Global VM structure.
+ * @param pChunk Pointer to the chunk to be unmapped.
+ * @param fRelaxedSem Whether we can release the semaphore while doing the
+ * mapping (@c true) or not.
+ */
+static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem)
+{
+#ifdef GMM_WITH_LEGACY_MODE
+ if (!pGMM->fLegacyAllocationMode || (pChunk->fFlags & GMM_CHUNK_FLAGS_LARGE_PAGE))
+ {
+#endif
+ /*
+ * Lock the chunk and if possible leave the giant GMM lock.
+ */
+ GMMR0CHUNKMTXSTATE MtxState;
+ int rc = gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk,
+ fRelaxedSem ? GMMR0CHUNK_MTX_RETAKE_GIANT : GMMR0CHUNK_MTX_KEEP_GIANT);
+ if (RT_SUCCESS(rc))
+ {
+ rc = gmmR0UnmapChunkLocked(pGMM, pGVM, pChunk);
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ }
+ return rc;
+#ifdef GMM_WITH_LEGACY_MODE
+ }
+
+ if (pChunk->hGVM == pGVM->hSelf)
+ return VINF_SUCCESS;
+
+ Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x (legacy)\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
+ return VERR_GMM_CHUNK_NOT_MAPPED;
+#endif
+}
+
+
+/**
+ * Worker for gmmR0MapChunk.
+ *
+ * @returns VBox status code.
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the Global VM structure.
+ * @param pChunk Pointer to the chunk to be mapped.
+ * @param ppvR3 Where to store the ring-3 address of the mapping.
+ * In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will
+ * contain the address of the existing mapping.
+ */
+static int gmmR0MapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
+{
+#ifdef GMM_WITH_LEGACY_MODE
+ /*
+ * If we're in legacy mode this is simple.
+ */
+ if (pGMM->fLegacyAllocationMode && !(pChunk->fFlags & GMM_CHUNK_FLAGS_LARGE_PAGE))
+ {
+ if (pChunk->hGVM != pGVM->hSelf)
+ {
+ Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
+ return VERR_GMM_CHUNK_NOT_FOUND;
+ }
+
+ *ppvR3 = RTR0MemObjAddressR3(pChunk->hMemObj);
+ return VINF_SUCCESS;
+ }
+#else
+ RT_NOREF(pGMM);
+#endif
+
+ /*
+ * Check to see if the chunk is already mapped.
+ */
+ for (uint32_t i = 0; i < pChunk->cMappingsX; i++)
+ {
+ Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
+ if (pChunk->paMappingsX[i].pGVM == pGVM)
+ {
+ *ppvR3 = RTR0MemObjAddressR3(pChunk->paMappingsX[i].hMapObj);
+ Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
+#ifdef VBOX_WITH_PAGE_SHARING
+ /* The ring-3 chunk cache can be out of sync; don't fail. */
+ return VINF_SUCCESS;
+#else
+ return VERR_GMM_CHUNK_ALREADY_MAPPED;
+#endif
+ }
+ }
+
+ /*
+ * Do the mapping.
+ */
+ RTR0MEMOBJ hMapObj;
+ int rc = RTR0MemObjMapUser(&hMapObj, pChunk->hMemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
+ if (RT_SUCCESS(rc))
+ {
+ /* reallocate the array? assumes few users per chunk (usually one). */
+ unsigned iMapping = pChunk->cMappingsX;
+ if ( iMapping <= 3
+ || (iMapping & 3) == 0)
+ {
+ unsigned cNewSize = iMapping <= 3
+ ? iMapping + 1
+ : iMapping + 4;
+ Assert(cNewSize < 4 || RT_ALIGN_32(cNewSize, 4) == cNewSize);
+ if (RT_UNLIKELY(cNewSize > UINT16_MAX))
+ {
+ rc = RTR0MemObjFree(hMapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
+ return VERR_GMM_TOO_MANY_CHUNK_MAPPINGS;
+ }
+
+ void *pvMappings = RTMemRealloc(pChunk->paMappingsX, cNewSize * sizeof(pChunk->paMappingsX[0]));
+ if (RT_UNLIKELY(!pvMappings))
+ {
+ rc = RTR0MemObjFree(hMapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
+ return VERR_NO_MEMORY;
+ }
+ pChunk->paMappingsX = (PGMMCHUNKMAP)pvMappings;
+ }
+
+ /* insert new entry */
+ pChunk->paMappingsX[iMapping].hMapObj = hMapObj;
+ pChunk->paMappingsX[iMapping].pGVM = pGVM;
+ Assert(pChunk->cMappingsX == iMapping);
+ pChunk->cMappingsX = iMapping + 1;
+
+ *ppvR3 = RTR0MemObjAddressR3(hMapObj);
+ }
+
+ return rc;
+}
+
+
+/**
+ * Maps a chunk into the user address space of the current process.
+ *
+ * @returns VBox status code.
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the Global VM structure.
+ * @param pChunk Pointer to the chunk to be mapped.
+ * @param fRelaxedSem Whether we can release the semaphore while doing the
+ * mapping (@c true) or not.
+ * @param ppvR3 Where to store the ring-3 address of the mapping.
+ * In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will
+ * contain the address of the existing mapping.
+ */
+static int gmmR0MapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem, PRTR3PTR ppvR3)
+{
+ /*
+ * Take the chunk lock and leave the giant GMM lock when possible, then
+ * call the worker function.
+ */
+ GMMR0CHUNKMTXSTATE MtxState;
+ int rc = gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk,
+ fRelaxedSem ? GMMR0CHUNK_MTX_RETAKE_GIANT : GMMR0CHUNK_MTX_KEEP_GIANT);
+ if (RT_SUCCESS(rc))
+ {
+ rc = gmmR0MapChunkLocked(pGMM, pGVM, pChunk, ppvR3);
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ }
+
+ return rc;
+}
+
+
+
+#if defined(VBOX_WITH_PAGE_SHARING) || (defined(VBOX_STRICT) && HC_ARCH_BITS == 64)
+/**
+ * Check if a chunk is mapped into the specified VM
+ *
+ * @returns mapped yes/no
+ * @param pGMM Pointer to the GMM instance.
+ * @param pGVM Pointer to the Global VM structure.
+ * @param pChunk Pointer to the chunk to be mapped.
+ * @param ppvR3 Where to store the ring-3 address of the mapping.
+ */
+static bool gmmR0IsChunkMapped(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
+{
+ GMMR0CHUNKMTXSTATE MtxState;
+ gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
+ for (uint32_t i = 0; i < pChunk->cMappingsX; i++)
+ {
+ Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
+ if (pChunk->paMappingsX[i].pGVM == pGVM)
+ {
+ *ppvR3 = RTR0MemObjAddressR3(pChunk->paMappingsX[i].hMapObj);
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ return true;
+ }
+ }
+ *ppvR3 = NULL;
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ return false;
+}
+#endif /* VBOX_WITH_PAGE_SHARING || (VBOX_STRICT && 64-BIT) */
+
+
+/**
+ * Map a chunk and/or unmap another chunk.
+ *
+ * The mapping and unmapping applies to the current process.
+ *
+ * This API does two things because it saves a kernel call per mapping
+ * when the ring-3 mapping cache is full.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idChunkMap The chunk to map. NIL_GMM_CHUNKID if nothing to map.
+ * @param idChunkUnmap The chunk to unmap. NIL_GMM_CHUNKID if nothing to unmap.
+ * @param ppvR3 Where to store the address of the mapped chunk. NULL is ok if nothing to map.
+ * @thread EMT ???
+ */
+GMMR0DECL(int) GMMR0MapUnmapChunk(PGVM pGVM, uint32_t idChunkMap, uint32_t idChunkUnmap, PRTR3PTR ppvR3)
+{
+ LogFlow(("GMMR0MapUnmapChunk: pGVM=%p idChunkMap=%#x idChunkUnmap=%#x ppvR3=%p\n",
+ pGVM, idChunkMap, idChunkUnmap, ppvR3));
+
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVM(pGVM);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertCompile(NIL_GMM_CHUNKID == 0);
+ AssertMsgReturn(idChunkMap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkMap), VERR_INVALID_PARAMETER);
+ AssertMsgReturn(idChunkUnmap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkUnmap), VERR_INVALID_PARAMETER);
+
+ if ( idChunkMap == NIL_GMM_CHUNKID
+ && idChunkUnmap == NIL_GMM_CHUNKID)
+ return VERR_INVALID_PARAMETER;
+
+ if (idChunkMap != NIL_GMM_CHUNKID)
+ {
+ AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
+ *ppvR3 = NIL_RTR3PTR;
+ }
+
+ /*
+ * Take the semaphore and do the work.
+ *
+ * The unmapping is done last since it's easier to undo a mapping than
+ * undoing an unmapping. The ring-3 mapping cache cannot be so big
+ * that it pushes the user virtual address space to within a chunk of
+ * its limits, so no problem here.
+ */
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ PGMMCHUNK pMap = NULL;
+ if (idChunkMap != NIL_GMM_CHUNKID)
+ {
+ pMap = gmmR0GetChunk(pGMM, idChunkMap);
+ if (RT_LIKELY(pMap))
+ rc = gmmR0MapChunk(pGMM, pGVM, pMap, true /*fRelaxedSem*/, ppvR3);
+ else
+ {
+ Log(("GMMR0MapUnmapChunk: idChunkMap=%#x\n", idChunkMap));
+ rc = VERR_GMM_CHUNK_NOT_FOUND;
+ }
+ }
+/** @todo split this operation, the bail out might (theoretically) not be
+ * entirely safe. */
+
+ if ( idChunkUnmap != NIL_GMM_CHUNKID
+ && RT_SUCCESS(rc))
+ {
+ PGMMCHUNK pUnmap = gmmR0GetChunk(pGMM, idChunkUnmap);
+ if (RT_LIKELY(pUnmap))
+ rc = gmmR0UnmapChunk(pGMM, pGVM, pUnmap, true /*fRelaxedSem*/);
+ else
+ {
+ Log(("GMMR0MapUnmapChunk: idChunkUnmap=%#x\n", idChunkUnmap));
+ rc = VERR_GMM_CHUNK_NOT_FOUND;
+ }
+
+ if (RT_FAILURE(rc) && pMap)
+ gmmR0UnmapChunk(pGMM, pGVM, pMap, false /*fRelaxedSem*/);
+ }
+
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+ gmmR0MutexRelease(pGMM);
+
+ LogFlow(("GMMR0MapUnmapChunk: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0MapUnmapChunk.
+ *
+ * @returns see GMMR0MapUnmapChunk.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0MapUnmapChunkReq(PGVM pGVM, PGMMMAPUNMAPCHUNKREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GMMR0MapUnmapChunk(pGVM, pReq->idChunkMap, pReq->idChunkUnmap, &pReq->pvR3);
+}
+
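+/*
+ * Illustrative sketch, not part of the original source: swapping one chunk
+ * mapping for another in a single call, the way a full ring-3 mapping cache
+ * would. Field names follow GMMR0MapUnmapChunkReq above; the helper name is
+ * hypothetical.
+ */
+#if 0 /* example only, not compiled */
+static int exampleReplaceChunkMapping(PGVM pGVM, uint32_t idChunkNew, uint32_t idChunkOld, RTR3PTR *ppvR3New)
+{
+    GMMMAPUNMAPCHUNKREQ Req;
+    RT_ZERO(Req);
+    Req.Hdr.cbReq    = sizeof(Req);
+    Req.idChunkMap   = idChunkNew;      /* NIL_GMM_CHUNKID if only unmapping */
+    Req.idChunkUnmap = idChunkOld;      /* NIL_GMM_CHUNKID if only mapping   */
+    Req.pvR3         = NIL_RTR3PTR;
+    int rc = GMMR0MapUnmapChunkReq(pGVM, &Req);
+    if (RT_SUCCESS(rc))
+        *ppvR3New = Req.pvR3;           /* ring-3 address of the new mapping */
+    return rc;
+}
+#endif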
+
+/**
+ * Legacy mode API for supplying pages.
+ *
+ * The specified user address points to an allocation-chunk-sized block that
+ * will be locked down and used by the GMM when the VM asks for pages.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param pvR3 Pointer to the chunk size memory block to lock down.
+ */
+GMMR0DECL(int) GMMR0SeedChunk(PGVM pGVM, VMCPUID idCpu, RTR3PTR pvR3)
+{
+#ifdef GMM_WITH_LEGACY_MODE
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertPtrReturn(pvR3, VERR_INVALID_POINTER);
+ AssertReturn(!(PAGE_OFFSET_MASK & pvR3), VERR_INVALID_POINTER);
+
+ if (!pGMM->fLegacyAllocationMode)
+ {
+ Log(("GMMR0SeedChunk: not in legacy allocation mode!\n"));
+ return VERR_NOT_SUPPORTED;
+ }
+
+ /*
+ * Lock the memory and add it as new chunk with our hGVM.
+ * (The GMM locking is done inside gmmR0RegisterChunk.)
+ */
+ RTR0MEMOBJ hMemObj;
+ rc = RTR0MemObjLockUser(&hMemObj, pvR3, GMM_CHUNK_SIZE, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
+ if (RT_SUCCESS(rc))
+ {
+ rc = gmmR0RegisterChunk(pGMM, &pGVM->gmm.s.Private, hMemObj, pGVM->hSelf, GMM_CHUNK_FLAGS_SEEDED, NULL);
+ if (RT_SUCCESS(rc))
+ gmmR0MutexRelease(pGMM);
+ else
+ RTR0MemObjFree(hMemObj, true /* fFreeMappings */);
+ }
+
+ LogFlow(("GMMR0SeedChunk: rc=%d (pvR3=%p)\n", rc, pvR3));
+ return rc;
+#else
+ RT_NOREF(pGVM, idCpu, pvR3);
+ return VERR_NOT_SUPPORTED;
+#endif
+}
+
+#if defined(VBOX_WITH_RAM_IN_KERNEL) && !defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM)
+
+/**
+ * Gets the ring-0 virtual address for the given page.
+ *
+ * This is used by PGM when IEM and such wants to access guest RAM from ring-0.
+ * One of the ASSUMPTIONS here is that the @a idPage is used by the VM and the
+ * corresponding chunk will remain valid beyond the call (at least till the EMT
+ * returns to ring-3).
+ *
+ * @returns VBox status code.
+ * @param pGVM Pointer to the kernel-only VM instance data.
+ * @param idPage The page ID.
+ * @param ppv Where to store the address.
+ * @thread EMT
+ */
+GMMR0DECL(int) GMMR0PageIdToVirt(PGVM pGVM, uint32_t idPage, void **ppv)
+{
+ *ppv = NULL;
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+
+ uint32_t const idChunk = idPage >> GMM_CHUNKID_SHIFT;
+
+ /*
+ * Start with the per-VM TLB.
+ */
+ RTSpinlockAcquire(pGVM->gmm.s.hChunkTlbSpinLock);
+
+ PGMMPERVMCHUNKTLBE pTlbe = &pGVM->gmm.s.aChunkTlbEntries[GMMPERVM_CHUNKTLB_IDX(idChunk)];
+ PGMMCHUNK pChunk = pTlbe->pChunk;
+ if ( pChunk != NULL
+ && pTlbe->idGeneration == ASMAtomicUoReadU64(&pGMM->idFreeGeneration)
+ && pChunk->Core.Key == idChunk)
+ pGVM->R0Stats.gmm.cChunkTlbHits++; /* hopefully this is a likely outcome */
+ else
+ {
+ pGVM->R0Stats.gmm.cChunkTlbMisses++;
+
+ /*
+ * Look it up in the chunk tree.
+ */
+ RTSpinlockAcquire(pGMM->hSpinLockTree);
+ pChunk = gmmR0GetChunkLocked(pGMM, idChunk);
+ if (RT_LIKELY(pChunk))
+ {
+ pTlbe->idGeneration = pGMM->idFreeGeneration;
+ RTSpinlockRelease(pGMM->hSpinLockTree);
+ pTlbe->pChunk = pChunk;
+ }
+ else
+ {
+ RTSpinlockRelease(pGMM->hSpinLockTree);
+ RTSpinlockRelease(pGVM->gmm.s.hChunkTlbSpinLock);
+ AssertMsgFailed(("idPage=%#x\n", idPage));
+ return VERR_GMM_PAGE_NOT_FOUND;
+ }
+ }
+
+ RTSpinlockRelease(pGVM->gmm.s.hChunkTlbSpinLock);
+
+ /*
+ * Got a chunk, now validate the page ownership and calculate its address.
+ */
+ const GMMPAGE * const pPage = &pChunk->aPages[idPage & GMM_PAGEID_IDX_MASK];
+ if (RT_LIKELY( ( GMM_PAGE_IS_PRIVATE(pPage)
+ && pPage->Private.hGVM == pGVM->hSelf)
+ || GMM_PAGE_IS_SHARED(pPage)))
+ {
+ AssertPtr(pChunk->pbMapping);
+ *ppv = &pChunk->pbMapping[(idPage & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT];
+ return VINF_SUCCESS;
+ }
+ AssertMsgFailed(("idPage=%#x is-private=%RTbool Private.hGVM=%u pGVM->hGVM=%u\n",
+ idPage, GMM_PAGE_IS_PRIVATE(pPage), pPage->Private.hGVM, pGVM->hSelf));
+ return VERR_GMM_NOT_PAGE_OWNER;
+}
+
+#endif
+
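+/*
+ * Illustrative sketch, not part of the original source: how a GMM page ID
+ * splits into a chunk ID and a page index, matching the arithmetic used by
+ * GMMR0PageIdToVirt and the free/map workers in this file.
+ */
+#if 0 /* example only, not compiled */
+static void exampleDecomposePageId(uint32_t idPage)
+{
+    uint32_t const idChunk = idPage >> GMM_CHUNKID_SHIFT;    /* which chunk the page lives in   */
+    uint32_t const iPage   = idPage &  GMM_PAGEID_IDX_MASK;  /* index of the page in that chunk */
+    uint32_t const offPage = iPage << PAGE_SHIFT;            /* byte offset into the mapping    */
+    Log(("idPage=%#x -> idChunk=%#x iPage=%#x off=%#x\n", idPage, idChunk, iPage, offPage));
+}
+#endif
+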
+#ifdef VBOX_WITH_PAGE_SHARING
+
+# ifdef VBOX_STRICT
+/**
+ * For checksumming shared pages in strict builds.
+ *
+ * The purpose is making sure that a page doesn't change.
+ *
+ * @returns Checksum, 0 on failure.
+ * @param pGMM The GMM instance data.
+ * @param pGVM Pointer to the kernel-only VM instance data.
+ * @param idPage The page ID.
+ */
+static uint32_t gmmR0StrictPageChecksum(PGMM pGMM, PGVM pGVM, uint32_t idPage)
+{
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+ AssertMsgReturn(pChunk, ("idPage=%#x\n", idPage), 0);
+
+ uint8_t *pbChunk;
+ if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
+ return 0;
+ uint8_t const *pbPage = pbChunk + ((idPage & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
+
+ return RTCrc32(pbPage, PAGE_SIZE);
+}
+# endif /* VBOX_STRICT */
+
+
+/**
+ * Calculates the module hash value.
+ *
+ * @returns Hash value.
+ * @param pszModuleName The module name.
+ * @param pszVersion The module version string.
+ */
+static uint32_t gmmR0ShModCalcHash(const char *pszModuleName, const char *pszVersion)
+{
+ return RTStrHash1ExN(3, pszModuleName, RTSTR_MAX, "::", (size_t)2, pszVersion, RTSTR_MAX);
+}
+
+
+/**
+ * Finds a global module.
+ *
+ * @returns Pointer to the global module on success, NULL if not found.
+ * @param pGMM The GMM instance data.
+ * @param uHash The hash as calculated by gmmR0ShModCalcHash.
+ * @param cbModule The module size.
+ * @param enmGuestOS The guest OS type.
+ * @param cRegions The number of regions.
+ * @param pszModuleName The module name.
+ * @param pszVersion The module version.
+ * @param paRegions The region descriptions.
+ */
+static PGMMSHAREDMODULE gmmR0ShModFindGlobal(PGMM pGMM, uint32_t uHash, uint32_t cbModule, VBOXOSFAMILY enmGuestOS,
+ uint32_t cRegions, const char *pszModuleName, const char *pszVersion,
+ struct VMMDEVSHAREDREGIONDESC const *paRegions)
+{
+ for (PGMMSHAREDMODULE pGblMod = (PGMMSHAREDMODULE)RTAvllU32Get(&pGMM->pGlobalSharedModuleTree, uHash);
+ pGblMod;
+ pGblMod = (PGMMSHAREDMODULE)pGblMod->Core.pList)
+ {
+ if (pGblMod->cbModule != cbModule)
+ continue;
+ if (pGblMod->enmGuestOS != enmGuestOS)
+ continue;
+ if (pGblMod->cRegions != cRegions)
+ continue;
+ if (strcmp(pGblMod->szName, pszModuleName))
+ continue;
+ if (strcmp(pGblMod->szVersion, pszVersion))
+ continue;
+
+ uint32_t i;
+ for (i = 0; i < cRegions; i++)
+ {
+ uint32_t off = paRegions[i].GCRegionAddr & PAGE_OFFSET_MASK;
+ if (pGblMod->aRegions[i].off != off)
+ break;
+
+ uint32_t cb = RT_ALIGN_32(paRegions[i].cbRegion + off, PAGE_SIZE);
+ if (pGblMod->aRegions[i].cb != cb)
+ break;
+ }
+
+ if (i == cRegions)
+ return pGblMod;
+ }
+
+ return NULL;
+}
+
+
+/**
+ * Creates a new global module.
+ *
+ * @returns VBox status code.
+ * @param pGMM The GMM instance data.
+ * @param uHash The hash as calculated by gmmR0ShModCalcHash.
+ * @param cbModule The module size.
+ * @param enmGuestOS The guest OS type.
+ * @param cRegions The number of regions.
+ * @param pszModuleName The module name.
+ * @param pszVersion The module version.
+ * @param paRegions The region descriptions.
+ * @param ppGblMod Where to return the new module on success.
+ */
+static int gmmR0ShModNewGlobal(PGMM pGMM, uint32_t uHash, uint32_t cbModule, VBOXOSFAMILY enmGuestOS,
+ uint32_t cRegions, const char *pszModuleName, const char *pszVersion,
+ struct VMMDEVSHAREDREGIONDESC const *paRegions, PGMMSHAREDMODULE *ppGblMod)
+{
+ Log(("gmmR0ShModNewGlobal: %s %s size %#x os %u rgn %u\n", pszModuleName, pszVersion, cbModule, enmGuestOS, cRegions));
+ if (pGMM->cShareableModules >= GMM_MAX_SHARED_GLOBAL_MODULES)
+ {
+ Log(("gmmR0ShModNewGlobal: Too many modules\n"));
+ return VERR_GMM_TOO_MANY_GLOBAL_MODULES;
+ }
+
+ PGMMSHAREDMODULE pGblMod = (PGMMSHAREDMODULE)RTMemAllocZ(RT_UOFFSETOF_DYN(GMMSHAREDMODULE, aRegions[cRegions]));
+ if (!pGblMod)
+ {
+ Log(("gmmR0ShModNewGlobal: No memory\n"));
+ return VERR_NO_MEMORY;
+ }
+
+ pGblMod->Core.Key = uHash;
+ pGblMod->cbModule = cbModule;
+ pGblMod->cRegions = cRegions;
+ pGblMod->cUsers = 1;
+ pGblMod->enmGuestOS = enmGuestOS;
+ strcpy(pGblMod->szName, pszModuleName);
+ strcpy(pGblMod->szVersion, pszVersion);
+
+ for (uint32_t i = 0; i < cRegions; i++)
+ {
+ Log(("gmmR0ShModNewGlobal: rgn[%u]=%RGvLB%#x\n", i, paRegions[i].GCRegionAddr, paRegions[i].cbRegion));
+ pGblMod->aRegions[i].off = paRegions[i].GCRegionAddr & PAGE_OFFSET_MASK;
+ pGblMod->aRegions[i].cb = paRegions[i].cbRegion + pGblMod->aRegions[i].off;
+ pGblMod->aRegions[i].cb = RT_ALIGN_32(pGblMod->aRegions[i].cb, PAGE_SIZE);
+ pGblMod->aRegions[i].paidPages = NULL; /* allocated when needed. */
+ }
+
+ bool fInsert = RTAvllU32Insert(&pGMM->pGlobalSharedModuleTree, &pGblMod->Core);
+ Assert(fInsert); NOREF(fInsert);
+ pGMM->cShareableModules++;
+
+ *ppGblMod = pGblMod;
+ return VINF_SUCCESS;
+}
+
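+/*
+ * Illustrative sketch, not part of the original source: the region
+ * normalisation used by gmmR0ShModFindGlobal and gmmR0ShModNewGlobal above.
+ * A region is keyed on its sub-page offset and on its size rounded up to
+ * whole pages; e.g. GCRegionAddr=0x1000234, cbRegion=0x1800 yields off=0x234
+ * and cb=0x2000. The helper name is hypothetical.
+ */
+#if 0 /* example only, not compiled */
+static void exampleNormalizeRegion(RTGCPTR GCRegionAddr, uint32_t cbRegion)
+{
+    uint32_t const off = GCRegionAddr & PAGE_OFFSET_MASK;         /* offset into the first page */
+    uint32_t const cb  = RT_ALIGN_32(cbRegion + off, PAGE_SIZE);  /* pages spanned, in bytes    */
+    Log(("region %RGv LB %#x -> off=%#x cb=%#x\n", GCRegionAddr, cbRegion, off, cb));
+}
+#endif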
+
+/**
+ * Deletes a global module which is no longer referenced by anyone.
+ *
+ * @param pGMM The GMM instance data.
+ * @param pGblMod The module to delete.
+ */
+static void gmmR0ShModDeleteGlobal(PGMM pGMM, PGMMSHAREDMODULE pGblMod)
+{
+ Assert(pGblMod->cUsers == 0);
+ Assert(pGMM->cShareableModules > 0 && pGMM->cShareableModules <= GMM_MAX_SHARED_GLOBAL_MODULES);
+
+ void *pvTest = RTAvllU32RemoveNode(&pGMM->pGlobalSharedModuleTree, &pGblMod->Core);
+ Assert(pvTest == pGblMod); NOREF(pvTest);
+ pGMM->cShareableModules--;
+
+ uint32_t i = pGblMod->cRegions;
+ while (i-- > 0)
+ {
+ if (pGblMod->aRegions[i].paidPages)
+ {
+ /* We don't do anything to the pages as they are handled by the
+ copy-on-write mechanism in PGM. */
+ RTMemFree(pGblMod->aRegions[i].paidPages);
+ pGblMod->aRegions[i].paidPages = NULL;
+ }
+ }
+ RTMemFree(pGblMod);
+}
+
+
+static int gmmR0ShModNewPerVM(PGVM pGVM, RTGCPTR GCBaseAddr, uint32_t cRegions, const VMMDEVSHAREDREGIONDESC *paRegions,
+ PGMMSHAREDMODULEPERVM *ppRecVM)
+{
+ if (pGVM->gmm.s.Stats.cShareableModules >= GMM_MAX_SHARED_PER_VM_MODULES)
+ return VERR_GMM_TOO_MANY_PER_VM_MODULES;
+
+ PGMMSHAREDMODULEPERVM pRecVM;
+ pRecVM = (PGMMSHAREDMODULEPERVM)RTMemAllocZ(RT_UOFFSETOF_DYN(GMMSHAREDMODULEPERVM, aRegionsGCPtrs[cRegions]));
+ if (!pRecVM)
+ return VERR_NO_MEMORY;
+
+ pRecVM->Core.Key = GCBaseAddr;
+ for (uint32_t i = 0; i < cRegions; i++)
+ pRecVM->aRegionsGCPtrs[i] = paRegions[i].GCRegionAddr;
+
+ bool fInsert = RTAvlGCPtrInsert(&pGVM->gmm.s.pSharedModuleTree, &pRecVM->Core);
+ Assert(fInsert); NOREF(fInsert);
+ pGVM->gmm.s.Stats.cShareableModules++;
+
+ *ppRecVM = pRecVM;
+ return VINF_SUCCESS;
+}
+
+
+static void gmmR0ShModDeletePerVM(PGMM pGMM, PGVM pGVM, PGMMSHAREDMODULEPERVM pRecVM, bool fRemove)
+{
+ /*
+ * Free the per-VM module.
+ */
+ PGMMSHAREDMODULE pGblMod = pRecVM->pGlobalModule;
+ pRecVM->pGlobalModule = NULL;
+
+ if (fRemove)
+ {
+ void *pvTest = RTAvlGCPtrRemove(&pGVM->gmm.s.pSharedModuleTree, pRecVM->Core.Key);
+ Assert(pvTest == &pRecVM->Core); NOREF(pvTest);
+ }
+
+ RTMemFree(pRecVM);
+
+ /*
+ * Release the global module.
+ * (In the registration bailout case, it might not be.)
+ */
+ if (pGblMod)
+ {
+ Assert(pGblMod->cUsers > 0);
+ pGblMod->cUsers--;
+ if (pGblMod->cUsers == 0)
+ gmmR0ShModDeleteGlobal(pGMM, pGblMod);
+ }
+}
+
+#endif /* VBOX_WITH_PAGE_SHARING */
+
+/**
+ * Registers a new shared module for the VM.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param enmGuestOS The guest OS type.
+ * @param pszModuleName The module name.
+ * @param pszVersion The module version.
+ * @param GCPtrModBase The module base address.
+ * @param cbModule The module size.
+ * @param cRegions The number of shared region descriptors.
+ * @param paRegions Pointer to an array of shared region(s).
+ * @thread EMT(idCpu)
+ */
+GMMR0DECL(int) GMMR0RegisterSharedModule(PGVM pGVM, VMCPUID idCpu, VBOXOSFAMILY enmGuestOS, char *pszModuleName,
+ char *pszVersion, RTGCPTR GCPtrModBase, uint32_t cbModule,
+ uint32_t cRegions, struct VMMDEVSHAREDREGIONDESC const *paRegions)
+{
+#ifdef VBOX_WITH_PAGE_SHARING
+ /*
+ * Validate input and get the basics.
+ *
+ * Note! Turns out the module size does not necessarily match the size of
+ * the regions. (iTunes on XP)
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ if (RT_UNLIKELY(cRegions > VMMDEVSHAREDREGIONDESC_MAX))
+ return VERR_GMM_TOO_MANY_REGIONS;
+
+ if (RT_UNLIKELY(cbModule == 0 || cbModule > _1G))
+ return VERR_GMM_BAD_SHARED_MODULE_SIZE;
+
+ uint32_t cbTotal = 0;
+ for (uint32_t i = 0; i < cRegions; i++)
+ {
+ if (RT_UNLIKELY(paRegions[i].cbRegion == 0 || paRegions[i].cbRegion > _1G))
+ return VERR_GMM_SHARED_MODULE_BAD_REGIONS_SIZE;
+
+ cbTotal += paRegions[i].cbRegion;
+ if (RT_UNLIKELY(cbTotal > _1G))
+ return VERR_GMM_SHARED_MODULE_BAD_REGIONS_SIZE;
+ }
+
+ AssertPtrReturn(pszModuleName, VERR_INVALID_POINTER);
+ if (RT_UNLIKELY(!memchr(pszModuleName, '\0', GMM_SHARED_MODULE_MAX_NAME_STRING)))
+ return VERR_GMM_MODULE_NAME_TOO_LONG;
+
+ AssertPtrReturn(pszVersion, VERR_INVALID_POINTER);
+ if (RT_UNLIKELY(!memchr(pszVersion, '\0', GMM_SHARED_MODULE_MAX_VERSION_STRING)))
+ return VERR_GMM_MODULE_NAME_TOO_LONG;
+
+ uint32_t const uHash = gmmR0ShModCalcHash(pszModuleName, pszVersion);
+ Log(("GMMR0RegisterSharedModule %s %s base %RGv size %x hash %x\n", pszModuleName, pszVersion, GCPtrModBase, cbModule, uHash));
+
+ /*
+ * Take the semaphore and do some more validations.
+ */
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ /*
+ * Check if this module is already locally registered and register
+ * it if it isn't. The base address is a unique module identifier
+ * locally.
+ */
+ PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCPtrModBase);
+ bool fNewModule = pRecVM == NULL;
+ if (fNewModule)
+ {
+ rc = gmmR0ShModNewPerVM(pGVM, GCPtrModBase, cRegions, paRegions, &pRecVM);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Find a matching global module, register a new one if needed.
+ */
+ PGMMSHAREDMODULE pGblMod = gmmR0ShModFindGlobal(pGMM, uHash, cbModule, enmGuestOS, cRegions,
+ pszModuleName, pszVersion, paRegions);
+ if (!pGblMod)
+ {
+ Assert(fNewModule);
+ rc = gmmR0ShModNewGlobal(pGMM, uHash, cbModule, enmGuestOS, cRegions,
+ pszModuleName, pszVersion, paRegions, &pGblMod);
+ if (RT_SUCCESS(rc))
+ {
+ pRecVM->pGlobalModule = pGblMod; /* (One reference returned by gmmR0ShModNewGlobal.) */
+ Log(("GMMR0RegisterSharedModule: new module %s %s\n", pszModuleName, pszVersion));
+ }
+ else
+ gmmR0ShModDeletePerVM(pGMM, pGVM, pRecVM, true /*fRemove*/);
+ }
+ else
+ {
+ Assert(pGblMod->cUsers > 0 && pGblMod->cUsers < UINT32_MAX / 2);
+ pGblMod->cUsers++;
+ pRecVM->pGlobalModule = pGblMod;
+
+ Log(("GMMR0RegisterSharedModule: new per vm module %s %s, gbl users %d\n", pszModuleName, pszVersion, pGblMod->cUsers));
+ }
+ }
+ }
+ else
+ {
+ /*
+ * Attempt to re-register an existing module.
+ */
+ PGMMSHAREDMODULE pGblMod = gmmR0ShModFindGlobal(pGMM, uHash, cbModule, enmGuestOS, cRegions,
+ pszModuleName, pszVersion, paRegions);
+ if (pRecVM->pGlobalModule == pGblMod)
+ {
+ Log(("GMMR0RegisterSharedModule: already registered %s %s, gbl users %d\n", pszModuleName, pszVersion, pGblMod->cUsers));
+ rc = VINF_GMM_SHARED_MODULE_ALREADY_REGISTERED;
+ }
+ else
+ {
+ /** @todo may have to unregister+register when this happens in case it's caused
+ * by VBoxService crashing and being restarted... */
+ Log(("GMMR0RegisterSharedModule: Address clash!\n"
+ " incoming at %RGvLB%#x %s %s rgns %u\n"
+ " existing at %RGvLB%#x %s %s rgns %u\n",
+ GCPtrModBase, cbModule, pszModuleName, pszVersion, cRegions,
+ pRecVM->Core.Key, pRecVM->pGlobalModule->cbModule, pRecVM->pGlobalModule->szName,
+ pRecVM->pGlobalModule->szVersion, pRecVM->pGlobalModule->cRegions));
+ rc = VERR_GMM_SHARED_MODULE_ADDRESS_CLASH;
+ }
+ }
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+ gmmR0MutexRelease(pGMM);
+ return rc;
+#else
+
+ NOREF(pGVM); NOREF(idCpu); NOREF(enmGuestOS); NOREF(pszModuleName); NOREF(pszVersion);
+ NOREF(GCPtrModBase); NOREF(cbModule); NOREF(cRegions); NOREF(paRegions);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0RegisterSharedModule.
+ *
+ * @returns see GMMR0RegisterSharedModule.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0RegisterSharedModuleReq(PGVM pGVM, VMCPUID idCpu, PGMMREGISTERSHAREDMODULEREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn( pReq->Hdr.cbReq >= sizeof(*pReq)
+ && pReq->Hdr.cbReq == RT_UOFFSETOF_DYN(GMMREGISTERSHAREDMODULEREQ, aRegions[pReq->cRegions]),
+ ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ /* Pass back return code in the request packet to preserve informational codes. (VMMR3CallR0 chokes on them) */
+ pReq->rc = GMMR0RegisterSharedModule(pGVM, idCpu, pReq->enmGuestOS, pReq->szName, pReq->szVersion,
+ pReq->GCBaseAddr, pReq->cbModule, pReq->cRegions, pReq->aRegions);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Unregisters a shared module for the VM
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param pszModuleName The module name.
+ * @param pszVersion The module version.
+ * @param GCPtrModBase The module base address.
+ * @param cbModule The module size.
+ */
+GMMR0DECL(int) GMMR0UnregisterSharedModule(PGVM pGVM, VMCPUID idCpu, char *pszModuleName, char *pszVersion,
+ RTGCPTR GCPtrModBase, uint32_t cbModule)
+{
+#ifdef VBOX_WITH_PAGE_SHARING
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertPtrReturn(pszModuleName, VERR_INVALID_POINTER);
+ AssertPtrReturn(pszVersion, VERR_INVALID_POINTER);
+ if (RT_UNLIKELY(!memchr(pszModuleName, '\0', GMM_SHARED_MODULE_MAX_NAME_STRING)))
+ return VERR_GMM_MODULE_NAME_TOO_LONG;
+ if (RT_UNLIKELY(!memchr(pszVersion, '\0', GMM_SHARED_MODULE_MAX_VERSION_STRING)))
+ return VERR_GMM_MODULE_NAME_TOO_LONG;
+
+ Log(("GMMR0UnregisterSharedModule %s %s base=%RGv size %x\n", pszModuleName, pszVersion, GCPtrModBase, cbModule));
+
+ /*
+ * Take the semaphore and do some more validations.
+ */
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ /*
+ * Locate and remove the specified module.
+ */
+ PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCPtrModBase);
+ if (pRecVM)
+ {
+ /** @todo Do we need to do more validations here, like that the
+ * name + version + cbModule matches? */
+ NOREF(cbModule);
+ Assert(pRecVM->pGlobalModule);
+ gmmR0ShModDeletePerVM(pGMM, pGVM, pRecVM, true /*fRemove*/);
+ }
+ else
+ rc = VERR_GMM_SHARED_MODULE_NOT_FOUND;
+
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+ gmmR0MutexRelease(pGMM);
+ return rc;
+#else
+
+ NOREF(pGVM); NOREF(idCpu); NOREF(pszModuleName); NOREF(pszVersion); NOREF(GCPtrModBase); NOREF(cbModule);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0UnregisterSharedModule.
+ *
+ * @returns see GMMR0UnregisterSharedModule.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0UnregisterSharedModuleReq(PGVM pGVM, VMCPUID idCpu, PGMMUNREGISTERSHAREDMODULEREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GMMR0UnregisterSharedModule(pGVM, idCpu, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule);
+}
+
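+/*
+ * Illustrative sketch, not part of the original source: unregistering a
+ * module by its base address via the wrapper above. The helper name and the
+ * module name/version strings are hypothetical.
+ */
+#if 0 /* example only, not compiled */
+static int exampleUnregisterModule(PGVM pGVM, VMCPUID idCpu, RTGCPTR GCPtrModBase, uint32_t cbModule)
+{
+    GMMUNREGISTERSHAREDMODULEREQ Req;
+    RT_ZERO(Req);
+    Req.Hdr.cbReq  = sizeof(Req);       /* size check in GMMR0UnregisterSharedModuleReq */
+    Req.GCBaseAddr = GCPtrModBase;
+    Req.cbModule   = cbModule;
+    RTStrCopy(Req.szName,    sizeof(Req.szName),    "ExampleModule.dll");
+    RTStrCopy(Req.szVersion, sizeof(Req.szVersion), "1.0.0.0");
+    return GMMR0UnregisterSharedModuleReq(pGVM, idCpu, &Req);
+}
+#endif
+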
+#ifdef VBOX_WITH_PAGE_SHARING
+
+/**
+ * Increase the use count of a shared page, the page is known to exist and be valid and such.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param pGVM Pointer to the GVM instance.
+ * @param pPage The page structure.
+ */
+DECLINLINE(void) gmmR0UseSharedPage(PGMM pGMM, PGVM pGVM, PGMMPAGE pPage)
+{
+ Assert(pGMM->cSharedPages > 0);
+ Assert(pGMM->cAllocatedPages > 0);
+
+ pGMM->cDuplicatePages++;
+
+ pPage->Shared.cRefs++;
+ pGVM->gmm.s.Stats.cSharedPages++;
+ pGVM->gmm.s.Stats.Allocated.cBasePages++;
+}
+
+
+/**
+ * Converts a private page to a shared page, the page is known to exist and be valid and such.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param pGVM Pointer to the GVM instance.
+ * @param HCPhys Host physical address
+ * @param idPage The Page ID
+ * @param pPage The page structure.
+ * @param pPageDesc Shared page descriptor
+ */
+DECLINLINE(void) gmmR0ConvertToSharedPage(PGMM pGMM, PGVM pGVM, RTHCPHYS HCPhys, uint32_t idPage, PGMMPAGE pPage,
+ PGMMSHAREDPAGEDESC pPageDesc)
+{
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+ Assert(pChunk);
+ Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
+ Assert(GMM_PAGE_IS_PRIVATE(pPage));
+
+ pChunk->cPrivate--;
+ pChunk->cShared++;
+
+ pGMM->cSharedPages++;
+
+ pGVM->gmm.s.Stats.cSharedPages++;
+ pGVM->gmm.s.Stats.cPrivatePages--;
+
+ /* Modify the page structure. */
+ pPage->Shared.pfn = (uint32_t)(uint64_t)(HCPhys >> PAGE_SHIFT);
+ pPage->Shared.cRefs = 1;
+#ifdef VBOX_STRICT
+ pPageDesc->u32StrictChecksum = gmmR0StrictPageChecksum(pGMM, pGVM, idPage);
+ pPage->Shared.u14Checksum = pPageDesc->u32StrictChecksum;
+#else
+ NOREF(pPageDesc);
+ pPage->Shared.u14Checksum = 0;
+#endif
+ pPage->Shared.u2State = GMM_PAGE_STATE_SHARED;
+}
+
+
+static int gmmR0SharedModuleCheckPageFirstTime(PGMM pGMM, PGVM pGVM, PGMMSHAREDMODULE pModule,
+ unsigned idxRegion, unsigned idxPage,
+ PGMMSHAREDPAGEDESC pPageDesc, PGMMSHAREDREGIONDESC pGlobalRegion)
+{
+ NOREF(pModule);
+
+ /* Easy case: just change the internal page type. */
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, pPageDesc->idPage);
+ AssertMsgReturn(pPage, ("idPage=%#x (GCPhys=%RGp HCPhys=%RHp idxRegion=%#x idxPage=%#x) #1\n",
+ pPageDesc->idPage, pPageDesc->GCPhys, pPageDesc->HCPhys, idxRegion, idxPage),
+ VERR_PGM_PHYS_INVALID_PAGE_ID);
+ NOREF(idxRegion);
+
+ AssertMsg(pPageDesc->GCPhys == (pPage->Private.pfn << 12), ("desc %RGp gmm %RGp\n", pPageDesc->GCPhys, (pPage->Private.pfn << 12)));
+
+ gmmR0ConvertToSharedPage(pGMM, pGVM, pPageDesc->HCPhys, pPageDesc->idPage, pPage, pPageDesc);
+
+ /* Keep track of these references. */
+ pGlobalRegion->paidPages[idxPage] = pPageDesc->idPage;
+
+ return VINF_SUCCESS;
+}
+
+/**
+ * Checks specified shared module range for changes
+ *
+ * Performs the following tasks:
+ * - If a shared page is new, then it changes the GMM page type to shared and
+ * returns it in the pPageDesc descriptor.
+ * - If a shared page already exists, then it checks if the VM page is
+ * identical and if so frees the VM page and returns the shared page in
+ * pPageDesc descriptor.
+ *
+ * @remarks ASSUMES the caller has acquired the GMM semaphore!!
+ *
+ * @returns VBox status code.
+ * @param pGVM Pointer to the GVM instance data.
+ * @param pModule Module description
+ * @param idxRegion Region index
+ * @param idxPage Page index
+ * @param pPageDesc Page descriptor
+ */
+GMMR0DECL(int) GMMR0SharedModuleCheckPage(PGVM pGVM, PGMMSHAREDMODULE pModule, uint32_t idxRegion, uint32_t idxPage,
+ PGMMSHAREDPAGEDESC pPageDesc)
+{
+ int rc;
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ pPageDesc->u32StrictChecksum = 0;
+
+ AssertMsgReturn(idxRegion < pModule->cRegions,
+ ("idxRegion=%#x cRegions=%#x %s %s\n", idxRegion, pModule->cRegions, pModule->szName, pModule->szVersion),
+ VERR_INVALID_PARAMETER);
+
+ uint32_t const cPages = pModule->aRegions[idxRegion].cb >> PAGE_SHIFT;
+ AssertMsgReturn(idxPage < cPages,
+ ("idxRegion=%#x cRegions=%#x %s %s\n", idxRegion, pModule->cRegions, pModule->szName, pModule->szVersion),
+ VERR_INVALID_PARAMETER);
+
+ LogFlow(("GMMR0SharedModuleCheckRange %s base %RGv region %d idxPage %d\n", pModule->szName, pModule->Core.Key, idxRegion, idxPage));
+
+ /*
+ * First time; create a page descriptor array.
+ */
+ PGMMSHAREDREGIONDESC pGlobalRegion = &pModule->aRegions[idxRegion];
+ if (!pGlobalRegion->paidPages)
+ {
+ Log(("Allocate page descriptor array for %d pages\n", cPages));
+ pGlobalRegion->paidPages = (uint32_t *)RTMemAlloc(cPages * sizeof(pGlobalRegion->paidPages[0]));
+ AssertReturn(pGlobalRegion->paidPages, VERR_NO_MEMORY);
+
+ /* Invalidate all descriptors. */
+ uint32_t i = cPages;
+ while (i-- > 0)
+ pGlobalRegion->paidPages[i] = NIL_GMM_PAGEID;
+ }
+
+ /*
+ * We've seen this shared page for the first time?
+ */
+ if (pGlobalRegion->paidPages[idxPage] == NIL_GMM_PAGEID)
+ {
+ Log(("New shared page guest %RGp host %RHp\n", pPageDesc->GCPhys, pPageDesc->HCPhys));
+ return gmmR0SharedModuleCheckPageFirstTime(pGMM, pGVM, pModule, idxRegion, idxPage, pPageDesc, pGlobalRegion);
+ }
+
+ /*
+ * We've seen it before...
+ */
+ Log(("Replace existing page guest %RGp host %RHp id %#x -> id %#x\n",
+ pPageDesc->GCPhys, pPageDesc->HCPhys, pPageDesc->idPage, pGlobalRegion->paidPages[idxPage]));
+ Assert(pPageDesc->idPage != pGlobalRegion->paidPages[idxPage]);
+
+ /*
+ * Get the shared page source.
+ */
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, pGlobalRegion->paidPages[idxPage]);
+ AssertMsgReturn(pPage, ("idPage=%#x (idxRegion=%#x idxPage=%#x) #2\n", pPageDesc->idPage, idxRegion, idxPage),
+ VERR_PGM_PHYS_INVALID_PAGE_ID);
+
+ if (pPage->Common.u2State != GMM_PAGE_STATE_SHARED)
+ {
+ /*
+ * Page was freed at some point; invalidate this entry.
+ */
+ /** @todo this isn't really bullet proof. */
+ Log(("Old shared page was freed -> create a new one\n"));
+ pGlobalRegion->paidPages[idxPage] = NIL_GMM_PAGEID;
+ return gmmR0SharedModuleCheckPageFirstTime(pGMM, pGVM, pModule, idxRegion, idxPage, pPageDesc, pGlobalRegion);
+ }
+
+ Log(("Replace existing page guest host %RHp -> %RHp\n", pPageDesc->HCPhys, ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT));
+
+ /*
+ * Calculate the virtual address of the local page.
+ */
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pPageDesc->idPage >> GMM_CHUNKID_SHIFT);
+ AssertMsgReturn(pChunk, ("idPage=%#x (idxRegion=%#x idxPage=%#x) #4\n", pPageDesc->idPage, idxRegion, idxPage),
+ VERR_PGM_PHYS_INVALID_PAGE_ID);
+
+ uint8_t *pbChunk;
+ AssertMsgReturn(gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk),
+ ("idPage=%#x (idxRegion=%#x idxPage=%#x) #3\n", pPageDesc->idPage, idxRegion, idxPage),
+ VERR_PGM_PHYS_INVALID_PAGE_ID);
+ uint8_t *pbLocalPage = pbChunk + ((pPageDesc->idPage & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
+
+ /*
+ * Calculate the virtual address of the shared page.
+ */
+ pChunk = gmmR0GetChunk(pGMM, pGlobalRegion->paidPages[idxPage] >> GMM_CHUNKID_SHIFT);
+ Assert(pChunk); /* can't fail as gmmR0GetPage succeeded. */
+
+ /*
+ * Get the virtual address of the physical page; map the chunk into the VM
+ * process if not already done.
+ */
+ if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
+ {
+ Log(("Map chunk into process!\n"));
+ rc = gmmR0MapChunk(pGMM, pGVM, pChunk, false /*fRelaxedSem*/, (PRTR3PTR)&pbChunk);
+ AssertRCReturn(rc, rc);
+ }
+ uint8_t *pbSharedPage = pbChunk + ((pGlobalRegion->paidPages[idxPage] & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
+
+#ifdef VBOX_STRICT
+ pPageDesc->u32StrictChecksum = RTCrc32(pbSharedPage, PAGE_SIZE);
+ uint32_t uChecksum = pPageDesc->u32StrictChecksum & UINT32_C(0x00003fff);
+ AssertMsg(!uChecksum || uChecksum == pPage->Shared.u14Checksum || !pPage->Shared.u14Checksum,
+ ("%#x vs %#x - idPage=%#x - %s %s\n", uChecksum, pPage->Shared.u14Checksum,
+ pGlobalRegion->paidPages[idxPage], pModule->szName, pModule->szVersion));
+#endif
+
+ /** @todo write ASMMemComparePage. */
+ if (memcmp(pbSharedPage, pbLocalPage, PAGE_SIZE))
+ {
+ Log(("Unexpected differences found between local and shared page; skip\n"));
+ /* Signal to the caller that this one hasn't changed. */
+ pPageDesc->idPage = NIL_GMM_PAGEID;
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Free the old local page.
+ */
+ GMMFREEPAGEDESC PageDesc;
+ PageDesc.idPage = pPageDesc->idPage;
+ rc = gmmR0FreePages(pGMM, pGVM, 1, &PageDesc, GMMACCOUNT_BASE);
+ AssertRCReturn(rc, rc);
+
+ gmmR0UseSharedPage(pGMM, pGVM, pPage);
+
+ /*
+ * Pass along the new physical address & page id.
+ */
+ pPageDesc->HCPhys = ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT;
+ pPageDesc->idPage = pGlobalRegion->paidPages[idxPage];
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * RTAvlGCPtrDestroy callback.
+ *
+ * @returns 0 or VERR_GMM_INSTANCE.
+ * @param pNode The node to destroy.
+ * @param pvArgs Pointer to an argument packet.
+ */
+static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvArgs)
+{
+ gmmR0ShModDeletePerVM(((GMMR0SHMODPERVMDTORARGS *)pvArgs)->pGMM,
+ ((GMMR0SHMODPERVMDTORARGS *)pvArgs)->pGVM,
+ (PGMMSHAREDMODULEPERVM)pNode,
+ false /*fRemove*/);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Used by GMMR0CleanupVM to clean up shared modules.
+ *
+ * This is called without taking the GMM lock so that it can be yielded as
+ * needed here.
+ *
+ * @param pGMM The GMM handle.
+ * @param pGVM The global VM handle.
+ */
+static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM)
+{
+ gmmR0MutexAcquire(pGMM);
+ GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
+
+ GMMR0SHMODPERVMDTORARGS Args;
+ Args.pGVM = pGVM;
+ Args.pGMM = pGMM;
+ RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, &Args);
+
+ AssertMsg(pGVM->gmm.s.Stats.cShareableModules == 0, ("%d\n", pGVM->gmm.s.Stats.cShareableModules));
+ pGVM->gmm.s.Stats.cShareableModules = 0;
+
+ gmmR0MutexRelease(pGMM);
+}
+
+#endif /* VBOX_WITH_PAGE_SHARING */
+
+/**
+ * Removes all shared modules for the specified VM
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The VCPU id.
+ */
+GMMR0DECL(int) GMMR0ResetSharedModules(PGVM pGVM, VMCPUID idCpu)
+{
+#ifdef VBOX_WITH_PAGE_SHARING
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Take the semaphore and do some more validations.
+ */
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ Log(("GMMR0ResetSharedModules\n"));
+ GMMR0SHMODPERVMDTORARGS Args;
+ Args.pGVM = pGVM;
+ Args.pGMM = pGMM;
+ RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, &Args);
+ pGVM->gmm.s.Stats.cShareableModules = 0;
+
+ rc = VINF_SUCCESS;
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+ gmmR0MutexRelease(pGMM);
+ return rc;
+#else
+ RT_NOREF(pGVM, idCpu);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+#ifdef VBOX_WITH_PAGE_SHARING
+
+/**
+ * Tree enumeration callback for checking a shared module.
+ */
+static DECLCALLBACK(int) gmmR0CheckSharedModule(PAVLGCPTRNODECORE pNode, void *pvUser)
+{
+ GMMCHECKSHAREDMODULEINFO *pArgs = (GMMCHECKSHAREDMODULEINFO*)pvUser;
+ PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)pNode;
+ PGMMSHAREDMODULE pGblMod = pRecVM->pGlobalModule;
+
+ Log(("gmmR0CheckSharedModule: check %s %s base=%RGv size=%x\n",
+ pGblMod->szName, pGblMod->szVersion, pGblMod->Core.Key, pGblMod->cbModule));
+
+ int rc = PGMR0SharedModuleCheck(pArgs->pGVM, pArgs->pGVM, pArgs->idCpu, pGblMod, pRecVM->aRegionsGCPtrs);
+ if (RT_FAILURE(rc))
+ return rc;
+ return VINF_SUCCESS;
+}
+
+#endif /* VBOX_WITH_PAGE_SHARING */
+
+/**
+ * Check all shared modules for the specified VM.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The calling EMT number.
+ * @thread EMT(idCpu)
+ */
+GMMR0DECL(int) GMMR0CheckSharedModules(PGVM pGVM, VMCPUID idCpu)
+{
+#ifdef VBOX_WITH_PAGE_SHARING
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+# ifndef DEBUG_sandervl
+ /*
+ * Take the semaphore and do some more validations.
+ */
+ gmmR0MutexAcquire(pGMM);
+# endif
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ /*
+ * Walk the tree, checking each module.
+ */
+ Log(("GMMR0CheckSharedModules\n"));
+
+ GMMCHECKSHAREDMODULEINFO Args;
+ Args.pGVM = pGVM;
+ Args.idCpu = idCpu;
+ rc = RTAvlGCPtrDoWithAll(&pGVM->gmm.s.pSharedModuleTree, true /* fFromLeft */, gmmR0CheckSharedModule, &Args);
+
+ Log(("GMMR0CheckSharedModules done (rc=%Rrc)!\n", rc));
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+# ifndef DEBUG_sandervl
+ gmmR0MutexRelease(pGMM);
+# endif
+ return rc;
+#else
+ RT_NOREF(pGVM, idCpu);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+#if defined(VBOX_STRICT) && HC_ARCH_BITS == 64
+
+/**
+ * Worker for GMMR0FindDuplicatePageReq.
+ *
+ * @returns true if duplicate, false if not.
+ */
+static bool gmmR0FindDupPageInChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, uint8_t const *pbSourcePage)
+{
+ bool fFoundDuplicate = false;
+ /* Only take chunks not mapped into this VM process; not entirely correct. */
+ uint8_t *pbChunk;
+ if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
+ {
+ int rc = gmmR0MapChunk(pGMM, pGVM, pChunk, false /*fRelaxedSem*/, (PRTR3PTR)&pbChunk);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Look for duplicate pages
+ */
+ uintptr_t iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
+ while (iPage-- > 0)
+ {
+ if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
+ {
+ uint8_t *pbDestPage = pbChunk + (iPage << PAGE_SHIFT);
+ if (!memcmp(pbSourcePage, pbDestPage, PAGE_SIZE))
+ {
+ fFoundDuplicate = true;
+ break;
+ }
+ }
+ }
+ gmmR0UnmapChunk(pGMM, pGVM, pChunk, false /*fRelaxedSem*/);
+ }
+ }
+ return fFoundDuplicate;
+}
+
+
+/**
+ * Find a duplicate of the specified page in other active VMs
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0FindDuplicatePageReq(PGVM pGVM, PGMMFINDDUPLICATEPAGEREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+
+ int rc = GVMMR0ValidateGVM(pGVM);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Take the semaphore and do some more validations.
+ */
+ rc = gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ uint8_t *pbChunk;
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pReq->idPage >> GMM_CHUNKID_SHIFT);
+ if (pChunk)
+ {
+ if (gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
+ {
+ uint8_t *pbSourcePage = pbChunk + ((pReq->idPage & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, pReq->idPage);
+ if (pPage)
+ {
+ /*
+ * Walk the chunks
+ */
+ pReq->fDuplicate = false;
+ RTListForEach(&pGMM->ChunkList, pChunk, GMMCHUNK, ListNode)
+ {
+ if (gmmR0FindDupPageInChunk(pGMM, pGVM, pChunk, pbSourcePage))
+ {
+ pReq->fDuplicate = true;
+ break;
+ }
+ }
+ }
+ else
+ {
+ AssertFailed();
+ rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
+ }
+ }
+ else
+ AssertFailed();
+ }
+ else
+ AssertFailed();
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+ gmmR0MutexRelease(pGMM);
+ return rc;
+}
+
+#endif /* VBOX_STRICT && HC_ARCH_BITS == 64 */
+
+
+/**
+ * Retrieves the GMM statistics visible to the caller.
+ *
+ * @returns VBox status code.
+ *
+ * @param pStats Where to put the statistics.
+ * @param pSession The current session.
+ * @param pGVM The GVM to obtain statistics for. Optional.
+ */
+GMMR0DECL(int) GMMR0QueryStatistics(PGMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
+{
+ LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
+
+ /*
+ * Validate input.
+ */
+ AssertPtrReturn(pSession, VERR_INVALID_POINTER);
+ AssertPtrReturn(pStats, VERR_INVALID_POINTER);
+ pStats->cMaxPages = 0; /* (crash before taking the mutex...) */
+
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+
+ /*
+ * Validate the VM handle, if not NULL, and lock the GMM.
+ */
+ int rc;
+ if (pGVM)
+ {
+ rc = GVMMR0ValidateGVM(pGVM);
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+
+ rc = gmmR0MutexAcquire(pGMM);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Copy out the GMM statistics.
+ */
+ pStats->cMaxPages = pGMM->cMaxPages;
+ pStats->cReservedPages = pGMM->cReservedPages;
+ pStats->cOverCommittedPages = pGMM->cOverCommittedPages;
+ pStats->cAllocatedPages = pGMM->cAllocatedPages;
+ pStats->cSharedPages = pGMM->cSharedPages;
+ pStats->cDuplicatePages = pGMM->cDuplicatePages;
+ pStats->cLeftBehindSharedPages = pGMM->cLeftBehindSharedPages;
+ pStats->cBalloonedPages = pGMM->cBalloonedPages;
+ pStats->cChunks = pGMM->cChunks;
+ pStats->cFreedChunks = pGMM->cFreedChunks;
+ pStats->cShareableModules = pGMM->cShareableModules;
+ pStats->idFreeGeneration = pGMM->idFreeGeneration;
+ RT_ZERO(pStats->au64Reserved);
+
+ /*
+ * Copy out the VM statistics.
+ */
+ if (pGVM)
+ pStats->VMStats = pGVM->gmm.s.Stats;
+ else
+ RT_ZERO(pStats->VMStats);
+
+ gmmR0MutexRelease(pGMM);
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0QueryStatistics.
+ *
+ * @returns see GMMR0QueryStatistics.
+ * @param pGVM The global (ring-0) VM structure. Optional.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0QueryStatisticsReq(PGVM pGVM, PGMMQUERYSTATISTICSSREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GMMR0QueryStatistics(&pReq->Stats, pReq->pSession, pGVM);
+}
+
+
+/**
+ * Resets the specified GMM statistics.
+ *
+ * @returns VBox status code.
+ *
+ * @param pStats Which statistics to reset, that is, non-zero fields
+ * indicate which to reset.
+ * @param pSession The current session.
+ * @param pGVM The GVM to reset statistics for. Optional.
+ */
+GMMR0DECL(int) GMMR0ResetStatistics(PCGMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
+{
+ NOREF(pStats); NOREF(pSession); NOREF(pGVM);
+ /* Nothing we can reset at the moment. */
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0ResetStatistics.
+ *
+ * @returns see GMMR0ResetStatistics.
+ * @param pGVM The global (ring-0) VM structure. Optional.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0ResetStatisticsReq(PGVM pGVM, PGMMRESETSTATISTICSSREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GMMR0ResetStatistics(&pReq->Stats, pReq->pSession, pGVM);
+}
+
diff --git a/src/VBox/VMM/VMMR0/GMMR0Internal.h b/src/VBox/VMM/VMMR0/GMMR0Internal.h
new file mode 100644
index 00000000..7bc3833d
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/GMMR0Internal.h
@@ -0,0 +1,116 @@
+/* $Id: GMMR0Internal.h $ */
+/** @file
+ * GMM - The Global Memory Manager, Internal Header.
+ */
+
+/*
+ * Copyright (C) 2007-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VMM_INCLUDED_SRC_VMMR0_GMMR0Internal_h
+#define VMM_INCLUDED_SRC_VMMR0_GMMR0Internal_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <VBox/vmm/gmm.h>
+#include <iprt/avl.h>
+
+
+/**
+ * Shared module registration info (per VM)
+ */
+typedef struct GMMSHAREDMODULEPERVM
+{
+ /** Tree node. */
+ AVLGCPTRNODECORE Core;
+ /** Pointer to global shared module info. */
+ PGMMSHAREDMODULE pGlobalModule;
+ /** Pointer to the region addresses.
+ *
+ * They can differ between VMs because of address space scrambling or
+ * simply different loading order. */
+ RTGCPTR64 aRegionsGCPtrs[1];
+} GMMSHAREDMODULEPERVM;
+/** Pointer to a GMMSHAREDMODULEPERVM. */
+typedef GMMSHAREDMODULEPERVM *PGMMSHAREDMODULEPERVM;
+
+
+/** Pointer to a GMM allocation chunk. */
+typedef struct GMMCHUNK *PGMMCHUNK;
+
+
+/** The GMMCHUNK::cFree shift count employed by gmmR0SelectFreeSetList. */
+#define GMM_CHUNK_FREE_SET_SHIFT 4
+/** Index of the list containing completely unused chunks.
+ * The code ASSUMES this is the last list. */
+#define GMM_CHUNK_FREE_SET_UNUSED_LIST (GMM_CHUNK_NUM_PAGES >> GMM_CHUNK_FREE_SET_SHIFT)
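The two constants above imply how a chunk's free-page count presumably maps to a list index in the free set. The fragment below is a hypothetical stand-in for gmmR0SelectFreeSetList (which is not part of this header excerpt); it uses only the macros defined here plus GMM_CHUNK_NUM_PAGES from gmm.h, and the defensive clamp is an assumption:

    /* Hypothetical stand-in for gmmR0SelectFreeSetList: map a chunk's free
     * page count to a list index, with the final list reserved for chunks
     * that are completely unused.  The clamp is purely defensive. */
    static unsigned gmmR0SketchFreeSetListIndex(unsigned cFreePages)
    {
        unsigned iList = cFreePages >> GMM_CHUNK_FREE_SET_SHIFT;
        return iList <= GMM_CHUNK_FREE_SET_UNUSED_LIST ? iList : GMM_CHUNK_FREE_SET_UNUSED_LIST;
    }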
+
+/**
+ * A set of free chunks.
+ */
+typedef struct GMMCHUNKFREESET
+{
+ /** The number of free pages in the set. */
+ uint64_t cFreePages;
+ /** The generation ID for the set. This is incremented whenever
+ * something is linked or unlinked from this set. */
+ uint64_t idGeneration;
+ /** Chunks ordered by increasing number of free pages.
+ * In the final list the chunks are completely unused. */
+ PGMMCHUNK apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST + 1];
+} GMMCHUNKFREESET;
+
+
+/**
+ * A per-VM allocation chunk lookup TLB entry (for GMMR0PageIdToVirt).
+ */
+typedef struct GMMPERVMCHUNKTLBE
+{
+ /** The GMM::idFreeGeneration value this is valid for. */
+ uint64_t idGeneration;
+ /** The chunk. */
+ PGMMCHUNK pChunk;
+} GMMPERVMCHUNKTLBE;
+/** Pointer to a per-VM allocation chunk TLB entry. */
+typedef GMMPERVMCHUNKTLBE *PGMMPERVMCHUNKTLBE;
+
+/** The number of entries in the allocation chunk lookup TLB. */
+#define GMMPERVM_CHUNKTLB_ENTRIES 32
+/** Gets the TLB entry index for the given Chunk ID. */
+#define GMMPERVM_CHUNKTLB_IDX(a_idChunk) ( (a_idChunk) & (GMMPERVM_CHUNKTLB_ENTRIES - 1) )
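The TLB entry type and the two macros above suggest a direct-mapped lookup keyed by chunk ID and validated against the global free generation. A hypothetical sketch of that lookup follows; gmmR0SketchChunkTlbLookup is not part of the sources (the real logic lives in GMMR0PageIdToVirt, which is not included in this header):

    /* Hypothetical sketch of the direct-mapped lookup implied by the types
     * above: index by chunk ID and trust the cached chunk pointer only while
     * the global free generation is unchanged.  The caller still has to
     * verify that the returned chunk really is the one for idChunk. */
    DECLINLINE(PGMMCHUNK) gmmR0SketchChunkTlbLookup(PGMMPERVMCHUNKTLBE paTlb, uint32_t idChunk, uint64_t idFreeGeneration)
    {
        PGMMPERVMCHUNKTLBE pTlbe = &paTlb[GMMPERVM_CHUNKTLB_IDX(idChunk)];
        if (pTlbe->idGeneration != idFreeGeneration)
            return NULL; /* stale entry - fall back to the slow path */
        return pTlbe->pChunk;
    }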
+
+
+/**
+ * The per-VM GMM data.
+ */
+typedef struct GMMPERVM
+{
+ /** Free set for use in bound mode. */
+ GMMCHUNKFREESET Private;
+ /** The VM statistics. */
+ GMMVMSTATS Stats;
+ /** Shared module tree (per-vm). */
+ PAVLGCPTRNODECORE pSharedModuleTree;
+ /** Hints at the last chunk we allocated some memory from. */
+ uint32_t idLastChunkHint;
+ uint32_t u32Padding;
+
+ /** Spinlock protecting the chunk lookup TLB. */
+ RTSPINLOCK hChunkTlbSpinLock;
+ /** The chunk lookup TLB used by GMMR0PageIdToVirt. */
+ GMMPERVMCHUNKTLBE aChunkTlbEntries[GMMPERVM_CHUNKTLB_ENTRIES];
+} GMMPERVM;
+/** Pointer to the per-VM GMM data. */
+typedef GMMPERVM *PGMMPERVM;
+
+#endif /* !VMM_INCLUDED_SRC_VMMR0_GMMR0Internal_h */
+
diff --git a/src/VBox/VMM/VMMR0/GVMMR0.cpp b/src/VBox/VMM/VMMR0/GVMMR0.cpp
new file mode 100644
index 00000000..2de316d5
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/GVMMR0.cpp
@@ -0,0 +1,3029 @@
+/* $Id: GVMMR0.cpp $ */
+/** @file
+ * GVMM - Global VM Manager.
+ */
+
+/*
+ * Copyright (C) 2007-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @page pg_gvmm GVMM - The Global VM Manager
+ *
+ * The Global VM Manager lives in ring-0. Its main function at the moment is
+ * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
+ * each of them, and assign them unique identifiers (so GMM can track page
+ * owners). The GVMM also manages some of the host CPU resources, like the
+ * periodic preemption timer.
+ *
+ * The GVMM will create a ring-0 object for each VM when it is registered; this
+ * is both for session cleanup purposes and for having a point where it is
+ * possible to implement usage policies later (in SUPR0ObjRegister).
+ *
+ *
+ * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
+ *
+ * On systems that sport a high resolution kernel timer API, we use per-cpu
+ * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
+ * execution. The timer frequency is calculated by taking the max
+ * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
+ * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
+ * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
+ *
+ * The TMCalcHostTimerFrequency() part of things takes the max
+ * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
+ * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
+ * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
+ * AMD-V and raw-mode execution environments.
+ */
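The paragraph above describes picking the effective timer frequency as the maximum TMCalcHostTimerFrequency() value seen over roughly the last 160 ms of history. A minimal standalone sketch of that "max over the wrapping Hz history" step follows, assuming an 8-entry history like GVMMHOSTCPU::Ppt.aHzHistory; sketchPptPickTimerHz is a hypothetical helper and the exact uMinHz cut-off handling is an assumption (the real work happens in the periodic preemption timer callback further down in this file):

    #include <stdint.h>

    /* Sketch only: pick the effective timer frequency as the maximum of the
     * historicized Hz samples and the currently reported desired Hz.  The
     * real callback additionally handles historization, interval changes and
     * starting/stopping the timer. */
    static uint32_t sketchPptPickTimerHz(uint32_t const auHzHistory[8], uint32_t uDesiredHz, uint32_t uMinHz)
    {
        uint32_t uHz = uDesiredHz;
        for (unsigned i = 0; i < 8; i++)
            if (auHzHistory[i] > uHz)
                uHz = auHzHistory[i];
        return uHz > uMinHz ? uHz : 0; /* assumed: at or below uMinHz the timer stays inactive */
    }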
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_GVMM
+#include <VBox/vmm/gvmm.h>
+#include <VBox/vmm/gmm.h>
+#include "GVMMR0Internal.h"
+#include <VBox/vmm/iom.h>
+#include <VBox/vmm/pdm.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/vmm.h>
+#ifdef VBOX_WITH_NEM_R0
+# include <VBox/vmm/nem.h>
+#endif
+#include <VBox/vmm/vmcpuset.h>
+#include <VBox/vmm/vmcc.h>
+#include <VBox/param.h>
+#include <VBox/err.h>
+
+#include <iprt/asm.h>
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/critsect.h>
+#include <iprt/mem.h>
+#include <iprt/semaphore.h>
+#include <iprt/time.h>
+#include <VBox/log.h>
+#include <iprt/thread.h>
+#include <iprt/process.h>
+#include <iprt/param.h>
+#include <iprt/string.h>
+#include <iprt/assert.h>
+#include <iprt/mem.h>
+#include <iprt/memobj.h>
+#include <iprt/mp.h>
+#include <iprt/cpuset.h>
+#include <iprt/spinlock.h>
+#include <iprt/timer.h>
+
+#include "dtrace/VBoxVMM.h"
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
+/** Define this to enable the periodic preemption timer. */
+# define GVMM_SCHED_WITH_PPT
+#endif
+
+
+/** @def GVMM_CHECK_SMAP_SETUP
+ * SMAP check setup. */
+/** @def GVMM_CHECK_SMAP_CHECK
+ * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
+ * it will be logged and @a a_BadExpr is executed. */
+/** @def GVMM_CHECK_SMAP_CHECK2
+ * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
+ * be logged, written to the VMs assertion text buffer, and @a a_BadExpr is
+ * executed. */
+#if (defined(VBOX_STRICT) || 1) && !defined(VBOX_WITH_RAM_IN_KERNEL)
+# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
+# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
+ do { \
+ if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
+ { \
+ RTCCUINTREG fEflCheck = ASMGetFlags(); \
+ if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
+ { /* likely */ } \
+ else \
+ { \
+ SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
+ a_BadExpr; \
+ } \
+ } \
+ } while (0)
+# define GVMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) \
+ do { \
+ if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
+ { \
+ RTCCUINTREG fEflCheck = ASMGetFlags(); \
+ if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
+ { /* likely */ } \
+ else \
+ { \
+ SUPR0BadContext((a_pGVM) ? (a_pGVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
+ a_BadExpr; \
+ } \
+ } \
+ } while (0)
+#else
+# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
+# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
+# define GVMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) NOREF(fKernelFeatures)
+#endif
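The macros above are meant to be dropped into ring-0 entry points. The fragment below illustrates the presumed call pattern only; gvmmR0SmapUsageSketch and its callee are hypothetical and not part of the sources, and the status code chosen for the bad-context case is arbitrary:

    /* Presumed usage pattern for the SMAP checks: snapshot the kernel
     * features once per function, then re-verify EFLAGS.AC after anything
     * that might have cleared it. */
    static int gvmmR0SmapUsageSketch(PGVM pGVM)
    {
        GVMM_CHECK_SMAP_SETUP();
        GVMM_CHECK_SMAP_CHECK2(pGVM, (void)0);                  /* on entry */

        int rc = gvmmR0HypotheticalWorker(pGVM);                /* hypothetical callee */
        GVMM_CHECK_SMAP_CHECK2(pGVM, rc = VERR_GVMM_INSTANCE);  /* flag a bad context */
        return rc;
    }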
+
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+
+/**
+ * Global VM handle.
+ */
+typedef struct GVMHANDLE
+{
+ /** The index of the next handle in the list (free or used). (0 is nil.) */
+ uint16_t volatile iNext;
+ /** Our own index / handle value. */
+ uint16_t iSelf;
+ /** The process ID of the handle owner.
+ * This is used for access checks. */
+ RTPROCESS ProcId;
+ /** The pointer to the ring-0 only (aka global) VM structure. */
+ PGVM pGVM;
+ /** The virtual machine object. */
+ void *pvObj;
+ /** The session this VM is associated with. */
+ PSUPDRVSESSION pSession;
+ /** The ring-0 handle of the EMT0 thread.
+ * This is used for ownership checks as well as looking up a VM handle by thread
+ * at times like assertions. */
+ RTNATIVETHREAD hEMT0;
+} GVMHANDLE;
+/** Pointer to a global VM handle. */
+typedef GVMHANDLE *PGVMHANDLE;
+
+/** Number of GVM handles (including the NIL handle). */
+#if HC_ARCH_BITS == 64
+# define GVMM_MAX_HANDLES 8192
+#else
+# define GVMM_MAX_HANDLES 128
+#endif
+
+/**
+ * Per host CPU GVMM data.
+ */
+typedef struct GVMMHOSTCPU
+{
+ /** Magic number (GVMMHOSTCPU_MAGIC). */
+ uint32_t volatile u32Magic;
+ /** The CPU ID. */
+ RTCPUID idCpu;
+ /** The CPU set index. */
+ uint32_t idxCpuSet;
+
+#ifdef GVMM_SCHED_WITH_PPT
+ /** Periodic preemption timer data. */
+ struct
+ {
+ /** The handle to the periodic preemption timer. */
+ PRTTIMER pTimer;
+ /** Spinlock protecting the data below. */
+ RTSPINLOCK hSpinlock;
+ /** The smallest Hz that we need to care about. (static) */
+ uint32_t uMinHz;
+ /** The number of ticks between each historization. */
+ uint32_t cTicksHistoriziationInterval;
+ /** The current historization tick (counting up to
+ * cTicksHistoriziationInterval and then resetting). */
+ uint32_t iTickHistorization;
+ /** The current timer interval. This is set to 0 when inactive. */
+ uint32_t cNsInterval;
+ /** The current timer frequency. This is set to 0 when inactive. */
+ uint32_t uTimerHz;
+ /** The current max frequency reported by the EMTs.
+ * This gets historicized and reset by the timer callback. It is
+ * read without holding the spinlock, so it needs atomic updating. */
+ uint32_t volatile uDesiredHz;
+ /** Whether the timer was started or not. */
+ bool volatile fStarted;
+ /** Set if we're starting timer. */
+ bool volatile fStarting;
+ /** The index of the next history entry (mod it). */
+ uint32_t iHzHistory;
+ /** Historicized uDesiredHz values. The array wraps around, new entries
+ * are added at iHzHistory. This is updated approximately every
+ * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
+ uint32_t aHzHistory[8];
+ /** Statistics counter for recording the number of interval changes. */
+ uint32_t cChanges;
+ /** Statistics counter for recording the number of timer starts. */
+ uint32_t cStarts;
+ } Ppt;
+#endif /* GVMM_SCHED_WITH_PPT */
+
+} GVMMHOSTCPU;
+/** Pointer to the per host CPU GVMM data. */
+typedef GVMMHOSTCPU *PGVMMHOSTCPU;
+/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
+#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
+/** The interval one history entry should cover (approximately), given in
+ * nanoseconds. */
+#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
+
+
+/**
+ * The GVMM instance data.
+ */
+typedef struct GVMM
+{
+ /** Eyecatcher / magic. */
+ uint32_t u32Magic;
+ /** The index of the head of the free handle chain. (0 is nil.) */
+ uint16_t volatile iFreeHead;
+ /** The index of the head of the active handle chain. (0 is nil.) */
+ uint16_t volatile iUsedHead;
+ /** The number of VMs. */
+ uint16_t volatile cVMs;
+ /** Alignment padding. */
+ uint16_t u16Reserved;
+ /** The number of EMTs. */
+ uint32_t volatile cEMTs;
+ /** The number of EMTs that have halted in GVMMR0SchedHalt. */
+ uint32_t volatile cHaltedEMTs;
+ /** Mini lock for restricting early wake-ups to one thread. */
+ bool volatile fDoingEarlyWakeUps;
+ bool afPadding[3]; /**< explicit alignment padding. */
+ /** When the next halted or sleeping EMT will wake up.
+ * This is set to 0 when it needs recalculating and to UINT64_MAX when
+ * there are no halted or sleeping EMTs in the GVMM. */
+ uint64_t uNsNextEmtWakeup;
+ /** The lock used to serialize VM creation, destruction and associated events that
+ * isn't performance critical. Owners may acquire the list lock. */
+ RTCRITSECT CreateDestroyLock;
+ /** The lock used to serialize used list updates and accesses.
+ * This indirectly includes scheduling since the scheduler will have to walk the
+ * used list to examine running VMs. Owners may not acquire any other locks. */
+ RTCRITSECTRW UsedLock;
+ /** The handle array.
+ * The size of this array defines the maximum number of currently running VMs.
+ * The first entry is unused as it represents the NIL handle. */
+ GVMHANDLE aHandles[GVMM_MAX_HANDLES];
+
+ /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
+ * The number of EMTs that means we no longer consider ourselves alone on a
+ * CPU/Core.
+ */
+ uint32_t cEMTsMeansCompany;
+ /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
+ * The minimum sleep time for when we're alone, in nano seconds.
+ */
+ uint32_t nsMinSleepAlone;
+ /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
+ * The minimum sleep time for when we've got company, in nano seconds.
+ */
+ uint32_t nsMinSleepCompany;
+ /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
+ * The limit for the first round of early wake-ups, given in nano seconds.
+ */
+ uint32_t nsEarlyWakeUp1;
+ /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
+ * The limit for the second round of early wake-ups, given in nano seconds.
+ */
+ uint32_t nsEarlyWakeUp2;
+
+ /** Set if we're doing early wake-ups.
+ * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
+ bool volatile fDoEarlyWakeUps;
+
+ /** The number of entries in the host CPU array (aHostCpus). */
+ uint32_t cHostCpus;
+ /** Per host CPU data (variable length). */
+ GVMMHOSTCPU aHostCpus[1];
+} GVMM;
+AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
+AssertCompileMemberAlignment(GVMM, UsedLock, 8);
+AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
+/** Pointer to the GVMM instance data. */
+typedef GVMM *PGVMM;
+
+/** The GVMM::u32Magic value (Charlie Haden). */
+#define GVMM_MAGIC UINT32_C(0x19370806)
+
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** Pointer to the GVMM instance data.
+ * (Just my general dislike for global variables.) */
+static PGVMM g_pGVMM = NULL;
+
+/** Macro for obtaining and validating the g_pGVMM pointer.
+ * On failure it will return from the invoking function with the specified return value.
+ *
+ * @param pGVMM The name of the pGVMM variable.
+ * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
+ * status codes.
+ */
+#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
+ do { \
+ (pGVMM) = g_pGVMM;\
+ AssertPtrReturn((pGVMM), (rc)); \
+ AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
+ } while (0)
+
+/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
+ * On failure it will return from the invoking function.
+ *
+ * @param pGVMM The name of the pGVMM variable.
+ */
+#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
+ do { \
+ (pGVMM) = g_pGVMM;\
+ AssertPtrReturnVoid((pGVMM)); \
+ AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
+ } while (0)
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
+static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
+static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
+static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
+
+#ifdef GVMM_SCHED_WITH_PPT
+static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
+#endif
+
+
+/**
+ * Initializes the GVMM.
+ *
+ * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
+ *
+ * @returns VBox status code.
+ */
+GVMMR0DECL(int) GVMMR0Init(void)
+{
+ LogFlow(("GVMMR0Init:\n"));
+
+ /*
+ * Allocate and initialize the instance data.
+ */
+ uint32_t cHostCpus = RTMpGetArraySize();
+ AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
+
+ PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
+ if (!pGVMM)
+ return VERR_NO_MEMORY;
+ int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
+ "GVMM-CreateDestroyLock");
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
+ if (RT_SUCCESS(rc))
+ {
+ pGVMM->u32Magic = GVMM_MAGIC;
+ pGVMM->iUsedHead = 0;
+ pGVMM->iFreeHead = 1;
+
+ /* the nil handle */
+ pGVMM->aHandles[0].iSelf = 0;
+ pGVMM->aHandles[0].iNext = 0;
+
+ /* the tail */
+ unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
+ pGVMM->aHandles[i].iSelf = i;
+ pGVMM->aHandles[i].iNext = 0; /* nil */
+
+ /* the rest */
+ while (i-- > 1)
+ {
+ pGVMM->aHandles[i].iSelf = i;
+ pGVMM->aHandles[i].iNext = i + 1;
+ }
+
+ /* The default configuration values. */
+ uint32_t cNsResolution = RTSemEventMultiGetResolution();
+ pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
+ if (cNsResolution >= 5*RT_NS_100US)
+ {
+ pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
+ pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
+ pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
+ pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
+ }
+ else if (cNsResolution > RT_NS_100US)
+ {
+ pGVMM->nsMinSleepAlone = cNsResolution / 2;
+ pGVMM->nsMinSleepCompany = cNsResolution / 4;
+ pGVMM->nsEarlyWakeUp1 = 0;
+ pGVMM->nsEarlyWakeUp2 = 0;
+ }
+ else
+ {
+ pGVMM->nsMinSleepAlone = 2000;
+ pGVMM->nsMinSleepCompany = 2000;
+ pGVMM->nsEarlyWakeUp1 = 0;
+ pGVMM->nsEarlyWakeUp2 = 0;
+ }
+ pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
+
+ /* The host CPU data. */
+ pGVMM->cHostCpus = cHostCpus;
+ uint32_t iCpu = cHostCpus;
+ RTCPUSET PossibleSet;
+ RTMpGetSet(&PossibleSet);
+ while (iCpu-- > 0)
+ {
+ pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
+#ifdef GVMM_SCHED_WITH_PPT
+ pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
+ pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
+ pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
+ pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
+ //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
+ //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
+ //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
+#endif
+
+ if (RTCpuSetIsMember(&PossibleSet, iCpu))
+ {
+ pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
+ pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
+
+#ifdef GVMM_SCHED_WITH_PPT
+ rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
+ 50*1000*1000 /* whatever */,
+ RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
+ gvmmR0SchedPeriodicPreemptionTimerCallback,
+ &pGVMM->aHostCpus[iCpu]);
+ if (RT_SUCCESS(rc))
+ rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
+ if (RT_FAILURE(rc))
+ {
+ while (iCpu < cHostCpus)
+ {
+ RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
+ RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
+ pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
+ iCpu++;
+ }
+ break;
+ }
+#endif
+ }
+ else
+ {
+ pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
+ pGVMM->aHostCpus[iCpu].u32Magic = 0;
+ }
+ }
+ if (RT_SUCCESS(rc))
+ {
+ g_pGVMM = pGVMM;
+ LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
+ return VINF_SUCCESS;
+ }
+
+ /* bail out. */
+ RTCritSectRwDelete(&pGVMM->UsedLock);
+ }
+ RTCritSectDelete(&pGVMM->CreateDestroyLock);
+ }
+
+ RTMemFree(pGVMM);
+ return rc;
+}
+
+
+/**
+ * Terminates the GVMM.
+ *
+ * This is called while owning the loader semaphore (see supdrvLdrFree()).
+ * And unless something is wrong, there should be absolutely no VMs
+ * registered at this point.
+ */
+GVMMR0DECL(void) GVMMR0Term(void)
+{
+ LogFlow(("GVMMR0Term:\n"));
+
+ PGVMM pGVMM = g_pGVMM;
+ g_pGVMM = NULL;
+ if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
+ {
+ SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
+ return;
+ }
+
+ /*
+ * First of all, stop all active timers.
+ */
+ uint32_t cActiveTimers = 0;
+ uint32_t iCpu = pGVMM->cHostCpus;
+ while (iCpu-- > 0)
+ {
+ ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
+#ifdef GVMM_SCHED_WITH_PPT
+ if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
+ && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
+ cActiveTimers++;
+#endif
+ }
+ if (cActiveTimers)
+ RTThreadSleep(1); /* fudge */
+
+ /*
+ * Invalidate the instance and free resources.
+ */
+ pGVMM->u32Magic = ~GVMM_MAGIC;
+ RTCritSectRwDelete(&pGVMM->UsedLock);
+ RTCritSectDelete(&pGVMM->CreateDestroyLock);
+
+ pGVMM->iFreeHead = 0;
+ if (pGVMM->iUsedHead)
+ {
+ SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
+ pGVMM->iUsedHead = 0;
+ }
+
+#ifdef GVMM_SCHED_WITH_PPT
+ iCpu = pGVMM->cHostCpus;
+ while (iCpu-- > 0)
+ {
+ RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
+ pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
+ RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
+ pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
+ }
+#endif
+
+ RTMemFree(pGVMM);
+}
+
+
+/**
+ * A quick hack for setting global config values.
+ *
+ * @returns VBox status code.
+ *
+ * @param pSession The session handle. Used for authentication.
+ * @param pszName The variable name.
+ * @param u64Value The new value.
+ */
+GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
+{
+ /*
+ * Validate input.
+ */
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+ AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
+ AssertPtrReturn(pszName, VERR_INVALID_POINTER);
+
+ /*
+ * String switch time!
+ */
+ if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
+ return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
+ int rc = VINF_SUCCESS;
+ pszName += sizeof("/GVMM/") - 1;
+ if (!strcmp(pszName, "cEMTsMeansCompany"))
+ {
+ if (u64Value <= UINT32_MAX)
+ pGVMM->cEMTsMeansCompany = u64Value;
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else if (!strcmp(pszName, "MinSleepAlone"))
+ {
+ if (u64Value <= RT_NS_100MS)
+ pGVMM->nsMinSleepAlone = u64Value;
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else if (!strcmp(pszName, "MinSleepCompany"))
+ {
+ if (u64Value <= RT_NS_100MS)
+ pGVMM->nsMinSleepCompany = u64Value;
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else if (!strcmp(pszName, "EarlyWakeUp1"))
+ {
+ if (u64Value <= RT_NS_100MS)
+ {
+ pGVMM->nsEarlyWakeUp1 = u64Value;
+ pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
+ }
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else if (!strcmp(pszName, "EarlyWakeUp2"))
+ {
+ if (u64Value <= RT_NS_100MS)
+ {
+ pGVMM->nsEarlyWakeUp2 = u64Value;
+ pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
+ }
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else
+ rc = VERR_CFGM_VALUE_NOT_FOUND;
+ return rc;
+}
+
+
+/**
+ * A quick hack for getting global config values.
+ *
+ * @returns VBox status code.
+ *
+ * @param pSession The session handle. Used for authentication.
+ * @param pszName The variable name.
+ * @param pu64Value Where to return the value.
+ */
+GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
+{
+ /*
+ * Validate input.
+ */
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+ AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
+ AssertPtrReturn(pszName, VERR_INVALID_POINTER);
+ AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
+
+ /*
+ * String switch time!
+ */
+ if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
+ return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
+ int rc = VINF_SUCCESS;
+ pszName += sizeof("/GVMM/") - 1;
+ if (!strcmp(pszName, "cEMTsMeansCompany"))
+ *pu64Value = pGVMM->cEMTsMeansCompany;
+ else if (!strcmp(pszName, "MinSleepAlone"))
+ *pu64Value = pGVMM->nsMinSleepAlone;
+ else if (!strcmp(pszName, "MinSleepCompany"))
+ *pu64Value = pGVMM->nsMinSleepCompany;
+ else if (!strcmp(pszName, "EarlyWakeUp1"))
+ *pu64Value = pGVMM->nsEarlyWakeUp1;
+ else if (!strcmp(pszName, "EarlyWakeUp2"))
+ *pu64Value = pGVMM->nsEarlyWakeUp2;
+ else
+ rc = VERR_CFGM_VALUE_NOT_FOUND;
+ return rc;
+}
+
+
+/**
+ * Acquire the 'used' lock in shared mode.
+ *
+ * This prevents destruction of the VM while we're in ring-0.
+ *
+ * @returns IPRT status code, see RTSemFastMutexRequest.
+ * @param a_pGVMM The GVMM instance data.
+ * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
+ */
+#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
+
+/**
+ * Release the 'used' lock when owning it in shared mode.
+ *
+ * @returns IPRT status code, see RTSemFastMutexRequest.
+ * @param a_pGVMM The GVMM instance data.
+ * @sa GVMMR0_USED_SHARED_LOCK
+ */
+#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
+
+/**
+ * Acquire the 'used' lock in exclusive mode.
+ *
+ * Only use this function when making changes to the used list.
+ *
+ * @returns IPRT status code, see RTSemFastMutexRequest.
+ * @param a_pGVMM The GVMM instance data.
+ * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
+ */
+#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
+
+/**
+ * Release the 'used' lock when owning it in exclusive mode.
+ *
+ * @returns IPRT status code, see RTSemFastMutexRelease.
+ * @param a_pGVMM The GVMM instance data.
+ * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
+ */
+#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
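The macro comments above split the 'used' lock into shared use for readers and exclusive use for list updates. The hypothetical fragment below illustrates that intended split using only the macros just defined; the read and update bodies are placeholders, and gvmmR0UsedLockUsageSketch is not part of the sources:

    /* Hypothetical fragment: shared while only reading the used list and
     * handle array, exclusive while linking or unlinking handles. */
    static void gvmmR0UsedLockUsageSketch(PGVMM pGVMM)
    {
        /* Shared: reading the used list / handle array only. */
        int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
        AssertRC(rc);
        /* ... walk the used list here without modifying it ... */
        GVMMR0_USED_SHARED_UNLOCK(pGVMM);

        /* Exclusive: linking or unlinking handles. */
        rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
        AssertRC(rc);
        /* ... update iUsedHead / cVMs / cEMTs here ... */
        GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
    }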
+
+
+/**
+ * Try acquire the 'create & destroy' lock.
+ *
+ * @returns IPRT status code, see RTSemFastMutexRequest.
+ * @param pGVMM The GVMM instance data.
+ */
+DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
+{
+ LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
+ int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
+ LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
+ return rc;
+}
+
+
+/**
+ * Release the 'create & destroy' lock.
+ *
+ * @returns IPRT status code, see RTSemFastMutexRequest.
+ * @param pGVMM The GVMM instance data.
+ */
+DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
+{
+ LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
+ int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
+ AssertRC(rc);
+ return rc;
+}
+
+
+/**
+ * Request wrapper for the GVMMR0CreateVM API.
+ *
+ * @returns VBox status code.
+ * @param pReq The request buffer.
+ * @param pSession The session handle. The VM will be associated with this.
+ */
+GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
+{
+ /*
+ * Validate the request.
+ */
+ if (!RT_VALID_PTR(pReq))
+ return VERR_INVALID_POINTER;
+ if (pReq->Hdr.cbReq != sizeof(*pReq))
+ return VERR_INVALID_PARAMETER;
+ if (pReq->pSession != pSession)
+ return VERR_INVALID_POINTER;
+
+ /*
+ * Execute it.
+ */
+ PGVM pGVM;
+ pReq->pVMR0 = NULL;
+ pReq->pVMR3 = NIL_RTR3PTR;
+ int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pGVM);
+ if (RT_SUCCESS(rc))
+ {
+ pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
+ pReq->pVMR3 = pGVM->pVMR3;
+ }
+ return rc;
+}
+
+
+/**
+ * Allocates the VM structure and registers it with GVM.
+ *
+ * The caller will become the VM owner and thereby the EMT.
+ *
+ * @returns VBox status code.
+ * @param pSession The support driver session.
+ * @param cCpus Number of virtual CPUs for the new VM.
+ * @param ppGVM Where to store the pointer to the VM structure.
+ *
+ * @thread EMT.
+ */
+GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
+{
+ LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
+ *ppGVM = NULL;
+
+ if ( cCpus == 0
+ || cCpus > VMM_MAX_CPU_COUNT)
+ return VERR_INVALID_PARAMETER;
+
+ RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
+ AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
+ RTPROCESS ProcId = RTProcSelf();
+ AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
+
+ /*
+ * The whole allocation process is protected by the lock.
+ */
+ int rc = gvmmR0CreateDestroyLock(pGVMM);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * Only one VM per session.
+ */
+ if (SUPR0GetSessionVM(pSession) != NULL)
+ {
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
+ return VERR_ALREADY_EXISTS;
+ }
+
+ /*
+ * Allocate a handle first so we don't waste resources unnecessarily.
+ */
+ uint16_t iHandle = pGVMM->iFreeHead;
+ if (iHandle)
+ {
+ PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
+
+ /* consistency checks, a bit paranoid as always. */
+ if ( !pHandle->pGVM
+ && !pHandle->pvObj
+ && pHandle->iSelf == iHandle)
+ {
+ pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
+ if (pHandle->pvObj)
+ {
+ /*
+ * Move the handle from the free to used list and perform permission checks.
+ */
+ rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
+ AssertRC(rc);
+
+ pGVMM->iFreeHead = pHandle->iNext;
+ pHandle->iNext = pGVMM->iUsedHead;
+ pGVMM->iUsedHead = iHandle;
+ pGVMM->cVMs++;
+
+ pHandle->pGVM = NULL;
+ pHandle->pSession = pSession;
+ pHandle->hEMT0 = NIL_RTNATIVETHREAD;
+ pHandle->ProcId = NIL_RTPROCESS;
+
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+
+ rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Allocate memory for the VM structure (combined VM + GVM).
+ */
+ const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
+ const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
+ RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
+ rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
+ if (RT_SUCCESS(rc))
+ {
+ PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
+ AssertPtr(pGVM);
+
+ /*
+ * Initialise the structure.
+ */
+ RT_BZERO(pGVM, cPages << PAGE_SHIFT);
+ gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
+ pGVM->gvmm.s.VMMemObj = hVMMemObj;
+ rc = GMMR0InitPerVMData(pGVM);
+ int rc2 = PGMR0InitPerVMData(pGVM);
+ PDMR0InitPerVMData(pGVM);
+ IOMR0InitPerVMData(pGVM);
+ if (RT_SUCCESS(rc) && RT_SUCCESS(rc2))
+ {
+ /*
+ * Allocate page array.
+ * This currently has to be made available to ring-3, but this should change eventually.
+ */
+ rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
+ if (RT_SUCCESS(rc))
+ {
+ PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
+ for (uint32_t iPage = 0; iPage < cPages; iPage++)
+ {
+ paPages[iPage].uReserved = 0;
+ paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
+ Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
+ }
+
+ /*
+ * Map the page array, VM and VMCPU structures into ring-3.
+ */
+ AssertCompileSizeAlignment(VM, PAGE_SIZE);
+ rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
+ RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
+ 0 /*offSub*/, sizeof(VM));
+ for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
+ {
+ AssertCompileSizeAlignment(VMCPU, PAGE_SIZE);
+ rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
+ (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
+ RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
+ }
+ if (RT_SUCCESS(rc))
+ rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
+ 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
+ NIL_RTR0PROCESS);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Initialize all the VM pointers.
+ */
+ PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
+ AssertPtr((void *)pVMR3);
+
+ for (VMCPUID i = 0; i < cCpus; i++)
+ {
+ pGVM->aCpus[i].pVMR0 = pGVM;
+ pGVM->aCpus[i].pVMR3 = pVMR3;
+ pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
+ pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
+ pGVM->apCpusR0[i] = &pGVM->aCpus[i];
+ AssertPtr((void *)pGVM->apCpusR3[i]);
+ }
+
+ pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
+ AssertPtr((void *)pGVM->paVMPagesR3);
+
+ /*
+ * Complete the handle - take the UsedLock sem just to be careful.
+ */
+ rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
+ AssertRC(rc);
+
+ pHandle->pGVM = pGVM;
+ pHandle->hEMT0 = hEMT0;
+ pHandle->ProcId = ProcId;
+ pGVM->pVMR3 = pVMR3;
+ pGVM->pVMR3Unsafe = pVMR3;
+ pGVM->aCpus[0].hEMT = hEMT0;
+ pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
+ pGVMM->cEMTs += cCpus;
+
+ /* Associate it with the session and create the context hook for EMT0. */
+ rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
+ if (RT_SUCCESS(rc))
+ {
+ rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Done!
+ */
+ VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
+
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+
+ CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
+
+ *ppGVM = pGVM;
+ Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
+ return VINF_SUCCESS;
+ }
+
+ SUPR0SetSessionVM(pSession, NULL, NULL);
+ }
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ }
+
+ /* Cleanup mappings. */
+ if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
+ pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
+ }
+ for (VMCPUID i = 0; i < cCpus; i++)
+ if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
+ pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
+ }
+ if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
+ pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
+ }
+ }
+ }
+ else if (RT_SUCCESS(rc))
+ rc = rc2;
+ }
+ }
+ /* else: The user wasn't permitted to create this VM. */
+
+ /*
+ * The handle will be freed by gvmmR0HandleObjDestructor as we release the
+ * object reference here. A little extra mess because of non-recursive lock.
+ */
+ void *pvObj = pHandle->pvObj;
+ pHandle->pvObj = NULL;
+ gvmmR0CreateDestroyUnlock(pGVMM);
+
+ SUPR0ObjRelease(pvObj, pSession);
+
+ SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
+ return rc;
+ }
+
+ rc = VERR_NO_MEMORY;
+ }
+ else
+ rc = VERR_GVMM_IPE_1;
+ }
+ else
+ rc = VERR_GVM_TOO_MANY_VMS;
+
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ return rc;
+}
+
+
+/**
+ * Initializes the per VM data belonging to GVMM.
+ *
+ * @param pGVM Pointer to the global VM structure.
+ * @param hSelf The handle.
+ * @param cCpus The CPU count.
+ * @param pSession The session this VM is associated with.
+ */
+static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
+{
+ AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
+ AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
+ AssertCompileMemberAlignment(VM, cpum, 64);
+ AssertCompileMemberAlignment(VM, tm, 64);
+
+ /* GVM: */
+ pGVM->u32Magic = GVM_MAGIC;
+ pGVM->hSelf = hSelf;
+ pGVM->cCpus = cCpus;
+ pGVM->pSession = pSession;
+ pGVM->pSelf = pGVM;
+
+ /* VM: */
+ pGVM->enmVMState = VMSTATE_CREATING;
+ pGVM->hSelfUnsafe = hSelf;
+ pGVM->pSessionUnsafe = pSession;
+ pGVM->pVMR0ForCall = pGVM;
+ pGVM->cCpusUnsafe = cCpus;
+ pGVM->uCpuExecutionCap = 100; /* default is no cap. */
+ pGVM->uStructVersion = 1;
+ pGVM->cbSelf = sizeof(VM);
+ pGVM->cbVCpu = sizeof(VMCPU);
+
+ /* GVMM: */
+ pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
+ pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
+ pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
+ pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
+ pGVM->gvmm.s.fDoneVMMR0Init = false;
+ pGVM->gvmm.s.fDoneVMMR0Term = false;
+
+ /*
+ * Per virtual CPU.
+ */
+ for (VMCPUID i = 0; i < pGVM->cCpus; i++)
+ {
+ pGVM->aCpus[i].idCpu = i;
+ pGVM->aCpus[i].idCpuUnsafe = i;
+ pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
+ pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
+ pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
+ pGVM->aCpus[i].pGVM = pGVM;
+ pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
+ pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
+ pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
+ pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
+ pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
+ pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
+ }
+}
+
+
+/**
+ * Does the VM initialization.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ */
+GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
+{
+ LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
+
+ int rc = VERR_INTERNAL_ERROR_3;
+ if ( !pGVM->gvmm.s.fDoneVMMR0Init
+ && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
+ {
+ for (VMCPUID i = 0; i < pGVM->cCpus; i++)
+ {
+ rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
+ if (RT_FAILURE(rc))
+ {
+ pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
+ break;
+ }
+ }
+ }
+ else
+ rc = VERR_WRONG_ORDER;
+
+ LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Indicates that we're done with the ring-0 initialization
+ * of the VM.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @thread EMT(0)
+ */
+GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
+{
+ /* Set the indicator. */
+ pGVM->gvmm.s.fDoneVMMR0Init = true;
+}
+
+
+/**
+ * Indicates that we're doing the ring-0 termination of the VM.
+ *
+ * @returns true if termination hasn't been done already, false if it has.
+ * @param pGVM Pointer to the global VM structure. Optional.
+ * @thread EMT(0) or session cleanup thread.
+ */
+GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
+{
+ /* Validate the VM structure, state and handle. */
+ AssertPtrReturn(pGVM, false);
+
+ /* Set the indicator. */
+ if (pGVM->gvmm.s.fDoneVMMR0Term)
+ return false;
+ pGVM->gvmm.s.fDoneVMMR0Term = true;
+ return true;
+}
+
+
+/**
+ * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
+ *
+ * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
+ * and the caller is not the EMT thread, unfortunately. For security reasons, it
+ * would've been nice if the caller was actually the EMT thread or if we somehow
+ * could've associated the calling thread with the VM up front.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ *
+ * @thread EMT(0) if it's associated with the VM, otherwise any thread.
+ */
+GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
+{
+ LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ /*
+ * Validate the VM structure, state and caller.
+ */
+ AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
+ AssertReturn(!((uintptr_t)pGVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
+ AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
+ VERR_WRONG_ORDER);
+
+ uint32_t hGVM = pGVM->hSelf;
+ ASMCompilerBarrier();
+ AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
+ AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
+
+ PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
+ AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
+
+ RTPROCESS ProcId = RTProcSelf();
+ RTNATIVETHREAD hSelf = RTThreadNativeSelf();
+ AssertReturn( ( pHandle->hEMT0 == hSelf
+ && pHandle->ProcId == ProcId)
+ || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
+
+ /*
+ * Lookup the handle and destroy the object.
+ * Since the lock isn't recursive and we'll have to leave it before dereferencing the
+ * object, we take some precautions against racing callers just in case...
+ */
+ int rc = gvmmR0CreateDestroyLock(pGVMM);
+ AssertRC(rc);
+
+ /* Be careful here because we might theoretically be racing someone else cleaning up. */
+ if ( pHandle->pGVM == pGVM
+ && ( ( pHandle->hEMT0 == hSelf
+ && pHandle->ProcId == ProcId)
+ || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
+ && RT_VALID_PTR(pHandle->pvObj)
+ && RT_VALID_PTR(pHandle->pSession)
+ && RT_VALID_PTR(pHandle->pGVM)
+ && pHandle->pGVM->u32Magic == GVM_MAGIC)
+ {
+ /* Check that other EMTs have deregistered. */
+ uint32_t cNotDeregistered = 0;
+ for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
+ cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
+ if (cNotDeregistered == 0)
+ {
+ /* Grab the object pointer. */
+ void *pvObj = pHandle->pvObj;
+ pHandle->pvObj = NULL;
+ gvmmR0CreateDestroyUnlock(pGVMM);
+
+ SUPR0ObjRelease(pvObj, pHandle->pSession);
+ }
+ else
+ {
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
+ }
+ }
+ else
+ {
+ SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
+ pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ rc = VERR_GVMM_IPE_2;
+ }
+
+ return rc;
+}
+
+
+/**
+ * Performs VM cleanup task as part of object destruction.
+ *
+ * @param pGVM The GVM pointer.
+ */
+static void gvmmR0CleanupVM(PGVM pGVM)
+{
+ if ( pGVM->gvmm.s.fDoneVMMR0Init
+ && !pGVM->gvmm.s.fDoneVMMR0Term)
+ {
+ if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
+ && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
+ {
+ LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
+ VMMR0TermVM(pGVM, NIL_VMCPUID);
+ }
+ else
+ AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
+ }
+
+ GMMR0CleanupVM(pGVM);
+#ifdef VBOX_WITH_NEM_R0
+ NEMR0CleanupVM(pGVM);
+#endif
+ PDMR0CleanupVM(pGVM);
+ IOMR0CleanupVM(pGVM);
+ PGMR0CleanupVM(pGVM);
+
+ AssertCompile(NIL_RTTHREADCTXHOOK == (RTTHREADCTXHOOK)0); /* Depends on zero initialized memory working for NIL at the moment. */
+ for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
+ {
+ /** @todo Can we busy wait here for all thread-context hooks to be
+ * deregistered before releasing (destroying) it? Only until we find a
+ * solution for not deregistering hooks everytime we're leaving HMR0
+ * context. */
+ VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
+ }
+}
+
+
+/**
+ * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
+ *
+ * pvUser1 is the GVM instance pointer.
+ * pvUser2 is the handle pointer.
+ */
+static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
+{
+ LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
+
+ NOREF(pvObj);
+
+ /*
+ * Some quick, paranoid, input validation.
+ */
+ PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
+ AssertPtr(pHandle);
+ PGVMM pGVMM = (PGVMM)pvUser1;
+ Assert(pGVMM == g_pGVMM);
+ const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
+ if ( !iHandle
+ || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
+ || iHandle != pHandle->iSelf)
+ {
+ SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
+ return;
+ }
+
+ int rc = gvmmR0CreateDestroyLock(pGVMM);
+ AssertRC(rc);
+ rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
+ AssertRC(rc);
+
+ /*
+ * This is a tad slow but a doubly linked list is too much hassle.
+ */
+ if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
+ {
+ SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ return;
+ }
+
+ if (pGVMM->iUsedHead == iHandle)
+ pGVMM->iUsedHead = pHandle->iNext;
+ else
+ {
+ uint16_t iPrev = pGVMM->iUsedHead;
+ int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
+ while (iPrev)
+ {
+ if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
+ {
+ SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ return;
+ }
+ if (RT_UNLIKELY(c-- <= 0))
+ {
+ iPrev = 0;
+ break;
+ }
+
+ if (pGVMM->aHandles[iPrev].iNext == iHandle)
+ break;
+ iPrev = pGVMM->aHandles[iPrev].iNext;
+ }
+ if (!iPrev)
+ {
+ SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ return;
+ }
+
+ Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
+ pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
+ }
+ pHandle->iNext = 0;
+ pGVMM->cVMs--;
+
+ /*
+ * Do the global cleanup round.
+ */
+ PGVM pGVM = pHandle->pGVM;
+ if ( RT_VALID_PTR(pGVM)
+ && pGVM->u32Magic == GVM_MAGIC)
+ {
+ pGVMM->cEMTs -= pGVM->cCpus;
+
+ if (pGVM->pSession)
+ SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
+
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+
+ gvmmR0CleanupVM(pGVM);
+
+ /*
+ * Do the GVMM cleanup - must be done last.
+ */
+ /* The VM and VM pages mappings/allocations. */
+ if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
+ {
+ rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
+ pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
+ }
+
+ if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
+ {
+ rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
+ pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
+ }
+
+ if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
+ {
+ rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
+ pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
+ }
+
+ for (VMCPUID i = 0; i < pGVM->cCpus; i++)
+ {
+ if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
+ {
+ rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
+ pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
+ }
+ if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
+ {
+ rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
+ pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
+ }
+ }
+
+ /* the GVM structure itself. */
+ pGVM->u32Magic |= UINT32_C(0x80000000);
+ Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
+ rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
+ pGVM = NULL;
+
+ /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
+ rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
+ AssertRC(rc);
+ }
+ /* else: GVMMR0CreateVM cleanup. */
+
+ /*
+ * Free the handle.
+ */
+ pHandle->iNext = pGVMM->iFreeHead;
+ pGVMM->iFreeHead = iHandle;
+ ASMAtomicWriteNullPtr(&pHandle->pGVM);
+ ASMAtomicWriteNullPtr(&pHandle->pvObj);
+ ASMAtomicWriteNullPtr(&pHandle->pSession);
+ ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
+ ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
+
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
+}
+
+
+/**
+ * Registers the calling thread as the EMT of a Virtual CPU.
+ *
+ * Note that VCPU 0 is automatically registered during VM creation.
+ *
+ * @returns VBox status code
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu VCPU id to register the current thread as.
+ */
+GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
+{
+ AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
+
+ /*
+ * Validate the VM structure, state and handle.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
+ if (RT_SUCCESS(rc))
+ {
+ if (idCpu < pGVM->cCpus)
+ {
+ /* Check that the EMT isn't already assigned to a thread. */
+ if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
+ {
+ Assert(pGVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
+
+ /* A thread may only be one EMT. */
+ RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
+ for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
+ AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Do the assignment, then try to set up the hook. Undo if that fails.
+ */
+ pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
+
+ rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[idCpu]);
+ if (RT_SUCCESS(rc))
+ CPUMR0RegisterVCpuThread(&pGVM->aCpus[idCpu]);
+ else
+ pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
+ }
+ }
+ else
+ rc = VERR_ACCESS_DENIED;
+ }
+ else
+ rc = VERR_INVALID_CPU_ID;
+ }
+ return rc;
+}
+
+
+/**
+ * Deregisters the calling thread as the EMT of a Virtual CPU.
+ *
+ * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
+ *
+ * @returns VBox status code
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu VCPU id to deregister the current thread from.
+ */
+GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
+{
+ AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
+
+ /*
+ * Validate the VM structure, state and handle.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Take the destruction lock and recheck the handle state to
+ * prevent racing GVMMR0DestroyVM.
+ */
+ gvmmR0CreateDestroyLock(pGVMM);
+ uint32_t hSelf = pGVM->hSelf;
+ ASMCompilerBarrier();
+ if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
+ && pGVMM->aHandles[hSelf].pvObj != NULL
+ && pGVMM->aHandles[hSelf].pGVM == pGVM)
+ {
+ /*
+ * Do per-EMT cleanups.
+ */
+ VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
+
+ /*
+ * Invalidate hEMT. We don't use NIL here as that would allow
+ * GVMMR0RegisterVCpu to be called again, and we don't want that.
+ */
+ AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
+ pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
+ pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
+ }
+
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ }
+ return rc;
+}
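+
+/*
+ * Usage sketch (illustrative only): how an additional EMT would typically
+ * bracket its run loop with the two calls above.  The worker function name is
+ * hypothetical; GVMMR0RegisterVCpu and GVMMR0DeregisterVCpu are the real APIs.
+ *
+ *     static DECLCALLBACK(int) emtWorkerSketch(PGVM pGVM, VMCPUID idCpu)
+ *     {
+ *         int rc = GVMMR0RegisterVCpu(pGVM, idCpu);   // idCpu must be non-zero and < pGVM->cCpus
+ *         if (RT_FAILURE(rc))
+ *             return rc;                              // thread already an EMT or VCPU already taken
+ *         // ... run the EMT loop on this native thread ...
+ *         return GVMMR0DeregisterVCpu(pGVM, idCpu);   // must be called on the same native thread
+ *     }
+ */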
+
+
+/**
+ * Lookup a GVM structure by its handle.
+ *
+ * @returns The GVM pointer on success, NULL on failure.
+ * @param hGVM The global VM handle. Asserts on bad handle.
+ */
+GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
+{
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
+
+ /*
+ * Validate.
+ */
+ AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
+ AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
+
+ /*
+ * Look it up.
+ */
+ PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
+ AssertPtrReturn(pHandle->pvObj, NULL);
+ PGVM pGVM = pHandle->pGVM;
+ AssertPtrReturn(pGVM, NULL);
+
+ return pGVM;
+}
+
+
+/**
+ * Check that the given GVM and VM structures match up.
+ *
+ * The calling thread must be in the same process as the VM. All current lookups
+ * are by threads inside the same process, so this will not be an issue.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param ppGVMM Where to store the pointer to the GVMM instance data.
+ * @param fTakeUsedLock Whether to take the used lock or not. We take it in
+ * shared mode when requested.
+ *
+ * Be very careful if not taking the lock as it's
+ * possible that the VM will disappear then!
+ *
+ * @remark This will not assert on an invalid pGVM but try to return silently.
+ */
+static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
+{
+ /*
+ * Check the pointers.
+ */
+ int rc;
+ if (RT_LIKELY( RT_VALID_PTR(pGVM)
+ && ((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0 ))
+ {
+ /*
+ * Get the pGVMM instance and check the VM handle.
+ */
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ uint16_t hGVM = pGVM->hSelf;
+ if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
+ && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
+ {
+ RTPROCESS const pidSelf = RTProcSelf();
+ PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
+ if (fTakeUsedLock)
+ {
+ rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
+ AssertRCReturn(rc, rc);
+ }
+
+ if (RT_LIKELY( pHandle->pGVM == pGVM
+ && pHandle->ProcId == pidSelf
+ && RT_VALID_PTR(pHandle->pvObj)))
+ {
+ /*
+ * Some more VM data consistency checks.
+ */
+ if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
+ && pGVM->hSelfUnsafe == hGVM
+ && pGVM->pSelf == pGVM))
+ {
+ if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
+ && pGVM->enmVMState <= VMSTATE_TERMINATED))
+ {
+ *ppGVMM = pGVMM;
+ return VINF_SUCCESS;
+ }
+ rc = VERR_INCONSISTENT_VM_HANDLE;
+ }
+ else
+ rc = VERR_INCONSISTENT_VM_HANDLE;
+ }
+ else
+ rc = VERR_INVALID_VM_HANDLE;
+
+ if (fTakeUsedLock)
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ }
+ else
+ rc = VERR_INVALID_VM_HANDLE;
+ }
+ else
+ rc = VERR_INVALID_POINTER;
+ return rc;
+}
+
+
+/**
+ * Validates a GVM/VM pair.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ */
+GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
+{
+ PGVMM pGVMM;
+ return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
+}
+
+
+/**
+ * Check that the given GVM and VM structures match up.
+ *
+ * The calling thread must be in the same process as the VM. All current lookups
+ * are by threads inside the same process, so this will not be an issue.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
+ * @param ppGVMM Where to store the pointer to the GVMM instance data.
+ * @thread EMT
+ *
+ * @remarks This will assert in all failure paths.
+ */
+static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
+{
+ /*
+ * Check the pointers.
+ */
+ AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
+ AssertReturn(((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
+
+ /*
+ * Get the pGVMM instance and check the VM handle.
+ */
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ uint16_t hGVM = pGVM->hSelf;
+ ASMCompilerBarrier();
+ AssertReturn( hGVM != NIL_GVM_HANDLE
+ && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
+
+ RTPROCESS const pidSelf = RTProcSelf();
+ PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
+ AssertReturn( pHandle->pGVM == pGVM
+ && pHandle->ProcId == pidSelf
+ && RT_VALID_PTR(pHandle->pvObj),
+ VERR_INVALID_HANDLE);
+
+ /*
+ * Check the EMT claim.
+ */
+ RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
+ AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
+ AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
+
+ /*
+ * Some more VM data consistency checks.
+ */
+ AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
+ AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
+ AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
+ && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
+
+ *ppGVMM = pGVMM;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Validates a GVM/EMT pair.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The Virtual CPU ID of the calling EMT.
+ * @thread EMT(idCpu)
+ */
+GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
+{
+ PGVMM pGVMM;
+ return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
+}
+
+
+/**
+ * Looks up the VM belonging to the specified EMT thread.
+ *
+ * This is used by the assertion machinery in VMMR0.cpp to avoid causing
+ * unnecessary kernel panics when the EMT thread hits an assertion. The
+ * caller may or may not be an EMT thread.
+ *
+ * @returns Pointer to the VM on success, NULL on failure.
+ * @param hEMT The native thread handle of the EMT.
+ * NIL_RTNATIVETHREAD means the current thread
+ */
+GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
+{
+ /*
+ * No Assertions here as we're usually called in a AssertMsgN or
+ * RTAssert* context.
+ */
+ PGVMM pGVMM = g_pGVMM;
+ if ( !RT_VALID_PTR(pGVMM)
+ || pGVMM->u32Magic != GVMM_MAGIC)
+ return NULL;
+
+ if (hEMT == NIL_RTNATIVETHREAD)
+ hEMT = RTThreadNativeSelf();
+ RTPROCESS ProcId = RTProcSelf();
+
+ /*
+ * Search the handles in a linear fashion as we don't dare to take the lock (assert).
+ */
+/** @todo introduce some pid hash table here, please. */
+ for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
+ {
+ if ( pGVMM->aHandles[i].iSelf == i
+ && pGVMM->aHandles[i].ProcId == ProcId
+ && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
+ && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
+ {
+ if (pGVMM->aHandles[i].hEMT0 == hEMT)
+ return pGVMM->aHandles[i].pGVM;
+
+ /* This is fairly safe with the current process per VM approach. */
+ PGVM pGVM = pGVMM->aHandles[i].pGVM;
+ VMCPUID const cCpus = pGVM->cCpus;
+ ASMCompilerBarrier();
+ if ( cCpus < 1
+ || cCpus > VMM_MAX_CPU_COUNT)
+ continue;
+ for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
+ if (pGVM->aCpus[idCpu].hEMT == hEMT)
+ return pGVMM->aHandles[i].pGVM;
+ }
+ }
+ return NULL;
+}
+
+
+/**
+ * Looks up the GVMCPU belonging to the specified EMT thread.
+ *
+ * This is used by the assertion machinery in VMMR0.cpp to avoid causing
+ * unnecessary kernel panics when the EMT thread hits an assertion. The
+ * caller may or may not be an EMT thread.
+ *
+ * @returns Pointer to the VM on success, NULL on failure.
+ * @param hEMT The native thread handle of the EMT.
+ * NIL_RTNATIVETHREAD means the current thread
+ */
+GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
+{
+ /*
+ * No Assertions here as we're usually called in a AssertMsgN,
+ * RTAssert*, Log and LogRel contexts.
+ */
+ PGVMM pGVMM = g_pGVMM;
+ if ( !RT_VALID_PTR(pGVMM)
+ || pGVMM->u32Magic != GVMM_MAGIC)
+ return NULL;
+
+ if (hEMT == NIL_RTNATIVETHREAD)
+ hEMT = RTThreadNativeSelf();
+ RTPROCESS ProcId = RTProcSelf();
+
+ /*
+ * Search the handles in a linear fashion as we don't dare to take the lock (assert).
+ */
+/** @todo introduce some pid hash table here, please. */
+ for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
+ {
+ if ( pGVMM->aHandles[i].iSelf == i
+ && pGVMM->aHandles[i].ProcId == ProcId
+ && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
+ && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
+ {
+ PGVM pGVM = pGVMM->aHandles[i].pGVM;
+ if (pGVMM->aHandles[i].hEMT0 == hEMT)
+ return &pGVM->aCpus[0];
+
+ /* This is fairly safe with the current process per VM approach. */
+ VMCPUID const cCpus = pGVM->cCpus;
+ ASMCompilerBarrier();
+ if ( cCpus < 1
+ || cCpus > VMM_MAX_CPU_COUNT)
+ continue;
+ for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
+ if (pGVM->aCpus[idCpu].hEMT == hEMT)
+ return &pGVM->aCpus[idCpu];
+ }
+ }
+ return NULL;
+}
+
+
+/**
+ * This will wake up expired and soon-to-be expired VMs.
+ *
+ * @returns Number of VMs that have been woken up.
+ * @param pGVMM Pointer to the GVMM instance data.
+ * @param u64Now The current time.
+ */
+static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
+{
+ /*
+ * Skip this if it has been disabled because of high resolution wakeups or by
+ * the user.
+ */
+ if (!pGVMM->fDoEarlyWakeUps)
+ return 0;
+
+/** @todo Rewrite this algorithm. See performance defect XYZ. */
+
+ /*
+ * A cheap optimization to stop wasting so much time here on big setups.
+ */
+ const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
+ if ( pGVMM->cHaltedEMTs == 0
+ || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
+ return 0;
+
+ /*
+ * Only one thread doing this at a time.
+ */
+ if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
+ return 0;
+
+ /*
+ * The first pass will wake up VMs which have actually expired
+ * and look for VMs that should be woken up in the 2nd and 3rd passes.
+ */
+ const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
+ uint64_t u64Min = UINT64_MAX;
+ unsigned cWoken = 0;
+ unsigned cHalted = 0;
+ unsigned cTodo2nd = 0;
+ unsigned cTodo3rd = 0;
+ for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
+ if ( RT_VALID_PTR(pCurGVM)
+ && pCurGVM->u32Magic == GVM_MAGIC)
+ {
+ for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
+ {
+ PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
+ uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
+ if (u64)
+ {
+ if (u64 <= u64Now)
+ {
+ if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
+ {
+ int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
+ AssertRC(rc);
+ cWoken++;
+ }
+ }
+ else
+ {
+ cHalted++;
+ if (u64 <= uNsEarlyWakeUp1)
+ cTodo2nd++;
+ else if (u64 <= uNsEarlyWakeUp2)
+ cTodo3rd++;
+ else if (u64 < u64Min)
+ u64Min = u64;
+ }
+ }
+ }
+ }
+ AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
+ }
+
+ if (cTodo2nd)
+ {
+ for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
+ if ( RT_VALID_PTR(pCurGVM)
+ && pCurGVM->u32Magic == GVM_MAGIC)
+ {
+ for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
+ {
+ PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
+ uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
+ if ( u64
+ && u64 <= uNsEarlyWakeUp1)
+ {
+ if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
+ {
+ int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
+ AssertRC(rc);
+ cWoken++;
+ }
+ }
+ }
+ }
+ AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
+ }
+ }
+
+ if (cTodo3rd)
+ {
+ for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
+ if ( RT_VALID_PTR(pCurGVM)
+ && pCurGVM->u32Magic == GVM_MAGIC)
+ {
+ for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
+ {
+ PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
+ uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
+ if ( u64
+ && u64 <= uNsEarlyWakeUp2)
+ {
+ if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
+ {
+ int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
+ AssertRC(rc);
+ cWoken++;
+ }
+ }
+ }
+ }
+ AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
+ }
+ }
+
+ /*
+ * Set the minimum value.
+ */
+ pGVMM->uNsNextEmtWakeup = u64Min;
+
+ ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
+ return cWoken;
+}
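+
+/*
+ * Worked example (illustrative only, hypothetical values): with
+ * nsEarlyWakeUp1 = 25us and nsEarlyWakeUp2 = 50us, a VCPU whose u64HaltExpire
+ * is <= u64Now is signalled in the first pass, one expiring within the next
+ * 25us is signalled in the second pass, one expiring within 25..50us in the
+ * third pass, and anything later only contributes to the next-wakeup minimum.
+ */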
+
+
+/**
+ * Halt the EMT thread.
+ *
+ * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
+ * VERR_INTERRUPTED if a signal was scheduled for the thread.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pGVCpu The global (ring-0) CPU structure of the calling
+ * EMT.
+ * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
+ * @thread EMT(pGVCpu).
+ */
+GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
+{
+ LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
+ pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ pGVM->gvmm.s.StatsSched.cHaltCalls++;
+ Assert(!pGVCpu->gvmm.s.u64HaltExpire);
+
+ /*
+ * If we're doing early wake-ups, we must take the UsedList lock before we
+ * start querying the current time.
+ * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
+ */
+ bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
+ if (fDoEarlyWakeUps)
+ {
+ int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+
+ pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
+
+ /* GIP hack: We might frequently be sleeping for short intervals where the
+ difference between GIP and system time matters on systems with high resolution
+ system time. So, convert the input from GIP to System time in that case. */
+ Assert(ASMGetFlags() & X86_EFL_IF);
+ const uint64_t u64NowSys = RTTimeSystemNanoTS();
+ const uint64_t u64NowGip = RTTimeNanoTS();
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
+ if (fDoEarlyWakeUps)
+ {
+ pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+
+ /*
+ * Go to sleep if we must...
+ * Cap the sleep time to 1 second to be on the safe side.
+ */
+ int rc;
+ uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
+ if ( u64NowGip < u64ExpireGipTime
+ && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
+ ? pGVMM->nsMinSleepCompany
+ : pGVMM->nsMinSleepAlone))
+ {
+ pGVM->gvmm.s.StatsSched.cHaltBlocking++;
+ if (cNsInterval > RT_NS_1SEC)
+ u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
+ ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
+ ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
+ if (fDoEarlyWakeUps)
+ {
+ if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
+ pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ }
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
+ rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
+ RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
+ u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
+ ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
+ ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
+
+ /* Reset the semaphore to try to prevent a few false wake-ups. */
+ if (rc == VINF_SUCCESS)
+ {
+ RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+ else if (rc == VERR_TIMEOUT)
+ {
+ pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
+ rc = VINF_SUCCESS;
+ }
+ }
+ else
+ {
+ pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
+ if (fDoEarlyWakeUps)
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ rc = VINF_SUCCESS;
+ }
+
+ return rc;
+}
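+
+/*
+ * Usage sketch (illustrative only): the deadline is an absolute GIP timestamp,
+ * so a relative timeout is converted before calling in.  The helper name and
+ * the 50us figure are hypothetical.
+ *
+ *     static int emtHaltSketch(PGVM pGVM, PGVMCPU pGVCpu)
+ *     {
+ *         uint64_t const u64Expire = RTTimeNanoTS() + 50000; // now (GIP) + 50us
+ *         return GVMMR0SchedHalt(pGVM, pGVCpu, u64Expire);   // capped to 1 second internally
+ *     }
+ */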
+
+
+/**
+ * Halt the EMT thread.
+ *
+ * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
+ * VERR_INTERRUPTED if a signal was scheduled for the thread.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The Virtual CPU ID of the calling EMT.
+ * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
+ * @thread EMT(idCpu).
+ */
+GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
+{
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
+ if (RT_SUCCESS(rc))
+ {
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
+ }
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ return rc;
+}
+
+
+
+/**
+ * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
+ * a sleeping EMT.
+ *
+ * @retval VINF_SUCCESS if successfully woken up.
+ * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pGVCpu The global (ring-0) VCPU structure.
+ */
+DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
+{
+ pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
+
+ /*
+ * Signal the semaphore regardless of whether it's currently blocked on it.
+ *
+ * The reason for this is that there is absolutely no way we can be 100%
+ * certain that it isn't *about* to go to sleep on it and just got
+ * delayed a bit en route. So, we will always signal the semaphore when
+ * it is flagged as halted in the VMM.
+ */
+/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
+ int rc;
+ if (pGVCpu->gvmm.s.u64HaltExpire)
+ {
+ rc = VINF_SUCCESS;
+ ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
+ }
+ else
+ {
+ rc = VINF_GVM_NOT_BLOCKED;
+ pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
+ }
+
+ int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
+ AssertRC(rc2);
+
+ return rc;
+}
+
+
+/**
+ * Wakes up the halted EMT thread so it can service a pending request.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if successfully woken up.
+ * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The Virtual CPU ID of the EMT to wake up.
+ * @param fTakeUsedLock Take the used lock or not
+ * @thread Any but EMT(idCpu).
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
+{
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
+ /*
+ * Validate input and take the UsedLock.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ if (RT_SUCCESS(rc))
+ {
+ if (idCpu < pGVM->cCpus)
+ {
+ /*
+ * Do the actual job.
+ */
+ rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
+ if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
+ {
+ /*
+ * While we're here, do a round of scheduling.
+ */
+ Assert(ASMGetFlags() & X86_EFL_IF);
+ const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
+ pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+ }
+ else
+ rc = VERR_INVALID_CPU_ID;
+
+ if (fTakeUsedLock)
+ {
+ int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ AssertRC(rc2);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+ }
+
+ LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Wakes up the halted EMT thread so it can service a pending request.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if successfully woken up.
+ * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The Virtual CPU ID of the EMT to wake up.
+ * @thread Any but EMT(idCpu).
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
+{
+ return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
+}
+
+
+/**
+ * Wakes up the halted EMT thread so it can service a pending request, no GVM
+ * parameter and no used locking.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if successfully woken up.
+ * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The Virtual CPU ID of the EMT to wake up.
+ * @thread Any but EMT(idCpu).
+ * @deprecated Don't use in new code if possible! Use the GVM variant.
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
+{
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ if (RT_SUCCESS(rc))
+ rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
+ return rc;
+}
+
+
+/**
+ * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
+ * the Virtual CPU if it's still busy executing guest code.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if poked successfully.
+ * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
+{
+ pGVM->gvmm.s.StatsSched.cPokeCalls++;
+
+ RTCPUID idHostCpu = pVCpu->idHostCpu;
+ if ( idHostCpu == NIL_RTCPUID
+ || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
+ {
+ pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
+ return VINF_GVM_NOT_BUSY_IN_GC;
+ }
+
+ /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
+ RTMpPokeCpu(idHostCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Pokes an EMT if it's still busy running guest code.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if poked successfully.
+ * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The ID of the virtual CPU to poke.
+ * @param fTakeUsedLock Take the used lock or not
+ */
+GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
+{
+ /*
+ * Validate input and take the UsedLock.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
+ if (RT_SUCCESS(rc))
+ {
+ if (idCpu < pGVM->cCpus)
+ rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
+ else
+ rc = VERR_INVALID_CPU_ID;
+
+ if (fTakeUsedLock)
+ {
+ int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ AssertRC(rc2);
+ }
+ }
+
+ LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Pokes an EMT if it's still busy running guest code.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if poked successfully.
+ * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The ID of the virtual CPU to poke.
+ */
+GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
+{
+ return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
+}
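+
+/*
+ * Usage sketch (illustrative only): a non-EMT thread kicking a VCPU that may
+ * either be executing guest code or be halted:
+ *
+ *     int rc = GVMMR0SchedPoke(pGVM, idCpu);     // IPI it if busy in guest context
+ *     if (rc == VINF_GVM_NOT_BUSY_IN_GC)
+ *         rc = GVMMR0SchedWakeUp(pGVM, idCpu);   // otherwise wake it if halted
+ */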
+
+
+/**
+ * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
+ * used locking.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if poked successfully.
+ * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The ID of the virtual CPU to poke.
+ *
+ * @deprecated Don't use in new code if possible! Use the GVM variant.
+ */
+GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
+{
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
+ if (RT_SUCCESS(rc))
+ {
+ if (idCpu < pGVM->cCpus)
+ rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
+ else
+ rc = VERR_INVALID_CPU_ID;
+ }
+ return rc;
+}
+
+
+/**
+ * Wakes up a set of halted EMT threads so they can service pending requests.
+ *
+ * @returns VBox status code, no informational stuff.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pSleepSet The set of sleepers to wake up.
+ * @param pPokeSet The set of CPUs to poke.
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
+{
+ AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
+ AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ RTNATIVETHREAD hSelf = RTThreadNativeSelf();
+
+ /*
+ * Validate input and take the UsedLock.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ if (RT_SUCCESS(rc))
+ {
+ rc = VINF_SUCCESS;
+ VMCPUID idCpu = pGVM->cCpus;
+ while (idCpu-- > 0)
+ {
+ /* Don't try to poke or wake up ourselves. */
+ if (pGVM->aCpus[idCpu].hEMT == hSelf)
+ continue;
+
+ /* just ignore errors for now. */
+ if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
+ {
+ gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+ else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
+ {
+ gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+ }
+
+ int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ AssertRC(rc2);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+
+ LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
+ return rc;
+}
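+
+/*
+ * Usage sketch (illustrative only): building the two sets for a broadcast.
+ * The classification predicate is hypothetical and the VMCPUSET_EMPTY/_ADD
+ * macros are assumed to be the ones from VBox/vmm/vm.h.
+ *
+ *     VMCPUSET SleepSet, PokeSet;
+ *     VMCPUSET_EMPTY(&SleepSet);
+ *     VMCPUSET_EMPTY(&PokeSet);
+ *     for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
+ *         if (vcpuIsHaltedSketch(pGVM, idCpu))   // hypothetical check
+ *             VMCPUSET_ADD(&SleepSet, idCpu);
+ *         else
+ *             VMCPUSET_ADD(&PokeSet, idCpu);
+ *     int rc = GVMMR0SchedWakeUpAndPokeCpus(pGVM, &SleepSet, &PokeSet);
+ */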
+
+
+/**
+ * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
+ *
+ * @returns see GVMMR0SchedWakeUpAndPokeCpus.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pReq Pointer to the request packet.
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
+}
+
+
+
+/**
+ * Poll the schedule to see if someone else should get a chance to run.
+ *
+ * This is a bit hackish and will not work too well if the machine is
+ * under heavy load from non-VM processes.
+ *
+ * @returns VINF_SUCCESS if not yielded.
+ * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The Virtual CPU ID of the calling EMT.
+ * @param fYield Whether to yield or not.
+ * This is for when we're spinning in the halt loop.
+ * @thread EMT(idCpu).
+ */
+GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
+{
+ /*
+ * Validate input.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * We currently only implement helping with wake-ups (fYield = false), so don't
+ * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
+ */
+ if (!fYield && pGVMM->fDoEarlyWakeUps)
+ {
+ rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
+ pGVM->gvmm.s.StatsSched.cPollCalls++;
+
+ Assert(ASMGetFlags() & X86_EFL_IF);
+ const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
+
+ pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
+
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ }
+ /*
+ * Not quite sure what we could do here...
+ */
+ else if (fYield)
+ rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
+ else
+ rc = VINF_SUCCESS;
+ }
+
+ LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+#ifdef GVMM_SCHED_WITH_PPT
+/**
+ * Timer callback for the periodic preemption timer.
+ *
+ * @param pTimer The timer handle.
+ * @param pvUser Pointer to the per cpu structure.
+ * @param iTick The current tick.
+ */
+static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
+{
+ PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
+ NOREF(pTimer); NOREF(iTick);
+
+ /*
+ * Termination check
+ */
+ if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
+ return;
+
+ /*
+ * Do the house keeping.
+ */
+ RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
+
+ if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
+ {
+ /*
+ * Historicize the max frequency.
+ */
+ uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
+ pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
+ pCpu->Ppt.iTickHistorization = 0;
+ pCpu->Ppt.uDesiredHz = 0;
+
+ /*
+ * Check whether the current timer frequency needs adjusting.
+ */
+ uint32_t uHistMaxHz = 0;
+ for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
+ if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
+ uHistMaxHz = pCpu->Ppt.aHzHistory[i];
+ if (uHistMaxHz == pCpu->Ppt.uTimerHz)
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+ else if (uHistMaxHz)
+ {
+ /*
+ * Reprogram it.
+ */
+ pCpu->Ppt.cChanges++;
+ pCpu->Ppt.iTickHistorization = 0;
+ pCpu->Ppt.uTimerHz = uHistMaxHz;
+ uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
+ pCpu->Ppt.cNsInterval = cNsInterval;
+ if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
+ pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
+ + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
+ / cNsInterval;
+ else
+ pCpu->Ppt.cTicksHistoriziationInterval = 1;
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+
+ /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
+ RTTimerChangeInterval(pTimer, cNsInterval);
+ }
+ else
+ {
+ /*
+ * Stop it.
+ */
+ pCpu->Ppt.fStarted = false;
+ pCpu->Ppt.uTimerHz = 0;
+ pCpu->Ppt.cNsInterval = 0;
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+
+ /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
+ RTTimerStop(pTimer);
+ }
+ }
+ else
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+}
+#endif /* GVMM_SCHED_WITH_PPT */
+
+
+/**
+ * Updates the periodic preemption timer for the calling CPU.
+ *
+ * The caller must have disabled preemption!
+ * The caller must check that the host can do high resolution timers.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idHostCpu The current host CPU id.
+ * @param uHz The desired frequency.
+ */
+GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
+{
+ NOREF(pGVM);
+#ifdef GVMM_SCHED_WITH_PPT
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(RTTimerCanDoHighResolution());
+
+ /*
+ * Resolve the per CPU data.
+ */
+ uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
+ PGVMM pGVMM = g_pGVMM;
+ if ( !RT_VALID_PTR(pGVMM)
+ || pGVMM->u32Magic != GVMM_MAGIC)
+ return;
+ AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
+ PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
+ AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
+ && pCpu->idCpu == idHostCpu,
+ ("u32Magic=%#x idCpu=%d idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
+
+ /*
+ * Check whether we need to do anything about the timer.
+ * We have to be a little bit careful since we might race the timer
+ * callback here.
+ */
+ if (uHz > 16384)
+ uHz = 16384; /** @todo add a query method for this! */
+ if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
+ && uHz >= pCpu->Ppt.uMinHz
+ && !pCpu->Ppt.fStarting /* solaris paranoia */))
+ {
+ RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
+
+ pCpu->Ppt.uDesiredHz = uHz;
+ uint32_t cNsInterval = 0;
+ if (!pCpu->Ppt.fStarted)
+ {
+ pCpu->Ppt.cStarts++;
+ pCpu->Ppt.fStarted = true;
+ pCpu->Ppt.fStarting = true;
+ pCpu->Ppt.iTickHistorization = 0;
+ pCpu->Ppt.uTimerHz = uHz;
+ pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
+ if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
+ pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
+ + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
+ / cNsInterval;
+ else
+ pCpu->Ppt.cTicksHistoriziationInterval = 1;
+ }
+
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+
+ if (cNsInterval)
+ {
+ RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
+ int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
+ AssertRC(rc);
+
+ RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
+ if (RT_FAILURE(rc))
+ pCpu->Ppt.fStarted = false;
+ pCpu->Ppt.fStarting = false;
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+ }
+ }
+#else /* !GVMM_SCHED_WITH_PPT */
+ NOREF(idHostCpu); NOREF(uHz);
+#endif /* !GVMM_SCHED_WITH_PPT */
+}
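+
+/*
+ * Worked example (illustrative only): for uHz = 1000 the interval becomes
+ * RT_NS_1SEC / 1000 = 1 000 000 ns.  The historization tick count is then
+ * (GVMMHOSTCPU_PPT_HIST_INTERVAL_NS + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1) / 1 000 000,
+ * i.e. the max-frequency history used by the timer callback above is rolled
+ * roughly every 1.5 x GVMMHOSTCPU_PPT_HIST_INTERVAL_NS of timer time.
+ */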
+
+
+/**
+ * Calls @a pfnCallback for each VM in the system.
+ *
+ * This will enumerate the VMs while holding the global VM used list lock in
+ * shared mode. So, only suitable for simple work. If more expensive work
+ * needs doing, a different approach must be taken as using this API would
+ * otherwise block VM creation and destruction.
+ *
+ * @returns VBox status code.
+ * @param pfnCallback The callback function.
+ * @param pvUser User argument to the callback.
+ */
+GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
+{
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ int rc = VINF_SUCCESS;
+ GVMMR0_USED_SHARED_LOCK(pGVMM);
+ for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext, cLoops++)
+ {
+ PGVM pGVM = pGVMM->aHandles[i].pGVM;
+ if ( RT_VALID_PTR(pGVM)
+ && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
+ && pGVM->u32Magic == GVM_MAGIC)
+ {
+ rc = pfnCallback(pGVM, pvUser);
+ if (rc != VINF_SUCCESS)
+ break;
+ }
+
+ AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
+ }
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ return rc;
+}
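+
+/*
+ * Usage sketch (illustrative only): a minimal enumeration callback that counts
+ * EMTs across all registered VMs.  Callback and counter names are hypothetical.
+ *
+ *     static DECLCALLBACK(int) gvmmCountEmtsSketch(PGVM pGVM, void *pvUser)
+ *     {
+ *         *(uint32_t *)pvUser += pGVM->cCpus;    // keep it cheap - the used list lock is held shared
+ *         return VINF_SUCCESS;                   // any other status stops the enumeration
+ *     }
+ *
+ *     uint32_t cEmts = 0;
+ *     int rc = GVMMR0EnumVMs(gvmmCountEmtsSketch, &cEmts);
+ */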
+
+
+/**
+ * Retrieves the GVMM statistics visible to the caller.
+ *
+ * @returns VBox status code.
+ *
+ * @param pStats Where to put the statistics.
+ * @param pSession The current session.
+ * @param pGVM The GVM to obtain statistics for. Optional.
+ */
+GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
+{
+ LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
+
+ /*
+ * Validate input.
+ */
+ AssertPtrReturn(pSession, VERR_INVALID_POINTER);
+ AssertPtrReturn(pStats, VERR_INVALID_POINTER);
+ pStats->cVMs = 0; /* (crash before taking the sem...) */
+
+ /*
+ * Take the lock and get the VM statistics.
+ */
+ PGVMM pGVMM;
+ if (pGVM)
+ {
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
+ if (RT_FAILURE(rc))
+ return rc;
+ pStats->SchedVM = pGVM->gvmm.s.StatsSched;
+ }
+ else
+ {
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+ memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
+
+ int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
+ AssertRCReturn(rc, rc);
+ }
+
+ /*
+ * Enumerate the VMs and add the ones visible to the statistics.
+ */
+ pStats->cVMs = 0;
+ pStats->cEMTs = 0;
+ memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
+
+ for (unsigned i = pGVMM->iUsedHead;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
+ void *pvObj = pGVMM->aHandles[i].pvObj;
+ if ( RT_VALID_PTR(pvObj)
+ && RT_VALID_PTR(pOtherGVM)
+ && pOtherGVM->u32Magic == GVM_MAGIC
+ && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
+ {
+ pStats->cVMs++;
+ pStats->cEMTs += pOtherGVM->cCpus;
+
+ pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
+ pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
+ pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
+ pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
+ pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
+
+ pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
+ pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
+ pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
+
+ pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
+ pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
+
+ pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
+ pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
+ pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
+ }
+ }
+
+ /*
+ * Copy out the per host CPU statistics.
+ */
+ uint32_t iDstCpu = 0;
+ uint32_t cSrcCpus = pGVMM->cHostCpus;
+ for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
+ {
+ if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
+ {
+ pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
+ pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
+#ifdef GVMM_SCHED_WITH_PPT
+ pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
+ pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
+ pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
+ pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
+#else
+ pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
+ pStats->aHostCpus[iDstCpu].uTimerHz = 0;
+ pStats->aHostCpus[iDstCpu].cChanges = 0;
+ pStats->aHostCpus[iDstCpu].cStarts = 0;
+#endif
+ iDstCpu++;
+ if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
+ break;
+ }
+ }
+ pStats->cHostCpus = iDstCpu;
+
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VMMR0 request wrapper for GVMMR0QueryStatistics.
+ *
+ * @returns see GVMMR0QueryStatistics.
+ * @param pGVM The global (ring-0) VM structure. Optional.
+ * @param pReq Pointer to the request packet.
+ * @param pSession The current session.
+ */
+GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+ AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
+
+ return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
+}
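+
+/*
+ * Usage sketch (illustrative only): filling in the request packet for the
+ * wrapper above.  Only the fields validated here are shown; any further header
+ * initialization (e.g. the magic) is assumed to be done by the caller.
+ *
+ *     GVMMQUERYSTATISTICSSREQ Req;
+ *     RT_ZERO(Req);
+ *     Req.Hdr.cbReq = sizeof(Req);
+ *     Req.pSession  = pSession;                  // must match the calling session
+ *     int rc = GVMMR0QueryStatisticsReq(pGVM, &Req, pSession);
+ *     // on success Req.Stats.cVMs, Req.Stats.cEMTs and Req.Stats.SchedSum are filled in
+ */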
+
+
+/**
+ * Resets the specified GVMM statistics.
+ *
+ * @returns VBox status code.
+ *
+ * @param pStats Which statistics to reset, that is, non-zero fields indicate which to reset.
+ * @param pSession The current session.
+ * @param pGVM The GVM to reset statistics for. Optional.
+ */
+GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
+{
+ LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
+
+ /*
+ * Validate input.
+ */
+ AssertPtrReturn(pSession, VERR_INVALID_POINTER);
+ AssertPtrReturn(pStats, VERR_INVALID_POINTER);
+
+ /*
+ * Take the lock and get the VM statistics.
+ */
+ PGVMM pGVMM;
+ if (pGVM)
+ {
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
+ if (RT_FAILURE(rc))
+ return rc;
+# define MAYBE_RESET_FIELD(field) \
+ do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
+ MAYBE_RESET_FIELD(cHaltCalls);
+ MAYBE_RESET_FIELD(cHaltBlocking);
+ MAYBE_RESET_FIELD(cHaltTimeouts);
+ MAYBE_RESET_FIELD(cHaltNotBlocking);
+ MAYBE_RESET_FIELD(cHaltWakeUps);
+ MAYBE_RESET_FIELD(cWakeUpCalls);
+ MAYBE_RESET_FIELD(cWakeUpNotHalted);
+ MAYBE_RESET_FIELD(cWakeUpWakeUps);
+ MAYBE_RESET_FIELD(cPokeCalls);
+ MAYBE_RESET_FIELD(cPokeNotBusy);
+ MAYBE_RESET_FIELD(cPollCalls);
+ MAYBE_RESET_FIELD(cPollHalts);
+ MAYBE_RESET_FIELD(cPollWakeUps);
+# undef MAYBE_RESET_FIELD
+ }
+ else
+ {
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
+ AssertRCReturn(rc, rc);
+ }
+
+ /*
+ * Enumerate the VMs and reset the requested statistics for the visible ones.
+ */
+ if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
+ {
+ for (unsigned i = pGVMM->iUsedHead;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
+ void *pvObj = pGVMM->aHandles[i].pvObj;
+ if ( RT_VALID_PTR(pvObj)
+ && RT_VALID_PTR(pOtherGVM)
+ && pOtherGVM->u32Magic == GVM_MAGIC
+ && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
+ {
+# define MAYBE_RESET_FIELD(field) \
+ do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
+ MAYBE_RESET_FIELD(cHaltCalls);
+ MAYBE_RESET_FIELD(cHaltBlocking);
+ MAYBE_RESET_FIELD(cHaltTimeouts);
+ MAYBE_RESET_FIELD(cHaltNotBlocking);
+ MAYBE_RESET_FIELD(cHaltWakeUps);
+ MAYBE_RESET_FIELD(cWakeUpCalls);
+ MAYBE_RESET_FIELD(cWakeUpNotHalted);
+ MAYBE_RESET_FIELD(cWakeUpWakeUps);
+ MAYBE_RESET_FIELD(cPokeCalls);
+ MAYBE_RESET_FIELD(cPokeNotBusy);
+ MAYBE_RESET_FIELD(cPollCalls);
+ MAYBE_RESET_FIELD(cPollHalts);
+ MAYBE_RESET_FIELD(cPollWakeUps);
+# undef MAYBE_RESET_FIELD
+ }
+ }
+ }
+
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+
+ return VINF_SUCCESS;
+}
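+
+/*
+ * Usage sketch (illustrative only): resetting just the halt counters of one VM
+ * by marking the corresponding fields non-zero in the input structure:
+ *
+ *     GVMMSTATS Stats;
+ *     RT_ZERO(Stats);
+ *     Stats.SchedVM.cHaltCalls    = 1;           // non-zero => reset this counter
+ *     Stats.SchedVM.cHaltBlocking = 1;
+ *     int rc = GVMMR0ResetStatistics(&Stats, pSession, pGVM);
+ */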
+
+
+/**
+ * VMMR0 request wrapper for GVMMR0ResetStatistics.
+ *
+ * @returns see GVMMR0ResetStatistics.
+ * @param pGVM The global (ring-0) VM structure. Optional.
+ * @param pReq Pointer to the request packet.
+ * @param pSession The current session.
+ */
+GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+ AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
+
+ return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
+}
+
diff --git a/src/VBox/VMM/VMMR0/GVMMR0Internal.h b/src/VBox/VMM/VMMR0/GVMMR0Internal.h
new file mode 100644
index 00000000..85f7ccef
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/GVMMR0Internal.h
@@ -0,0 +1,73 @@
+/* $Id: GVMMR0Internal.h $ */
+/** @file
+ * GVMM - The Global VM Manager, Internal header.
+ */
+
+/*
+ * Copyright (C) 2007-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VMM_INCLUDED_SRC_VMMR0_GVMMR0Internal_h
+#define VMM_INCLUDED_SRC_VMMR0_GVMMR0Internal_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <iprt/mem.h>
+
+/**
+ * The GVMM per VCPU data.
+ */
+typedef struct GVMMPERVCPU
+{
+ /** The time the halted EMT thread expires.
+ * 0 if the EMT thread is not blocked here. */
+ uint64_t volatile u64HaltExpire;
+ /** The event semaphore the EMT thread is blocking on. */
+ RTSEMEVENTMULTI HaltEventMulti;
+ /** The ring-3 mapping of the VMCPU structure. */
+ RTR0MEMOBJ VMCpuMapObj;
+ /** The APIC ID of the CPU that EMT was scheduled on the last time we checked.
+ * @todo Extend to 32-bit and use most suitable APIC ID function when we
+ * start using this for something sensible... */
+ uint8_t iCpuEmt;
+} GVMMPERVCPU;
+/** Pointer to the GVMM per VCPU data. */
+typedef GVMMPERVCPU *PGVMMPERVCPU;
+
+/**
+ * The GVMM per VM data.
+ */
+typedef struct GVMMPERVM
+{
+ /** The shared VM data structure allocation object (PVMR0). */
+ RTR0MEMOBJ VMMemObj;
+ /** The Ring-3 mapping of the shared VM data structure (PVMR3). */
+ RTR0MEMOBJ VMMapObj;
+ /** The allocation object for the VM pages. */
+ RTR0MEMOBJ VMPagesMemObj;
+ /** The ring-3 mapping of the VM pages. */
+ RTR0MEMOBJ VMPagesMapObj;
+
+ /** The scheduler statistics. */
+ GVMMSTATSSCHED StatsSched;
+
+ /** Whether the per-VM ring-0 initialization has been performed. */
+ bool fDoneVMMR0Init;
+ /** Whether the per-VM ring-0 termination is being or has been performed. */
+ bool fDoneVMMR0Term;
+} GVMMPERVM;
+/** Pointer to the GVMM per VM data. */
+typedef GVMMPERVM *PGVMMPERVM;
+
+
+#endif /* !VMM_INCLUDED_SRC_VMMR0_GVMMR0Internal_h */
+
diff --git a/src/VBox/VMM/VMMR0/HMR0.cpp b/src/VBox/VMM/VMMR0/HMR0.cpp
new file mode 100644
index 00000000..5d3e3533
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/HMR0.cpp
@@ -0,0 +1,1862 @@
+/* $Id: HMR0.cpp $ */
+/** @file
+ * Hardware Assisted Virtualization Manager (HM) - Host Context Ring-0.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_HM
+#define VMCPU_INCL_CPUM_GST_CTX
+#include <VBox/vmm/hm.h>
+#include <VBox/vmm/pgm.h>
+#include "HMInternal.h"
+#include <VBox/vmm/vmcc.h>
+#include <VBox/vmm/hm_svm.h>
+#include <VBox/vmm/hmvmxinline.h>
+#include <VBox/err.h>
+#include <VBox/log.h>
+#include <iprt/assert.h>
+#include <iprt/asm.h>
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/cpuset.h>
+#include <iprt/mem.h>
+#include <iprt/memobj.h>
+#include <iprt/once.h>
+#include <iprt/param.h>
+#include <iprt/power.h>
+#include <iprt/string.h>
+#include <iprt/thread.h>
+#include <iprt/x86.h>
+#include "HMVMXR0.h"
+#include "HMSVMR0.h"
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static DECLCALLBACK(void) hmR0EnableCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2);
+static DECLCALLBACK(void) hmR0DisableCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2);
+static DECLCALLBACK(void) hmR0InitIntelCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
+static DECLCALLBACK(void) hmR0InitAmdCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
+static DECLCALLBACK(void) hmR0PowerCallback(RTPOWEREVENT enmEvent, void *pvUser);
+static DECLCALLBACK(void) hmR0MpEventCallback(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvData);
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * This is used to manage the status code of a RTMpOnAll in HM.
+ */
+typedef struct HMR0FIRSTRC
+{
+ /** The status code. */
+ int32_t volatile rc;
+ /** The ID of the CPU reporting the first failure. */
+ RTCPUID volatile idCpu;
+} HMR0FIRSTRC;
+/** Pointer to a first return code structure. */
+typedef HMR0FIRSTRC *PHMR0FIRSTRC;
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/**
+ * Global data.
+ */
+static struct
+{
+ /** Per CPU globals. */
+ HMPHYSCPU aCpuInfo[RTCPUSET_MAX_CPUS];
+
+ /** @name Ring-0 method table for AMD-V and VT-x specific operations.
+ * @{ */
+ DECLR0CALLBACKMEMBER(int, pfnEnterSession, (PVMCPUCC pVCpu));
+ DECLR0CALLBACKMEMBER(void, pfnThreadCtxCallback, (RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit));
+ DECLR0CALLBACKMEMBER(int, pfnCallRing3Callback, (PVMCPUCC pVCpu, VMMCALLRING3 enmOperation));
+ DECLR0CALLBACKMEMBER(int, pfnExportHostState, (PVMCPUCC pVCpu));
+ DECLR0CALLBACKMEMBER(VBOXSTRICTRC, pfnRunGuestCode, (PVMCPUCC pVCpu));
+ DECLR0CALLBACKMEMBER(int, pfnEnableCpu, (PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage,
+ bool fEnabledByHost, PCSUPHWVIRTMSRS pHwvirtMsrs));
+ DECLR0CALLBACKMEMBER(int, pfnDisableCpu, (PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage));
+ DECLR0CALLBACKMEMBER(int, pfnInitVM, (PVMCC pVM));
+ DECLR0CALLBACKMEMBER(int, pfnTermVM, (PVMCC pVM));
+ DECLR0CALLBACKMEMBER(int, pfnSetupVM, (PVMCC pVM));
+ /** @} */
+
+ /** Hardware-virtualization data. */
+ struct
+ {
+ union
+ {
+ /** VT-x data. */
+ struct
+ {
+ /** Host CR4 value (set by ring-0 VMX init) */
+ uint64_t u64HostCr4;
+ /** Host EFER value (set by ring-0 VMX init) */
+ uint64_t u64HostMsrEfer;
+ /** Host SMM monitor control (used for logging/diagnostics) */
+ uint64_t u64HostSmmMonitorCtl;
+ /** Last instruction error. */
+ uint32_t ulLastInstrError;
+ /** The shift mask employed by the VMX-Preemption timer. */
+ uint8_t cPreemptTimerShift;
+ /** Padding. */
+ uint8_t abPadding[3];
+ /** Whether we're using the preemption timer or not. */
+ bool fUsePreemptTimer;
+ /** Whether we're using SUPR0EnableVTx or not. */
+ bool fUsingSUPR0EnableVTx;
+ /** Set if we've called SUPR0EnableVTx(true) and should disable it during
+ * module termination. */
+ bool fCalledSUPR0EnableVTx;
+ /** Set by us to indicate VMX is supported by the CPU. */
+ bool fSupported;
+ } vmx;
+
+ /** AMD-V data. */
+ struct
+ {
+ /** SVM revision. */
+ uint32_t u32Rev;
+ /** SVM feature bits from cpuid 0x8000000a */
+ uint32_t u32Features;
+ /** Padding. */
+ bool afPadding[3];
+ /** Set by us to indicate SVM is supported by the CPU. */
+ bool fSupported;
+ } svm;
+ } u;
+ /** Maximum allowed ASID/VPID (inclusive). */
+ uint32_t uMaxAsid;
+ /** MSRs. */
+ SUPHWVIRTMSRS Msrs;
+ } hwvirt;
+
+ /** Last recorded error code during HM ring-0 init. */
+ int32_t rcInit;
+
+ /** If set, VT-x/AMD-V is enabled globally at init time, otherwise it's
+ * enabled and disabled each time it's used to execute guest code. */
+ bool fGlobalInit;
+ /** Indicates whether the host is suspending or not. We'll refuse a few
+ * actions when the host is being suspended to speed up the suspending and
+ * avoid trouble. */
+ bool volatile fSuspended;
+
+ /** Whether we've already initialized all CPUs.
+ * @remarks We could check the EnableAllCpusOnce state, but this is
+ * simpler and hopefully easier to understand. */
+ bool fEnabled;
+ /** Serialize initialization in HMR0EnableAllCpus. */
+ RTONCE EnableAllCpusOnce;
+} g_HmR0;
+
+
+/**
+ * Initializes a first return code structure.
+ *
+ * @param pFirstRc The structure to init.
+ */
+static void hmR0FirstRcInit(PHMR0FIRSTRC pFirstRc)
+{
+ pFirstRc->rc = VINF_SUCCESS;
+ pFirstRc->idCpu = NIL_RTCPUID;
+}
+
+
+/**
+ * Try to set the status code (success ignored).
+ *
+ * @param pFirstRc The first return code structure.
+ * @param rc The status code.
+ */
+static void hmR0FirstRcSetStatus(PHMR0FIRSTRC pFirstRc, int rc)
+{
+ if ( RT_FAILURE(rc)
+ && ASMAtomicCmpXchgS32(&pFirstRc->rc, rc, VINF_SUCCESS))
+ pFirstRc->idCpu = RTMpCpuId();
+}
+
+
+/**
+ * Get the status code of a first return code structure.
+ *
+ * @returns The status code; VINF_SUCCESS or error status, no informational or
+ * warning errors.
+ * @param pFirstRc The first return code structure.
+ */
+static int hmR0FirstRcGetStatus(PHMR0FIRSTRC pFirstRc)
+{
+ return pFirstRc->rc;
+}
+
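+/*
+ * Usage sketch (illustrative only): the first-failure pattern as used with
+ * RTMpOnAll - each CPU reports into the same structure and only the first
+ * failure (and the reporting CPU id) is kept.  The worker name is hypothetical.
+ *
+ *     HMR0FIRSTRC FirstRc;
+ *     hmR0FirstRcInit(&FirstRc);
+ *     int rc = RTMpOnAll(hmR0ProbeCpuSketch, &FirstRc, NULL); // worker calls hmR0FirstRcSetStatus()
+ *     if (RT_SUCCESS(rc))
+ *         rc = hmR0FirstRcGetStatus(&FirstRc);
+ */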
+
+#ifdef VBOX_STRICT
+# ifndef DEBUG_bird
+/**
+ * Get the CPU ID on which the failure status code was reported.
+ *
+ * @returns The CPU ID, NIL_RTCPUID if no failure was reported.
+ * @param pFirstRc The first return code structure.
+ */
+static RTCPUID hmR0FirstRcGetCpuId(PHMR0FIRSTRC pFirstRc)
+{
+ return pFirstRc->idCpu;
+}
+# endif
+#endif /* VBOX_STRICT */
+
+
+/** @name Dummy callback handlers.
+ * @{ */
+
+static DECLCALLBACK(int) hmR0DummyEnter(PVMCPUCC pVCpu)
+{
+ RT_NOREF1(pVCpu);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(void) hmR0DummyThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
+{
+ RT_NOREF3(enmEvent, pVCpu, fGlobalInit);
+}
+
+static DECLCALLBACK(int) hmR0DummyEnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage,
+ bool fEnabledBySystem, PCSUPHWVIRTMSRS pHwvirtMsrs)
+{
+ RT_NOREF6(pHostCpu, pVM, pvCpuPage, HCPhysCpuPage, fEnabledBySystem, pHwvirtMsrs);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(int) hmR0DummyDisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
+{
+ RT_NOREF3(pHostCpu, pvCpuPage, HCPhysCpuPage);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(int) hmR0DummyInitVM(PVMCC pVM)
+{
+ RT_NOREF1(pVM);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(int) hmR0DummyTermVM(PVMCC pVM)
+{
+ RT_NOREF1(pVM);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(int) hmR0DummySetupVM(PVMCC pVM)
+{
+ RT_NOREF1(pVM);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(int) hmR0DummyCallRing3Callback(PVMCPUCC pVCpu, VMMCALLRING3 enmOperation)
+{
+ RT_NOREF2(pVCpu, enmOperation);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(VBOXSTRICTRC) hmR0DummyRunGuestCode(PVMCPUCC pVCpu)
+{
+ RT_NOREF(pVCpu);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(int) hmR0DummyExportHostState(PVMCPUCC pVCpu)
+{
+ RT_NOREF1(pVCpu);
+ return VINF_SUCCESS;
+}
+
+/** @} */
+
+
+/**
+ * Intel specific initialization code.
+ *
+ * @returns VBox status code (will only fail if out of memory).
+ */
+static int hmR0InitIntel(void)
+{
+ /* Read this MSR now as it may be useful for error reporting when initializing VT-x fails. */
+ g_HmR0.hwvirt.Msrs.u.vmx.u64FeatCtrl = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
+
+ /*
+ * First try to use the native kernel API for controlling VT-x.
+ * (This is only supported by some Mac OS X kernels atm.)
+ */
+ int rc = g_HmR0.rcInit = SUPR0EnableVTx(true /* fEnable */);
+ g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx = rc != VERR_NOT_SUPPORTED;
+ if (g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
+ {
+ AssertLogRelMsg(rc == VINF_SUCCESS || rc == VERR_VMX_IN_VMX_ROOT_MODE || rc == VERR_VMX_NO_VMX, ("%Rrc\n", rc));
+ if (RT_SUCCESS(rc))
+ {
+ g_HmR0.hwvirt.u.vmx.fSupported = true;
+ rc = SUPR0EnableVTx(false /* fEnable */);
+ AssertLogRelRC(rc);
+ }
+ }
+ else
+ {
+ HMR0FIRSTRC FirstRc;
+ hmR0FirstRcInit(&FirstRc);
+ g_HmR0.rcInit = RTMpOnAll(hmR0InitIntelCpu, &FirstRc, NULL);
+ if (RT_SUCCESS(g_HmR0.rcInit))
+ g_HmR0.rcInit = hmR0FirstRcGetStatus(&FirstRc);
+ }
+
+ if (RT_SUCCESS(g_HmR0.rcInit))
+ {
+ /* Read CR4 and EFER for logging/diagnostic purposes. */
+ g_HmR0.hwvirt.u.vmx.u64HostCr4 = ASMGetCR4();
+ g_HmR0.hwvirt.u.vmx.u64HostMsrEfer = ASMRdMsr(MSR_K6_EFER);
+
+ /* Get VMX MSRs for determining VMX features we can ultimately use. */
+ SUPR0GetHwvirtMsrs(&g_HmR0.hwvirt.Msrs, SUPVTCAPS_VT_X, false /* fForce */);
+
+ /*
+ * Nested KVM workaround: Intel SDM section 34.15.5 describes that
+ * MSR_IA32_SMM_MONITOR_CTL depends on bit 49 of MSR_IA32_VMX_BASIC while
+ * table 35-2 says that this MSR is available if either VMX or SMX is supported.
+ */
+ uint64_t const uVmxBasicMsr = g_HmR0.hwvirt.Msrs.u.vmx.u64Basic;
+ if (RT_BF_GET(uVmxBasicMsr, VMX_BF_BASIC_DUAL_MON))
+ g_HmR0.hwvirt.u.vmx.u64HostSmmMonitorCtl = ASMRdMsr(MSR_IA32_SMM_MONITOR_CTL);
+
+ /* Initialize VPID - 16 bits ASID. */
+ g_HmR0.hwvirt.uMaxAsid = 0x10000; /* exclusive */
+
+ /*
+         * If the host OS has not enabled VT-x for us, try entering VMX root mode
+         * to verify that VT-x is really usable.
+ */
+ if (!g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
+ {
+ /* Allocate a temporary VMXON region. */
+ RTR0MEMOBJ hScatchMemObj;
+ rc = RTR0MemObjAllocCont(&hScatchMemObj, PAGE_SIZE, false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ {
+ LogRel(("hmR0InitIntel: RTR0MemObjAllocCont(,PAGE_SIZE,false) -> %Rrc\n", rc));
+ return rc;
+ }
+ void *pvScatchPage = RTR0MemObjAddress(hScatchMemObj);
+ RTHCPHYS const HCPhysScratchPage = RTR0MemObjGetPagePhysAddr(hScatchMemObj, 0);
+ ASMMemZeroPage(pvScatchPage);
+
+ /* Set revision dword at the beginning of the VMXON structure. */
+ *(uint32_t *)pvScatchPage = RT_BF_GET(uVmxBasicMsr, VMX_BF_BASIC_VMCS_ID);
+
+ /* Make sure we don't get rescheduled to another CPU during this probe. */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ /* Enable CR4.VMXE if it isn't already set. */
+ RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
+
+ /*
+ * The only way of checking if we're in VMX root mode or not is to try and enter it.
+ * There is no instruction or control bit that tells us if we're in VMX root mode.
+ * Therefore, try and enter VMX root mode here.
+ */
+ rc = VMXEnable(HCPhysScratchPage);
+ if (RT_SUCCESS(rc))
+ {
+ g_HmR0.hwvirt.u.vmx.fSupported = true;
+ VMXDisable();
+ }
+ else
+ {
+ /*
+ * KVM leaves the CPU in VMX root mode. Not only is this not allowed,
+ * it will crash the host when we enter raw mode, because:
+ *
+ * (a) clearing X86_CR4_VMXE in CR4 causes a #GP (we no longer modify
+ * this bit), and
+ * (b) turning off paging causes a #GP (unavoidable when switching
+             *         from long mode to 32-bit mode or from 32-bit to PAE paging).
+ *
+ * They should fix their code, but until they do we simply refuse to run.
+ */
+ g_HmR0.rcInit = VERR_VMX_IN_VMX_ROOT_MODE;
+ Assert(g_HmR0.hwvirt.u.vmx.fSupported == false);
+ }
+
+ /* Restore CR4.VMXE if it wasn't set prior to us setting it above. */
+ if (!(uOldCr4 & X86_CR4_VMXE))
+ SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
+
+ /* Restore interrupts. */
+ ASMSetFlags(fEFlags);
+
+ RTR0MemObjFree(hScatchMemObj, false);
+ }
+
+ if (g_HmR0.hwvirt.u.vmx.fSupported)
+ {
+ rc = VMXR0GlobalInit();
+ if (RT_FAILURE(rc))
+ g_HmR0.rcInit = rc;
+
+ /*
+ * Install the VT-x methods.
+ */
+ g_HmR0.pfnEnterSession = VMXR0Enter;
+ g_HmR0.pfnThreadCtxCallback = VMXR0ThreadCtxCallback;
+ g_HmR0.pfnCallRing3Callback = VMXR0CallRing3Callback;
+ g_HmR0.pfnExportHostState = VMXR0ExportHostState;
+ g_HmR0.pfnRunGuestCode = VMXR0RunGuestCode;
+ g_HmR0.pfnEnableCpu = VMXR0EnableCpu;
+ g_HmR0.pfnDisableCpu = VMXR0DisableCpu;
+ g_HmR0.pfnInitVM = VMXR0InitVM;
+ g_HmR0.pfnTermVM = VMXR0TermVM;
+ g_HmR0.pfnSetupVM = VMXR0SetupVM;
+
+ /*
+ * Check for the VMX-Preemption Timer and adjust for the "VMX-Preemption
+ * Timer Does Not Count Down at the Rate Specified" CPU erratum.
+ */
+ VMXCTLSMSR PinCtls;
+ PinCtls.u = g_HmR0.hwvirt.Msrs.u.vmx.u64PinCtls;
+ if (PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER)
+ {
+ uint64_t const uVmxMiscMsr = g_HmR0.hwvirt.Msrs.u.vmx.u64Misc;
+ g_HmR0.hwvirt.u.vmx.fUsePreemptTimer = true;
+ g_HmR0.hwvirt.u.vmx.cPreemptTimerShift = RT_BF_GET(uVmxMiscMsr, VMX_BF_MISC_PREEMPT_TIMER_TSC);
+ if (HMIsSubjectToVmxPreemptTimerErratum())
+ g_HmR0.hwvirt.u.vmx.cPreemptTimerShift = 0; /* This is about right most of the time here. */
+ }
+ }
+ }
+#ifdef LOG_ENABLED
+ else
+ SUPR0Printf("hmR0InitIntelCpu failed with rc=%Rrc\n", g_HmR0.rcInit);
+#endif
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * AMD-specific initialization code.
+ *
+ * @returns VBox status code (will only fail if out of memory).
+ */
+static int hmR0InitAmd(void)
+{
+ /* Call the global AMD-V initialization routine (should only fail in out-of-memory situations). */
+ int rc = SVMR0GlobalInit();
+ if (RT_FAILURE(rc))
+ {
+ g_HmR0.rcInit = rc;
+ return rc;
+ }
+
+ /*
+ * Install the AMD-V methods.
+ */
+ g_HmR0.pfnEnterSession = SVMR0Enter;
+ g_HmR0.pfnThreadCtxCallback = SVMR0ThreadCtxCallback;
+ g_HmR0.pfnCallRing3Callback = SVMR0CallRing3Callback;
+ g_HmR0.pfnExportHostState = SVMR0ExportHostState;
+ g_HmR0.pfnRunGuestCode = SVMR0RunGuestCode;
+ g_HmR0.pfnEnableCpu = SVMR0EnableCpu;
+ g_HmR0.pfnDisableCpu = SVMR0DisableCpu;
+ g_HmR0.pfnInitVM = SVMR0InitVM;
+ g_HmR0.pfnTermVM = SVMR0TermVM;
+ g_HmR0.pfnSetupVM = SVMR0SetupVM;
+
+ /* Query AMD features. */
+ uint32_t u32Dummy;
+ ASMCpuId(0x8000000a, &g_HmR0.hwvirt.u.svm.u32Rev, &g_HmR0.hwvirt.uMaxAsid, &u32Dummy, &g_HmR0.hwvirt.u.svm.u32Features);
+
+ /*
+ * We need to check if AMD-V has been properly initialized on all CPUs.
+ * Some BIOSes might do a poor job.
+ */
+ HMR0FIRSTRC FirstRc;
+ hmR0FirstRcInit(&FirstRc);
+ rc = RTMpOnAll(hmR0InitAmdCpu, &FirstRc, NULL);
+ AssertRC(rc);
+ if (RT_SUCCESS(rc))
+ rc = hmR0FirstRcGetStatus(&FirstRc);
+#ifndef DEBUG_bird
+ AssertMsg(rc == VINF_SUCCESS || rc == VERR_SVM_IN_USE,
+ ("hmR0InitAmdCpu failed for cpu %d with rc=%Rrc\n", hmR0FirstRcGetCpuId(&FirstRc), rc));
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ SUPR0GetHwvirtMsrs(&g_HmR0.hwvirt.Msrs, SUPVTCAPS_AMD_V, false /* fForce */);
+ g_HmR0.hwvirt.u.svm.fSupported = true;
+ }
+ else
+ {
+ g_HmR0.rcInit = rc;
+ if (rc == VERR_SVM_DISABLED || rc == VERR_SVM_IN_USE)
+ rc = VINF_SUCCESS; /* Don't fail if AMD-V is disabled or in use. */
+ }
+ return rc;
+}
+
+
+/**
+ * Does global Ring-0 HM initialization (at module init).
+ *
+ * @returns VBox status code.
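+ *
+ * A minimal, hypothetical caller sketch (illustration only; the real module
+ * init code does considerably more):
+ * @code
+ *     int rc = HMR0Init();    /* succeeds even when VT-x/AMD-V is unusable */
+ *     AssertRCReturn(rc, rc);
+ *     ...
+ *     rc = HMR0Term();        /* at module termination */
+ * @endcode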
+ */
+VMMR0_INT_DECL(int) HMR0Init(void)
+{
+ /*
+ * Initialize the globals.
+ */
+ g_HmR0.fEnabled = false;
+ static RTONCE s_OnceInit = RTONCE_INITIALIZER;
+ g_HmR0.EnableAllCpusOnce = s_OnceInit;
+ for (unsigned i = 0; i < RT_ELEMENTS(g_HmR0.aCpuInfo); i++)
+ {
+ g_HmR0.aCpuInfo[i].idCpu = NIL_RTCPUID;
+ g_HmR0.aCpuInfo[i].hMemObj = NIL_RTR0MEMOBJ;
+ g_HmR0.aCpuInfo[i].HCPhysMemObj = NIL_RTHCPHYS;
+ g_HmR0.aCpuInfo[i].pvMemObj = NULL;
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm = NIL_RTR0MEMOBJ;
+ g_HmR0.aCpuInfo[i].n.svm.HCPhysNstGstMsrpm = NIL_RTHCPHYS;
+ g_HmR0.aCpuInfo[i].n.svm.pvNstGstMsrpm = NULL;
+#endif
+ }
+
+ /* Fill in all callbacks with placeholders. */
+ g_HmR0.pfnEnterSession = hmR0DummyEnter;
+ g_HmR0.pfnThreadCtxCallback = hmR0DummyThreadCtxCallback;
+ g_HmR0.pfnCallRing3Callback = hmR0DummyCallRing3Callback;
+ g_HmR0.pfnExportHostState = hmR0DummyExportHostState;
+ g_HmR0.pfnRunGuestCode = hmR0DummyRunGuestCode;
+ g_HmR0.pfnEnableCpu = hmR0DummyEnableCpu;
+ g_HmR0.pfnDisableCpu = hmR0DummyDisableCpu;
+ g_HmR0.pfnInitVM = hmR0DummyInitVM;
+ g_HmR0.pfnTermVM = hmR0DummyTermVM;
+ g_HmR0.pfnSetupVM = hmR0DummySetupVM;
+
+ /* Default is global VT-x/AMD-V init. */
+ g_HmR0.fGlobalInit = true;
+
+ /*
+ * Make sure aCpuInfo is big enough for all the CPUs on this system.
+ */
+ if (RTMpGetArraySize() > RT_ELEMENTS(g_HmR0.aCpuInfo))
+ {
+ LogRel(("HM: Too many real CPUs/cores/threads - %u, max %u\n", RTMpGetArraySize(), RT_ELEMENTS(g_HmR0.aCpuInfo)));
+ return VERR_TOO_MANY_CPUS;
+ }
+
+ /*
+ * Check for VT-x or AMD-V support.
+ * Return failure only in out-of-memory situations.
+ */
+ uint32_t fCaps = 0;
+ int rc = SUPR0GetVTSupport(&fCaps);
+ if (RT_SUCCESS(rc))
+ {
+ if (fCaps & SUPVTCAPS_VT_X)
+ {
+ rc = hmR0InitIntel();
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+ else
+ {
+ Assert(fCaps & SUPVTCAPS_AMD_V);
+ rc = hmR0InitAmd();
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+ }
+ else
+ g_HmR0.rcInit = VERR_UNSUPPORTED_CPU;
+
+ /*
+ * Register notification callbacks that we can use to disable/enable CPUs
+ * when brought offline/online or suspending/resuming.
+ */
+ if (!g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
+ {
+ rc = RTMpNotificationRegister(hmR0MpEventCallback, NULL);
+ AssertRC(rc);
+
+ rc = RTPowerNotificationRegister(hmR0PowerCallback, NULL);
+ AssertRC(rc);
+ }
+
+ /* We return success here because module init shall not fail if HM fails to initialize. */
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does global Ring-0 HM termination (at module termination).
+ *
+ * @returns VBox status code.
+ */
+VMMR0_INT_DECL(int) HMR0Term(void)
+{
+ int rc;
+ if ( g_HmR0.hwvirt.u.vmx.fSupported
+ && g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
+ {
+ /*
+ * Simple if the host OS manages VT-x.
+ */
+ Assert(g_HmR0.fGlobalInit);
+
+ if (g_HmR0.hwvirt.u.vmx.fCalledSUPR0EnableVTx)
+ {
+ rc = SUPR0EnableVTx(false /* fEnable */);
+ g_HmR0.hwvirt.u.vmx.fCalledSUPR0EnableVTx = false;
+ }
+ else
+ rc = VINF_SUCCESS;
+
+ for (unsigned iCpu = 0; iCpu < RT_ELEMENTS(g_HmR0.aCpuInfo); iCpu++)
+ {
+ g_HmR0.aCpuInfo[iCpu].fConfigured = false;
+ Assert(g_HmR0.aCpuInfo[iCpu].hMemObj == NIL_RTR0MEMOBJ);
+ }
+ }
+ else
+ {
+ Assert(!g_HmR0.hwvirt.u.vmx.fSupported || !g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx);
+
+ /* Doesn't really matter if this fails. */
+ rc = RTMpNotificationDeregister(hmR0MpEventCallback, NULL); AssertRC(rc);
+ rc = RTPowerNotificationDeregister(hmR0PowerCallback, NULL); AssertRC(rc);
+
+ /*
+ * Disable VT-x/AMD-V on all CPUs if we enabled it before.
+ */
+ if (g_HmR0.fGlobalInit)
+ {
+ HMR0FIRSTRC FirstRc;
+ hmR0FirstRcInit(&FirstRc);
+ rc = RTMpOnAll(hmR0DisableCpuCallback, NULL /* pvUser 1 */, &FirstRc);
+ Assert(RT_SUCCESS(rc) || rc == VERR_NOT_SUPPORTED);
+ if (RT_SUCCESS(rc))
+ rc = hmR0FirstRcGetStatus(&FirstRc);
+ }
+
+ /*
+ * Free the per-cpu pages used for VT-x and AMD-V.
+ */
+ for (unsigned i = 0; i < RT_ELEMENTS(g_HmR0.aCpuInfo); i++)
+ {
+ if (g_HmR0.aCpuInfo[i].hMemObj != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(g_HmR0.aCpuInfo[i].hMemObj, false);
+ g_HmR0.aCpuInfo[i].hMemObj = NIL_RTR0MEMOBJ;
+ g_HmR0.aCpuInfo[i].HCPhysMemObj = NIL_RTHCPHYS;
+ g_HmR0.aCpuInfo[i].pvMemObj = NULL;
+ }
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm, false);
+ g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm = NIL_RTR0MEMOBJ;
+ g_HmR0.aCpuInfo[i].n.svm.HCPhysNstGstMsrpm = NIL_RTHCPHYS;
+ g_HmR0.aCpuInfo[i].n.svm.pvNstGstMsrpm = NULL;
+ }
+#endif
+ }
+ }
+
+ /** @todo This needs cleaning up. There's no matching
+ * hmR0TermIntel()/hmR0TermAmd() and all the VT-x/AMD-V specific bits
+ * should move into their respective modules. */
+ /* Finally, call global VT-x/AMD-V termination. */
+ if (g_HmR0.hwvirt.u.vmx.fSupported)
+ VMXR0GlobalTerm();
+ else if (g_HmR0.hwvirt.u.svm.fSupported)
+ SVMR0GlobalTerm();
+
+ return rc;
+}
+
+
+/**
+ * Worker function used by hmR0PowerCallback() and HMR0Init() to initialize VT-x
+ * on a CPU.
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 Pointer to the first RC structure.
+ * @param pvUser2 Ignored.
+ */
+static DECLCALLBACK(void) hmR0InitIntelCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PHMR0FIRSTRC pFirstRc = (PHMR0FIRSTRC)pvUser1;
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /** @todo fix idCpu == index assumption (rainy day) */
+ NOREF(idCpu); NOREF(pvUser2);
+
+ int rc = SUPR0GetVmxUsability(NULL /* pfIsSmxModeAmbiguous */);
+ hmR0FirstRcSetStatus(pFirstRc, rc);
+}
+
+
+/**
+ * Worker function used by hmR0PowerCallback() and HMR0Init() to initialize AMD-V
+ * on a CPU.
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 Pointer to the first RC structure.
+ * @param pvUser2 Ignored.
+ */
+static DECLCALLBACK(void) hmR0InitAmdCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PHMR0FIRSTRC pFirstRc = (PHMR0FIRSTRC)pvUser1;
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /** @todo fix idCpu == index assumption (rainy day) */
+ NOREF(idCpu); NOREF(pvUser2);
+
+ int rc = SUPR0GetSvmUsability(true /* fInitSvm */);
+ hmR0FirstRcSetStatus(pFirstRc, rc);
+}
+
+
+/**
+ * Enable VT-x or AMD-V on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure. Can be NULL.
+ * @param idCpu The identifier for the CPU the function is called on.
+ *
+ * @remarks May be called with interrupts disabled!
+ */
+static int hmR0EnableCpu(PVMCC pVM, RTCPUID idCpu)
+{
+ PHMPHYSCPU pHostCpu = &g_HmR0.aCpuInfo[idCpu];
+
+ Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /** @todo fix idCpu == index assumption (rainy day) */
+ Assert(idCpu < RT_ELEMENTS(g_HmR0.aCpuInfo));
+ Assert(!pHostCpu->fConfigured);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ pHostCpu->idCpu = idCpu;
+ /* Do NOT reset cTlbFlushes here, see @bugref{6255}. */
+
+ int rc;
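+    /* When the host OS manages VT-x itself (SUPR0EnableVTx), it has already been
+       turned on system-wide and no per-CPU VMXON page exists, so NULL/NIL is
+       passed down with fEnabledBySystem=true; otherwise hand the method table the
+       per-CPU page allocated in hmR0EnableAllCpuOnce(). */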
+ if ( g_HmR0.hwvirt.u.vmx.fSupported
+ && g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
+ rc = g_HmR0.pfnEnableCpu(pHostCpu, pVM, NULL /* pvCpuPage */, NIL_RTHCPHYS, true, &g_HmR0.hwvirt.Msrs);
+ else
+ {
+ AssertLogRelMsgReturn(pHostCpu->hMemObj != NIL_RTR0MEMOBJ, ("hmR0EnableCpu failed idCpu=%u.\n", idCpu), VERR_HM_IPE_1);
+ rc = g_HmR0.pfnEnableCpu(pHostCpu, pVM, pHostCpu->pvMemObj, pHostCpu->HCPhysMemObj, false, &g_HmR0.hwvirt.Msrs);
+ }
+ if (RT_SUCCESS(rc))
+ pHostCpu->fConfigured = true;
+ return rc;
+}
+
+
+/**
+ * Worker function passed to RTMpOnAll() that is to be called on all CPUs.
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 Opaque pointer to the VM (can be NULL!).
+ * @param pvUser2 The 2nd user argument.
+ */
+static DECLCALLBACK(void) hmR0EnableCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PVMCC pVM = (PVMCC)pvUser1; /* can be NULL! */
+ PHMR0FIRSTRC pFirstRc = (PHMR0FIRSTRC)pvUser2;
+ AssertReturnVoid(g_HmR0.fGlobalInit);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ hmR0FirstRcSetStatus(pFirstRc, hmR0EnableCpu(pVM, idCpu));
+}
+
+
+/**
+ * RTOnce callback employed by HMR0EnableAllCpus.
+ *
+ * @returns VBox status code.
+ * @param pvUser Pointer to the VM.
+ */
+static DECLCALLBACK(int32_t) hmR0EnableAllCpuOnce(void *pvUser)
+{
+ PVMCC pVM = (PVMCC)pvUser;
+
+ /*
+ * Indicate that we've initialized.
+ *
+ * Note! There is a potential race between this function and the suspend
+ * notification. Kind of unlikely though, so ignored for now.
+ */
+ AssertReturn(!g_HmR0.fEnabled, VERR_HM_ALREADY_ENABLED_IPE);
+ ASMAtomicWriteBool(&g_HmR0.fEnabled, true);
+
+ /*
+ * The global init variable is set by the first VM.
+ */
+ g_HmR0.fGlobalInit = pVM->hm.s.fGlobalInit;
+
+#ifdef VBOX_STRICT
+ for (unsigned i = 0; i < RT_ELEMENTS(g_HmR0.aCpuInfo); i++)
+ {
+ Assert(g_HmR0.aCpuInfo[i].hMemObj == NIL_RTR0MEMOBJ);
+ Assert(g_HmR0.aCpuInfo[i].HCPhysMemObj == NIL_RTHCPHYS);
+ Assert(g_HmR0.aCpuInfo[i].pvMemObj == NULL);
+ Assert(!g_HmR0.aCpuInfo[i].fConfigured);
+ Assert(!g_HmR0.aCpuInfo[i].cTlbFlushes);
+ Assert(!g_HmR0.aCpuInfo[i].uCurrentAsid);
+# ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ Assert(g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm == NIL_RTR0MEMOBJ);
+ Assert(g_HmR0.aCpuInfo[i].n.svm.HCPhysNstGstMsrpm == NIL_RTHCPHYS);
+ Assert(g_HmR0.aCpuInfo[i].n.svm.pvNstGstMsrpm == NULL);
+# endif
+ }
+#endif
+
+ int rc;
+ if ( g_HmR0.hwvirt.u.vmx.fSupported
+ && g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
+ {
+ /*
+ * Global VT-x initialization API (only darwin for now).
+ */
+ rc = SUPR0EnableVTx(true /* fEnable */);
+ if (RT_SUCCESS(rc))
+ {
+ g_HmR0.hwvirt.u.vmx.fCalledSUPR0EnableVTx = true;
+ /* If the host provides a VT-x init API, then we'll rely on that for global init. */
+ g_HmR0.fGlobalInit = pVM->hm.s.fGlobalInit = true;
+ }
+ else
+ AssertMsgFailed(("hmR0EnableAllCpuOnce/SUPR0EnableVTx: rc=%Rrc\n", rc));
+ }
+ else
+ {
+ /*
+ * We're doing the job ourselves.
+ */
+ /* Allocate one page per cpu for the global VT-x and AMD-V pages */
+ for (unsigned i = 0; i < RT_ELEMENTS(g_HmR0.aCpuInfo); i++)
+ {
+ Assert(g_HmR0.aCpuInfo[i].hMemObj == NIL_RTR0MEMOBJ);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ Assert(g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm == NIL_RTR0MEMOBJ);
+#endif
+ if (RTMpIsCpuPossible(RTMpCpuIdFromSetIndex(i)))
+ {
+ /** @todo NUMA */
+ rc = RTR0MemObjAllocCont(&g_HmR0.aCpuInfo[i].hMemObj, PAGE_SIZE, false /* executable R0 mapping */);
+ AssertLogRelRCReturn(rc, rc);
+
+ g_HmR0.aCpuInfo[i].HCPhysMemObj = RTR0MemObjGetPagePhysAddr(g_HmR0.aCpuInfo[i].hMemObj, 0);
+ Assert(g_HmR0.aCpuInfo[i].HCPhysMemObj != NIL_RTHCPHYS);
+ Assert(!(g_HmR0.aCpuInfo[i].HCPhysMemObj & PAGE_OFFSET_MASK));
+
+ g_HmR0.aCpuInfo[i].pvMemObj = RTR0MemObjAddress(g_HmR0.aCpuInfo[i].hMemObj);
+ AssertPtr(g_HmR0.aCpuInfo[i].pvMemObj);
+ ASMMemZeroPage(g_HmR0.aCpuInfo[i].pvMemObj);
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ rc = RTR0MemObjAllocCont(&g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm, SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT,
+ false /* executable R0 mapping */);
+ AssertLogRelRCReturn(rc, rc);
+
+ g_HmR0.aCpuInfo[i].n.svm.HCPhysNstGstMsrpm = RTR0MemObjGetPagePhysAddr(g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm, 0);
+ Assert(g_HmR0.aCpuInfo[i].n.svm.HCPhysNstGstMsrpm != NIL_RTHCPHYS);
+ Assert(!(g_HmR0.aCpuInfo[i].n.svm.HCPhysNstGstMsrpm & PAGE_OFFSET_MASK));
+
+ g_HmR0.aCpuInfo[i].n.svm.pvNstGstMsrpm = RTR0MemObjAddress(g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm);
+ AssertPtr(g_HmR0.aCpuInfo[i].n.svm.pvNstGstMsrpm);
+ ASMMemFill32(g_HmR0.aCpuInfo[i].n.svm.pvNstGstMsrpm, SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT, UINT32_C(0xffffffff));
+#endif
+ }
+ }
+
+ rc = VINF_SUCCESS;
+ }
+
+ if ( RT_SUCCESS(rc)
+ && g_HmR0.fGlobalInit)
+ {
+ /* First time, so initialize each cpu/core. */
+ HMR0FIRSTRC FirstRc;
+ hmR0FirstRcInit(&FirstRc);
+ rc = RTMpOnAll(hmR0EnableCpuCallback, (void *)pVM, &FirstRc);
+ if (RT_SUCCESS(rc))
+ rc = hmR0FirstRcGetStatus(&FirstRc);
+ }
+
+ return rc;
+}
+
+
+/**
+ * Sets up HM on all cpus.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) HMR0EnableAllCpus(PVMCC pVM)
+{
+ /* Make sure we don't touch HM after we've disabled HM in preparation of a suspend. */
+ if (ASMAtomicReadBool(&g_HmR0.fSuspended))
+ return VERR_HM_SUSPEND_PENDING;
+
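+    /* RTOnce() makes sure hmR0EnableAllCpuOnce() is executed exactly once for the
+       lifetime of the module; subsequent callers just get the recorded status back. */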
+ return RTOnce(&g_HmR0.EnableAllCpusOnce, hmR0EnableAllCpuOnce, pVM);
+}
+
+
+/**
+ * Disable VT-x or AMD-V on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param idCpu The identifier for the CPU this function is called on.
+ *
+ * @remarks Must be called with preemption disabled.
+ */
+static int hmR0DisableCpu(RTCPUID idCpu)
+{
+ PHMPHYSCPU pHostCpu = &g_HmR0.aCpuInfo[idCpu];
+
+ Assert(!g_HmR0.hwvirt.u.vmx.fSupported || !g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /** @todo fix idCpu == index assumption (rainy day) */
+ Assert(idCpu < RT_ELEMENTS(g_HmR0.aCpuInfo));
+ Assert(!pHostCpu->fConfigured || pHostCpu->hMemObj != NIL_RTR0MEMOBJ);
+ AssertRelease(idCpu == RTMpCpuId());
+
+ if (pHostCpu->hMemObj == NIL_RTR0MEMOBJ)
+ return pHostCpu->fConfigured ? VERR_NO_MEMORY : VINF_SUCCESS /* not initialized. */;
+ AssertPtr(pHostCpu->pvMemObj);
+ Assert(pHostCpu->HCPhysMemObj != NIL_RTHCPHYS);
+
+ int rc;
+ if (pHostCpu->fConfigured)
+ {
+ rc = g_HmR0.pfnDisableCpu(pHostCpu, pHostCpu->pvMemObj, pHostCpu->HCPhysMemObj);
+ AssertRCReturn(rc, rc);
+
+ pHostCpu->fConfigured = false;
+ pHostCpu->idCpu = NIL_RTCPUID;
+ }
+ else
+ rc = VINF_SUCCESS; /* nothing to do */
+ return rc;
+}
+
+
+/**
+ * Worker function passed to RTMpOnAll() that is to be called on the target
+ * CPUs.
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 The 1st user argument.
+ * @param pvUser2 Opaque pointer to the FirstRc.
+ */
+static DECLCALLBACK(void) hmR0DisableCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PHMR0FIRSTRC pFirstRc = (PHMR0FIRSTRC)pvUser2; NOREF(pvUser1);
+ AssertReturnVoid(g_HmR0.fGlobalInit);
+ hmR0FirstRcSetStatus(pFirstRc, hmR0DisableCpu(idCpu));
+}
+
+
+/**
+ * Worker function passed to RTMpOnSpecific() that is to be called on the target
+ * CPU.
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 Null, not used.
+ * @param pvUser2 Null, not used.
+ */
+static DECLCALLBACK(void) hmR0DisableCpuOnSpecificCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ NOREF(pvUser1);
+ NOREF(pvUser2);
+ hmR0DisableCpu(idCpu);
+}
+
+
+/**
+ * Callback function invoked when a cpu goes online or offline.
+ *
+ * @param enmEvent The Mp event.
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvData Opaque data (PVMCC pointer).
+ */
+static DECLCALLBACK(void) hmR0MpEventCallback(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvData)
+{
+ NOREF(pvData);
+ Assert(!g_HmR0.hwvirt.u.vmx.fSupported || !g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx);
+
+ /*
+ * We only care about uninitializing a CPU that is going offline. When a
+ * CPU comes online, the initialization is done lazily in HMR0Enter().
+ */
+ switch (enmEvent)
+ {
+ case RTMPEVENT_OFFLINE:
+ {
+ RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+ RTThreadPreemptDisable(&PreemptState);
+ if (idCpu == RTMpCpuId())
+ {
+ int rc = hmR0DisableCpu(idCpu);
+ AssertRC(rc);
+ RTThreadPreemptRestore(&PreemptState);
+ }
+ else
+ {
+ RTThreadPreemptRestore(&PreemptState);
+ RTMpOnSpecific(idCpu, hmR0DisableCpuOnSpecificCallback, NULL /* pvUser1 */, NULL /* pvUser2 */);
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
+
+/**
+ * Called whenever a system power state change occurs.
+ *
+ * @param enmEvent The Power event.
+ * @param pvUser User argument.
+ */
+static DECLCALLBACK(void) hmR0PowerCallback(RTPOWEREVENT enmEvent, void *pvUser)
+{
+ NOREF(pvUser);
+ Assert(!g_HmR0.hwvirt.u.vmx.fSupported || !g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx);
+
+#ifdef LOG_ENABLED
+ if (enmEvent == RTPOWEREVENT_SUSPEND)
+ SUPR0Printf("hmR0PowerCallback RTPOWEREVENT_SUSPEND\n");
+ else
+ SUPR0Printf("hmR0PowerCallback RTPOWEREVENT_RESUME\n");
+#endif
+
+ if (enmEvent == RTPOWEREVENT_SUSPEND)
+ ASMAtomicWriteBool(&g_HmR0.fSuspended, true);
+
+ if (g_HmR0.fEnabled)
+ {
+ int rc;
+ HMR0FIRSTRC FirstRc;
+ hmR0FirstRcInit(&FirstRc);
+
+ if (enmEvent == RTPOWEREVENT_SUSPEND)
+ {
+ if (g_HmR0.fGlobalInit)
+ {
+ /* Turn off VT-x or AMD-V on all CPUs. */
+ rc = RTMpOnAll(hmR0DisableCpuCallback, NULL /* pvUser 1 */, &FirstRc);
+ Assert(RT_SUCCESS(rc) || rc == VERR_NOT_SUPPORTED);
+ }
+ /* else nothing to do here for the local init case */
+ }
+ else
+ {
+ /* Reinit the CPUs from scratch as the suspend state might have
+ messed with the MSRs. (lousy BIOSes as usual) */
+ if (g_HmR0.hwvirt.u.vmx.fSupported)
+ rc = RTMpOnAll(hmR0InitIntelCpu, &FirstRc, NULL);
+ else
+ rc = RTMpOnAll(hmR0InitAmdCpu, &FirstRc, NULL);
+ Assert(RT_SUCCESS(rc) || rc == VERR_NOT_SUPPORTED);
+ if (RT_SUCCESS(rc))
+ rc = hmR0FirstRcGetStatus(&FirstRc);
+#ifdef LOG_ENABLED
+ if (RT_FAILURE(rc))
+                SUPR0Printf("hmR0PowerCallback hmR0InitXxxCpu failed with %Rrc\n", rc);
+#endif
+ if (g_HmR0.fGlobalInit)
+ {
+                /* Turn VT-x or AMD-V back on for all CPUs. */
+ rc = RTMpOnAll(hmR0EnableCpuCallback, NULL /* pVM */, &FirstRc /* output ignored */);
+ Assert(RT_SUCCESS(rc) || rc == VERR_NOT_SUPPORTED);
+ }
+ /* else nothing to do here for the local init case */
+ }
+ }
+
+ if (enmEvent == RTPOWEREVENT_RESUME)
+ ASMAtomicWriteBool(&g_HmR0.fSuspended, false);
+}
+
+
+/**
+ * Does ring-0 per-VM HM initialization.
+ *
+ * This will call the CPU-specific init routine, which may initialize and allocate
+ * resources for virtual CPUs.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ *
+ * @remarks This is called after HMR3Init(), see vmR3CreateU() and
+ * vmR3InitRing3().
+ */
+VMMR0_INT_DECL(int) HMR0InitVM(PVMCC pVM)
+{
+ AssertReturn(pVM, VERR_INVALID_PARAMETER);
+
+ /* Make sure we don't touch HM after we've disabled HM in preparation of a suspend. */
+ if (ASMAtomicReadBool(&g_HmR0.fSuspended))
+ return VERR_HM_SUSPEND_PENDING;
+
+ /*
+ * Copy globals to the VM structure.
+ */
+ Assert(!(pVM->hm.s.vmx.fSupported && pVM->hm.s.svm.fSupported));
+ if (pVM->hm.s.vmx.fSupported)
+ {
+ pVM->hm.s.vmx.fUsePreemptTimer &= g_HmR0.hwvirt.u.vmx.fUsePreemptTimer; /* Can be overridden by CFGM in HMR3Init(). */
+ pVM->hm.s.vmx.cPreemptTimerShift = g_HmR0.hwvirt.u.vmx.cPreemptTimerShift;
+ pVM->hm.s.vmx.u64HostCr4 = g_HmR0.hwvirt.u.vmx.u64HostCr4;
+ pVM->hm.s.vmx.u64HostMsrEfer = g_HmR0.hwvirt.u.vmx.u64HostMsrEfer;
+ pVM->hm.s.vmx.u64HostSmmMonitorCtl = g_HmR0.hwvirt.u.vmx.u64HostSmmMonitorCtl;
+ HMGetVmxMsrsFromHwvirtMsrs(&g_HmR0.hwvirt.Msrs, &pVM->hm.s.vmx.Msrs);
+ /* If you need to tweak host MSRs for testing VMX R0 code, do it here. */
+
+ /* Enable VPID if supported and configured. */
+ if (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VPID)
+ pVM->hm.s.vmx.fVpid = pVM->hm.s.vmx.fAllowVpid; /* Can be overridden by CFGM in HMR3Init(). */
+
+ /* Use VMCS shadowing if supported. */
+ Assert(!pVM->hm.s.vmx.fUseVmcsShadowing);
+ if ( pVM->cpum.ro.GuestFeatures.fVmx
+ && (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VMCS_SHADOWING))
+ pVM->hm.s.vmx.fUseVmcsShadowing = true;
+
+ /* Use the VMCS controls for swapping the EFER MSR if supported. */
+ Assert(!pVM->hm.s.vmx.fSupportsVmcsEfer);
+ if ( (pVM->hm.s.vmx.Msrs.EntryCtls.n.allowed1 & VMX_ENTRY_CTLS_LOAD_EFER_MSR)
+ && (pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_LOAD_EFER_MSR)
+ && (pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_SAVE_EFER_MSR))
+ pVM->hm.s.vmx.fSupportsVmcsEfer = true;
+
+#if 0
+ /* Enable APIC register virtualization and virtual-interrupt delivery if supported. */
+ if ( (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT)
+ && (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY))
+ pVM->hm.s.fVirtApicRegs = true;
+
+ /* Enable posted-interrupt processing if supported. */
+ /** @todo Add and query IPRT API for host OS support for posted-interrupt IPI
+ * here. */
+ if ( (pVM->hm.s.vmx.Msrs.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT)
+ && (pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT))
+ pVM->hm.s.fPostedIntrs = true;
+#endif
+ }
+ else if (pVM->hm.s.svm.fSupported)
+ {
+ pVM->hm.s.svm.u32Rev = g_HmR0.hwvirt.u.svm.u32Rev;
+ pVM->hm.s.svm.u32Features = g_HmR0.hwvirt.u.svm.u32Features;
+ pVM->hm.s.svm.u64MsrHwcr = g_HmR0.hwvirt.Msrs.u.svm.u64MsrHwcr;
+ /* If you need to tweak host MSRs for testing SVM R0 code, do it here. */
+ }
+ pVM->hm.s.rcInit = g_HmR0.rcInit;
+ pVM->hm.s.uMaxAsid = g_HmR0.hwvirt.uMaxAsid;
+
+ /*
+ * Set default maximum inner loops in ring-0 before returning to ring-3.
+     * Can be overridden using CFGM.
+ */
+ if (!pVM->hm.s.cMaxResumeLoops)
+ {
+ pVM->hm.s.cMaxResumeLoops = 1024;
+ if (RTThreadPreemptIsPendingTrusty())
+ pVM->hm.s.cMaxResumeLoops = 8192;
+ }
+
+ /*
+ * Initialize some per-VCPU fields.
+ */
+ for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
+ {
+ PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
+ pVCpu->hm.s.idEnteredCpu = NIL_RTCPUID;
+ pVCpu->hm.s.idLastCpu = NIL_RTCPUID;
+
+        /* We'll always increment this the first time (host uses ASID 0). */
+ AssertReturn(!pVCpu->hm.s.uCurrentAsid, VERR_HM_IPE_3);
+ }
+
+ /*
+ * Get host kernel features that HM might need to know in order
+ * to co-operate and function properly with the host OS (e.g. SMAP).
+ *
+ * Technically, we could do this as part of the pre-init VM procedure
+ * but it shouldn't be done later than this point so we do it here.
+ */
+ pVM->hm.s.fHostKernelFeatures = SUPR0GetKernelFeatures();
+
+ /*
+ * Call the hardware specific initialization method.
+ */
+ return g_HmR0.pfnInitVM(pVM);
+}
+
+
+/**
+ * Does ring-0 per VM HM termination.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) HMR0TermVM(PVMCC pVM)
+{
+ Log(("HMR0TermVM: %p\n", pVM));
+ AssertReturn(pVM, VERR_INVALID_PARAMETER);
+
+ /*
+ * Call the hardware specific method.
+ *
+ * Note! We might be preparing for a suspend, so the pfnTermVM() functions should probably not
+ * mess with VT-x/AMD-V features on the CPU; currently all they do is free memory, so this is safe.
+ */
+ return g_HmR0.pfnTermVM(pVM);
+}
+
+
+/**
+ * Sets up a VT-x or AMD-V session.
+ *
+ * This is mostly about setting up the hardware VM state.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
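+ *
+ * A hypothetical per-VM bring-up sketch (illustration only; the real callers
+ * are the ring-0 VM init/setup entry points):
+ * @code
+ *     rc = HMR0InitVM(pVM);      /* copy globals, select usable features */
+ *     ...
+ *     rc = HMR0SetupVM(pVM);     /* set up the hardware VM state (VMCS/VMCB) */
+ *     ...
+ *     rc = HMR0TermVM(pVM);      /* at VM destruction */
+ * @endcode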
+ */
+VMMR0_INT_DECL(int) HMR0SetupVM(PVMCC pVM)
+{
+ Log(("HMR0SetupVM: %p\n", pVM));
+ AssertReturn(pVM, VERR_INVALID_PARAMETER);
+
+ /* Make sure we don't touch HM after we've disabled HM in preparation of a suspend. */
+ AssertReturn(!ASMAtomicReadBool(&g_HmR0.fSuspended), VERR_HM_SUSPEND_PENDING);
+
+ /* On first entry we'll sync everything. */
+ VMCC_FOR_EACH_VMCPU_STMT(pVM, pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT | HM_CHANGED_ALL_GUEST);
+
+ /*
+ * Call the hardware specific setup VM method. This requires the CPU to be
+ * enabled for AMD-V/VT-x and preemption to be prevented.
+ */
+ RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+ RTThreadPreemptDisable(&PreemptState);
+ RTCPUID const idCpu = RTMpCpuId();
+
+ /* Enable VT-x or AMD-V if local init is required. */
+ int rc;
+ if (!g_HmR0.fGlobalInit)
+ {
+ Assert(!g_HmR0.hwvirt.u.vmx.fSupported || !g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx);
+ rc = hmR0EnableCpu(pVM, idCpu);
+ if (RT_FAILURE(rc))
+ {
+ RTThreadPreemptRestore(&PreemptState);
+ return rc;
+ }
+ }
+
+ /* Setup VT-x or AMD-V. */
+ rc = g_HmR0.pfnSetupVM(pVM);
+
+ /* Disable VT-x or AMD-V if local init was done before. */
+ if (!g_HmR0.fGlobalInit)
+ {
+ Assert(!g_HmR0.hwvirt.u.vmx.fSupported || !g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx);
+ int rc2 = hmR0DisableCpu(idCpu);
+ AssertRC(rc2);
+ }
+
+ RTThreadPreemptRestore(&PreemptState);
+ return rc;
+}
+
+
+/**
+ * Notification callback before performing a longjump to ring-3.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param enmOperation The operation causing the ring-3 longjump.
+ * @param pvUser User argument, currently unused, NULL.
+ */
+static DECLCALLBACK(int) hmR0CallRing3Callback(PVMCPUCC pVCpu, VMMCALLRING3 enmOperation, void *pvUser)
+{
+ RT_NOREF(pvUser);
+ Assert(pVCpu);
+ Assert(g_HmR0.pfnCallRing3Callback);
+ return g_HmR0.pfnCallRing3Callback(pVCpu, enmOperation);
+}
+
+
+/**
+ * Turns on HM on the CPU if necessary and initializes the bare minimum state
+ * required for entering HM context.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+VMMR0_INT_DECL(int) hmR0EnterCpu(PVMCPUCC pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ int rc = VINF_SUCCESS;
+ RTCPUID const idCpu = RTMpCpuId();
+ PHMPHYSCPU pHostCpu = &g_HmR0.aCpuInfo[idCpu];
+ AssertPtr(pHostCpu);
+
+ /* Enable VT-x or AMD-V if local init is required, or enable if it's a freshly onlined CPU. */
+ if (!pHostCpu->fConfigured)
+ rc = hmR0EnableCpu(pVCpu->CTX_SUFF(pVM), idCpu);
+
+ /* Register a callback to fire prior to performing a longjmp to ring-3 so HM can disable VT-x/AMD-V if needed. */
+ VMMRZCallRing3SetNotification(pVCpu, hmR0CallRing3Callback, NULL /* pvUser */);
+
+ /* Reload host-state (back from ring-3/migrated CPUs) and shared guest/host bits. */
+ if (g_HmR0.hwvirt.u.vmx.fSupported)
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
+ else
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE;
+
+ Assert(pHostCpu->idCpu == idCpu && pHostCpu->idCpu != NIL_RTCPUID);
+ pVCpu->hm.s.idEnteredCpu = idCpu;
+ return rc;
+}
+
+
+/**
+ * Enters the VT-x or AMD-V session.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks This is called with preemption disabled.
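+ *
+ * A rough, hypothetical EMT-side sketch (illustration only; the real run loop
+ * adds longjmp protection, statistics, thread-context hook handling and session
+ * teardown):
+ * @code
+ *     RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+ *     RTThreadPreemptDisable(&PreemptState);
+ *     rc = HMR0Enter(pVCpu);
+ *     if (RT_SUCCESS(rc))
+ *         rc = HMR0RunGuestCode(pVM, pVCpu);
+ *     RTThreadPreemptRestore(&PreemptState);
+ * @endcode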
+ */
+VMMR0_INT_DECL(int) HMR0Enter(PVMCPUCC pVCpu)
+{
+ /* Make sure we can't enter a session after we've disabled HM in preparation of a suspend. */
+ AssertReturn(!ASMAtomicReadBool(&g_HmR0.fSuspended), VERR_HM_SUSPEND_PENDING);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* Load the bare minimum state required for entering HM. */
+ int rc = hmR0EnterCpu(pVCpu);
+ if (RT_SUCCESS(rc))
+ {
+ if (g_HmR0.hwvirt.u.vmx.fSupported)
+ {
+ Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
+ == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
+ }
+ else
+ {
+ Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE))
+ == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE));
+ }
+
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ AssertReturn(!VMMR0ThreadCtxHookIsEnabled(pVCpu), VERR_HM_IPE_5);
+ bool const fStartedSet = PGMR0DynMapStartOrMigrateAutoSet(pVCpu);
+#endif
+
+ /* Keep track of the CPU owning the VMCS for debugging scheduling weirdness and ring-3 calls. */
+ rc = g_HmR0.pfnEnterSession(pVCpu);
+ AssertMsgRCReturnStmt(rc, ("rc=%Rrc pVCpu=%p\n", rc, pVCpu), pVCpu->hm.s.idEnteredCpu = NIL_RTCPUID, rc);
+
+ /* Exports the host-state as we may be resuming code after a longjmp and quite
+ possibly now be scheduled on a different CPU. */
+ rc = g_HmR0.pfnExportHostState(pVCpu);
+ AssertMsgRCReturnStmt(rc, ("rc=%Rrc pVCpu=%p\n", rc, pVCpu), pVCpu->hm.s.idEnteredCpu = NIL_RTCPUID, rc);
+
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ if (fStartedSet)
+ PGMRZDynMapReleaseAutoSet(pVCpu);
+#endif
+ }
+ return rc;
+}
+
+
+/**
+ * Deinitializes the bare minimum state used for HM context and, if necessary,
+ * disables HM on the CPU.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+VMMR0_INT_DECL(int) HMR0LeaveCpu(PVMCPUCC pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ VMCPU_ASSERT_EMT_RETURN(pVCpu, VERR_HM_WRONG_CPU);
+
+ RTCPUID const idCpu = RTMpCpuId();
+ PCHMPHYSCPU pHostCpu = &g_HmR0.aCpuInfo[idCpu];
+
+ if ( !g_HmR0.fGlobalInit
+ && pHostCpu->fConfigured)
+ {
+ int rc = hmR0DisableCpu(idCpu);
+ AssertRCReturn(rc, rc);
+ Assert(!pHostCpu->fConfigured);
+ Assert(pHostCpu->idCpu == NIL_RTCPUID);
+
+ /* For obtaining a non-zero ASID/VPID on next re-entry. */
+ pVCpu->hm.s.idLastCpu = NIL_RTCPUID;
+ }
+
+ /* Clear it while leaving HM context, hmPokeCpuForTlbFlush() relies on this. */
+ pVCpu->hm.s.idEnteredCpu = NIL_RTCPUID;
+
+    /* De-register the longjmp-to-ring-3 callback now that we have relinquished hardware resources. */
+ VMMRZCallRing3RemoveNotification(pVCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Thread-context hook for HM.
+ *
+ * @param enmEvent The thread-context event.
+ * @param pvUser Opaque pointer to the VMCPU.
+ */
+VMMR0_INT_DECL(void) HMR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, void *pvUser)
+{
+ PVMCPUCC pVCpu = (PVMCPUCC)pvUser;
+ Assert(pVCpu);
+ Assert(g_HmR0.pfnThreadCtxCallback);
+
+ g_HmR0.pfnThreadCtxCallback(enmEvent, pVCpu, g_HmR0.fGlobalInit);
+}
+
+
+/**
+ * Runs guest code in a hardware accelerated VM.
+ *
+ * @returns Strict VBox status code. (VBOXSTRICTRC isn't used because it's
+ * called from setjmp assembly.)
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Can be called with preemption enabled if thread-context hooks are
+ * used!!!
+ */
+VMMR0_INT_DECL(int) HMR0RunGuestCode(PVMCC pVM, PVMCPUCC pVCpu)
+{
+ RT_NOREF(pVM);
+
+#ifdef VBOX_STRICT
+ /* With thread-context hooks we would be running this code with preemption enabled. */
+ if (!RTThreadPreemptIsEnabled(NIL_RTTHREAD))
+ {
+ PCHMPHYSCPU pHostCpu = &g_HmR0.aCpuInfo[RTMpCpuId()];
+ Assert(!VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
+ Assert(pHostCpu->fConfigured);
+ AssertReturn(!ASMAtomicReadBool(&g_HmR0.fSuspended), VERR_HM_SUSPEND_PENDING);
+ }
+#endif
+
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ AssertReturn(!VMMR0ThreadCtxHookIsEnabled(pVCpu), VERR_HM_IPE_4);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ PGMRZDynMapStartAutoSet(pVCpu);
+#endif
+
+ VBOXSTRICTRC rcStrict = g_HmR0.pfnRunGuestCode(pVCpu);
+
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ PGMRZDynMapReleaseAutoSet(pVCpu);
+#endif
+ return VBOXSTRICTRC_VAL(rcStrict);
+}
+
+
+/**
+ * Notification from CPUM that it has unloaded the guest FPU/SSE/AVX state from
+ * the host CPU and that guest access to it must be intercepted.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ */
+VMMR0_INT_DECL(void) HMR0NotifyCpumUnloadedGuestFpuState(PVMCPUCC pVCpu)
+{
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR0);
+}
+
+
+/**
+ * Notification from CPUM that it has modified the host CR0 (because of FPU).
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ */
+VMMR0_INT_DECL(void) HMR0NotifyCpumModifiedHostCr0(PVMCPUCC pVCpu)
+{
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_HOST_CONTEXT);
+}
+
+
+/**
+ * Returns suspend status of the host.
+ *
+ * @returns Suspend pending or not.
+ */
+VMMR0_INT_DECL(bool) HMR0SuspendPending(void)
+{
+ return ASMAtomicReadBool(&g_HmR0.fSuspended);
+}
+
+
+/**
+ * Invalidates a guest page from the host TLB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param GCVirt Page to invalidate.
+ */
+VMMR0_INT_DECL(int) HMR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if (pVM->hm.s.vmx.fSupported)
+ return VMXR0InvalidatePage(pVCpu, GCVirt);
+ return SVMR0InvalidatePage(pVCpu, GCVirt);
+}
+
+
+/**
+ * Returns the cpu structure for the current cpu.
+ * Keep in mind that there is no guarantee it will stay the same (long jumps to ring 3!!!).
+ *
+ * @returns The cpu structure pointer.
+ */
+VMMR0_INT_DECL(PHMPHYSCPU) hmR0GetCurrentCpu(void)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ RTCPUID const idCpu = RTMpCpuId();
+ Assert(idCpu < RT_ELEMENTS(g_HmR0.aCpuInfo));
+ return &g_HmR0.aCpuInfo[idCpu];
+}
+
+
+/**
+ * Interface for importing state on demand (used by IEM).
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context CPU structure.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
+ */
+VMMR0_INT_DECL(int) HMR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
+{
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported)
+ return VMXR0ImportStateOnDemand(pVCpu, fWhat);
+ return SVMR0ImportStateOnDemand(pVCpu, fWhat);
+}
+
+#ifdef VBOX_STRICT
+
+/**
+ * Dumps a descriptor.
+ *
+ * @param pDesc Descriptor to dump.
+ * @param Sel The selector.
+ * @param pszSel The name of the selector.
+ */
+VMMR0_INT_DECL(void) hmR0DumpDescriptor(PCX86DESCHC pDesc, RTSEL Sel, const char *pszSel)
+{
+ /*
+ * Make variable description string.
+ */
+ static struct
+ {
+ unsigned cch;
+ const char *psz;
+ } const s_aTypes[32] =
+ {
+# define STRENTRY(str) { sizeof(str) - 1, str }
+
+ /* system */
+# if HC_ARCH_BITS == 64
+ STRENTRY("Reserved0 "), /* 0x00 */
+ STRENTRY("Reserved1 "), /* 0x01 */
+ STRENTRY("LDT "), /* 0x02 */
+ STRENTRY("Reserved3 "), /* 0x03 */
+ STRENTRY("Reserved4 "), /* 0x04 */
+ STRENTRY("Reserved5 "), /* 0x05 */
+ STRENTRY("Reserved6 "), /* 0x06 */
+ STRENTRY("Reserved7 "), /* 0x07 */
+ STRENTRY("Reserved8 "), /* 0x08 */
+ STRENTRY("TSS64Avail "), /* 0x09 */
+ STRENTRY("ReservedA "), /* 0x0a */
+ STRENTRY("TSS64Busy "), /* 0x0b */
+ STRENTRY("Call64 "), /* 0x0c */
+ STRENTRY("ReservedD "), /* 0x0d */
+ STRENTRY("Int64 "), /* 0x0e */
+ STRENTRY("Trap64 "), /* 0x0f */
+# else
+ STRENTRY("Reserved0 "), /* 0x00 */
+ STRENTRY("TSS16Avail "), /* 0x01 */
+ STRENTRY("LDT "), /* 0x02 */
+ STRENTRY("TSS16Busy "), /* 0x03 */
+ STRENTRY("Call16 "), /* 0x04 */
+ STRENTRY("Task "), /* 0x05 */
+ STRENTRY("Int16 "), /* 0x06 */
+ STRENTRY("Trap16 "), /* 0x07 */
+ STRENTRY("Reserved8 "), /* 0x08 */
+ STRENTRY("TSS32Avail "), /* 0x09 */
+ STRENTRY("ReservedA "), /* 0x0a */
+ STRENTRY("TSS32Busy "), /* 0x0b */
+ STRENTRY("Call32 "), /* 0x0c */
+ STRENTRY("ReservedD "), /* 0x0d */
+ STRENTRY("Int32 "), /* 0x0e */
+ STRENTRY("Trap32 "), /* 0x0f */
+# endif
+ /* non system */
+ STRENTRY("DataRO "), /* 0x10 */
+ STRENTRY("DataRO Accessed "), /* 0x11 */
+ STRENTRY("DataRW "), /* 0x12 */
+ STRENTRY("DataRW Accessed "), /* 0x13 */
+ STRENTRY("DataDownRO "), /* 0x14 */
+ STRENTRY("DataDownRO Accessed "), /* 0x15 */
+ STRENTRY("DataDownRW "), /* 0x16 */
+ STRENTRY("DataDownRW Accessed "), /* 0x17 */
+ STRENTRY("CodeEO "), /* 0x18 */
+ STRENTRY("CodeEO Accessed "), /* 0x19 */
+ STRENTRY("CodeER "), /* 0x1a */
+ STRENTRY("CodeER Accessed "), /* 0x1b */
+ STRENTRY("CodeConfEO "), /* 0x1c */
+ STRENTRY("CodeConfEO Accessed "), /* 0x1d */
+ STRENTRY("CodeConfER "), /* 0x1e */
+ STRENTRY("CodeConfER Accessed ") /* 0x1f */
+# undef STRENTRY
+ };
+# define ADD_STR(psz, pszAdd) do { strcpy(psz, pszAdd); psz += strlen(pszAdd); } while (0)
+ char szMsg[128];
+ char *psz = &szMsg[0];
+ unsigned i = pDesc->Gen.u1DescType << 4 | pDesc->Gen.u4Type;
+ memcpy(psz, s_aTypes[i].psz, s_aTypes[i].cch);
+ psz += s_aTypes[i].cch;
+
+ if (pDesc->Gen.u1Present)
+ ADD_STR(psz, "Present ");
+ else
+ ADD_STR(psz, "Not-Present ");
+# if HC_ARCH_BITS == 64
+ if (pDesc->Gen.u1Long)
+ ADD_STR(psz, "64-bit ");
+ else
+ ADD_STR(psz, "Comp ");
+# else
+ if (pDesc->Gen.u1Granularity)
+ ADD_STR(psz, "Page ");
+ if (pDesc->Gen.u1DefBig)
+ ADD_STR(psz, "32-bit ");
+ else
+ ADD_STR(psz, "16-bit ");
+# endif
+# undef ADD_STR
+ *psz = '\0';
+
+ /*
+ * Limit and Base and format the output.
+ */
+#ifdef LOG_ENABLED
+ uint32_t u32Limit = X86DESC_LIMIT_G(pDesc);
+
+# if HC_ARCH_BITS == 64
+ uint64_t const u64Base = X86DESC64_BASE(pDesc);
+ Log((" %s { %#04x - %#RX64 %#RX64 - base=%#RX64 limit=%#08x dpl=%d } %s\n", pszSel,
+ Sel, pDesc->au64[0], pDesc->au64[1], u64Base, u32Limit, pDesc->Gen.u2Dpl, szMsg));
+# else
+ uint32_t const u32Base = X86DESC_BASE(pDesc);
+ Log((" %s { %#04x - %#08x %#08x - base=%#08x limit=%#08x dpl=%d } %s\n", pszSel,
+ Sel, pDesc->au32[0], pDesc->au32[1], u32Base, u32Limit, pDesc->Gen.u2Dpl, szMsg));
+# endif
+#else
+ NOREF(Sel); NOREF(pszSel);
+#endif
+}
+
+
+/**
+ * Formats a full register dump.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fFlags The dumping flags (HM_DUMP_REG_FLAGS_XXX).
+ */
+VMMR0_INT_DECL(void) hmR0DumpRegs(PVMCPUCC pVCpu, uint32_t fFlags)
+{
+ /*
+ * Format the flags.
+ */
+ static struct
+ {
+ const char *pszSet;
+ const char *pszClear;
+ uint32_t fFlag;
+ } const s_aFlags[] =
+ {
+ { "vip", NULL, X86_EFL_VIP },
+ { "vif", NULL, X86_EFL_VIF },
+ { "ac", NULL, X86_EFL_AC },
+ { "vm", NULL, X86_EFL_VM },
+ { "rf", NULL, X86_EFL_RF },
+ { "nt", NULL, X86_EFL_NT },
+ { "ov", "nv", X86_EFL_OF },
+ { "dn", "up", X86_EFL_DF },
+ { "ei", "di", X86_EFL_IF },
+ { "tf", NULL, X86_EFL_TF },
+ { "nt", "pl", X86_EFL_SF },
+ { "nz", "zr", X86_EFL_ZF },
+ { "ac", "na", X86_EFL_AF },
+ { "po", "pe", X86_EFL_PF },
+ { "cy", "nc", X86_EFL_CF },
+ };
+ char szEFlags[80];
+ char *psz = szEFlags;
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint32_t uEFlags = pCtx->eflags.u32;
+ for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
+ {
+ const char *pszAdd = s_aFlags[i].fFlag & uEFlags ? s_aFlags[i].pszSet : s_aFlags[i].pszClear;
+ if (pszAdd)
+ {
+ strcpy(psz, pszAdd);
+ psz += strlen(pszAdd);
+ *psz++ = ' ';
+ }
+ }
+ psz[-1] = '\0';
+
+ if (fFlags & HM_DUMP_REG_FLAGS_GPRS)
+ {
+ /*
+ * Format the registers.
+ */
+ if (CPUMIsGuestIn64BitCode(pVCpu))
+ {
+ Log(("rax=%016RX64 rbx=%016RX64 rcx=%016RX64 rdx=%016RX64\n"
+ "rsi=%016RX64 rdi=%016RX64 r8 =%016RX64 r9 =%016RX64\n"
+ "r10=%016RX64 r11=%016RX64 r12=%016RX64 r13=%016RX64\n"
+ "r14=%016RX64 r15=%016RX64\n"
+ "rip=%016RX64 rsp=%016RX64 rbp=%016RX64 iopl=%d %*s\n"
+ "cs={%04x base=%016RX64 limit=%08x flags=%08x}\n"
+ "ds={%04x base=%016RX64 limit=%08x flags=%08x}\n"
+ "es={%04x base=%016RX64 limit=%08x flags=%08x}\n"
+ "fs={%04x base=%016RX64 limit=%08x flags=%08x}\n"
+ "gs={%04x base=%016RX64 limit=%08x flags=%08x}\n"
+ "ss={%04x base=%016RX64 limit=%08x flags=%08x}\n"
+ "cr0=%016RX64 cr2=%016RX64 cr3=%016RX64 cr4=%016RX64\n"
+ "dr0=%016RX64 dr1=%016RX64 dr2=%016RX64 dr3=%016RX64\n"
+ "dr4=%016RX64 dr5=%016RX64 dr6=%016RX64 dr7=%016RX64\n"
+ "gdtr=%016RX64:%04x idtr=%016RX64:%04x eflags=%08x\n"
+ "ldtr={%04x base=%08RX64 limit=%08x flags=%08x}\n"
+ "tr ={%04x base=%08RX64 limit=%08x flags=%08x}\n"
+ "SysEnter={cs=%04llx eip=%08llx esp=%08llx}\n"
+ ,
+ pCtx->rax, pCtx->rbx, pCtx->rcx, pCtx->rdx, pCtx->rsi, pCtx->rdi,
+ pCtx->r8, pCtx->r9, pCtx->r10, pCtx->r11, pCtx->r12, pCtx->r13,
+ pCtx->r14, pCtx->r15,
+ pCtx->rip, pCtx->rsp, pCtx->rbp, X86_EFL_GET_IOPL(uEFlags), 31, szEFlags,
+ pCtx->cs.Sel, pCtx->cs.u64Base, pCtx->cs.u32Limit, pCtx->cs.Attr.u,
+ pCtx->ds.Sel, pCtx->ds.u64Base, pCtx->ds.u32Limit, pCtx->ds.Attr.u,
+ pCtx->es.Sel, pCtx->es.u64Base, pCtx->es.u32Limit, pCtx->es.Attr.u,
+ pCtx->fs.Sel, pCtx->fs.u64Base, pCtx->fs.u32Limit, pCtx->fs.Attr.u,
+ pCtx->gs.Sel, pCtx->gs.u64Base, pCtx->gs.u32Limit, pCtx->gs.Attr.u,
+ pCtx->ss.Sel, pCtx->ss.u64Base, pCtx->ss.u32Limit, pCtx->ss.Attr.u,
+ pCtx->cr0, pCtx->cr2, pCtx->cr3, pCtx->cr4,
+ pCtx->dr[0], pCtx->dr[1], pCtx->dr[2], pCtx->dr[3],
+ pCtx->dr[4], pCtx->dr[5], pCtx->dr[6], pCtx->dr[7],
+ pCtx->gdtr.pGdt, pCtx->gdtr.cbGdt, pCtx->idtr.pIdt, pCtx->idtr.cbIdt, uEFlags,
+ pCtx->ldtr.Sel, pCtx->ldtr.u64Base, pCtx->ldtr.u32Limit, pCtx->ldtr.Attr.u,
+ pCtx->tr.Sel, pCtx->tr.u64Base, pCtx->tr.u32Limit, pCtx->tr.Attr.u,
+ pCtx->SysEnter.cs, pCtx->SysEnter.eip, pCtx->SysEnter.esp));
+ }
+ else
+ Log(("eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
+ "eip=%08x esp=%08x ebp=%08x iopl=%d %*s\n"
+ "cs={%04x base=%016RX64 limit=%08x flags=%08x} dr0=%08RX64 dr1=%08RX64\n"
+ "ds={%04x base=%016RX64 limit=%08x flags=%08x} dr2=%08RX64 dr3=%08RX64\n"
+ "es={%04x base=%016RX64 limit=%08x flags=%08x} dr4=%08RX64 dr5=%08RX64\n"
+ "fs={%04x base=%016RX64 limit=%08x flags=%08x} dr6=%08RX64 dr7=%08RX64\n"
+ "gs={%04x base=%016RX64 limit=%08x flags=%08x} cr0=%08RX64 cr2=%08RX64\n"
+ "ss={%04x base=%016RX64 limit=%08x flags=%08x} cr3=%08RX64 cr4=%08RX64\n"
+ "gdtr=%016RX64:%04x idtr=%016RX64:%04x eflags=%08x\n"
+ "ldtr={%04x base=%08RX64 limit=%08x flags=%08x}\n"
+ "tr ={%04x base=%08RX64 limit=%08x flags=%08x}\n"
+ "SysEnter={cs=%04llx eip=%08llx esp=%08llx}\n"
+ ,
+ pCtx->eax, pCtx->ebx, pCtx->ecx, pCtx->edx, pCtx->esi, pCtx->edi,
+ pCtx->eip, pCtx->esp, pCtx->ebp, X86_EFL_GET_IOPL(uEFlags), 31, szEFlags,
+ pCtx->cs.Sel, pCtx->cs.u64Base, pCtx->cs.u32Limit, pCtx->cs.Attr.u, pCtx->dr[0], pCtx->dr[1],
+ pCtx->ds.Sel, pCtx->ds.u64Base, pCtx->ds.u32Limit, pCtx->ds.Attr.u, pCtx->dr[2], pCtx->dr[3],
+ pCtx->es.Sel, pCtx->es.u64Base, pCtx->es.u32Limit, pCtx->es.Attr.u, pCtx->dr[4], pCtx->dr[5],
+ pCtx->fs.Sel, pCtx->fs.u64Base, pCtx->fs.u32Limit, pCtx->fs.Attr.u, pCtx->dr[6], pCtx->dr[7],
+ pCtx->gs.Sel, pCtx->gs.u64Base, pCtx->gs.u32Limit, pCtx->gs.Attr.u, pCtx->cr0, pCtx->cr2,
+ pCtx->ss.Sel, pCtx->ss.u64Base, pCtx->ss.u32Limit, pCtx->ss.Attr.u, pCtx->cr3, pCtx->cr4,
+ pCtx->gdtr.pGdt, pCtx->gdtr.cbGdt, pCtx->idtr.pIdt, pCtx->idtr.cbIdt, uEFlags,
+ pCtx->ldtr.Sel, pCtx->ldtr.u64Base, pCtx->ldtr.u32Limit, pCtx->ldtr.Attr.u,
+ pCtx->tr.Sel, pCtx->tr.u64Base, pCtx->tr.u32Limit, pCtx->tr.Attr.u,
+ pCtx->SysEnter.cs, pCtx->SysEnter.eip, pCtx->SysEnter.esp));
+ }
+
+ if (fFlags & HM_DUMP_REG_FLAGS_FPU)
+ {
+ PCX86FXSTATE pFpuCtx = &pCtx->CTX_SUFF(pXState)->x87;
+ Log(("FPU:\n"
+ "FCW=%04x FSW=%04x FTW=%02x\n"
+ "FOP=%04x FPUIP=%08x CS=%04x Rsrvd1=%04x\n"
+ "FPUDP=%04x DS=%04x Rsvrd2=%04x MXCSR=%08x MXCSR_MASK=%08x\n"
+ ,
+ pFpuCtx->FCW, pFpuCtx->FSW, pFpuCtx->FTW,
+ pFpuCtx->FOP, pFpuCtx->FPUIP, pFpuCtx->CS, pFpuCtx->Rsrvd1,
+ pFpuCtx->FPUDP, pFpuCtx->DS, pFpuCtx->Rsrvd2,
+ pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK));
+ NOREF(pFpuCtx);
+ }
+
+ if (fFlags & HM_DUMP_REG_FLAGS_MSRS)
+ {
+ Log(("MSR:\n"
+ "EFER =%016RX64\n"
+ "PAT =%016RX64\n"
+ "STAR =%016RX64\n"
+ "CSTAR =%016RX64\n"
+ "LSTAR =%016RX64\n"
+ "SFMASK =%016RX64\n"
+ "KERNELGSBASE =%016RX64\n",
+ pCtx->msrEFER,
+ pCtx->msrPAT,
+ pCtx->msrSTAR,
+ pCtx->msrCSTAR,
+ pCtx->msrLSTAR,
+ pCtx->msrSFMASK,
+ pCtx->msrKERNELGSBASE));
+ }
+}
+
+#endif /* VBOX_STRICT */
+
diff --git a/src/VBox/VMM/VMMR0/HMR0A.asm b/src/VBox/VMM/VMMR0/HMR0A.asm
new file mode 100644
index 00000000..fc1fc67f
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/HMR0A.asm
@@ -0,0 +1,1705 @@
+; $Id: HMR0A.asm $
+;; @file
+; HM - Ring-0 VMX, SVM world-switch and helper routines.
+;
+
+;
+; Copyright (C) 2006-2020 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+;*********************************************************************************************************************************
+;* Header Files *
+;*********************************************************************************************************************************
+%include "VBox/asmdefs.mac"
+%include "VBox/err.mac"
+%include "VBox/vmm/hm_vmx.mac"
+%include "VBox/vmm/cpum.mac"
+%include "VBox/vmm/vm.mac"
+%include "iprt/x86.mac"
+%include "HMInternal.mac"
+
+%ifdef RT_OS_OS2 ;; @todo fix OMF support in yasm and kick nasm out completely.
+ %macro vmwrite 2,
+ int3
+ %endmacro
+ %define vmlaunch int3
+ %define vmresume int3
+ %define vmsave int3
+ %define vmload int3
+ %define vmrun int3
+ %define clgi int3
+ %define stgi int3
+ %macro invlpga 2,
+ int3
+ %endmacro
+%endif
+
+;*********************************************************************************************************************************
+;* Defined Constants And Macros *
+;*********************************************************************************************************************************
+;; The offset of the XMM registers in X86FXSTATE.
+; Use define because I'm too lazy to convert the struct.
+%define XMM_OFF_IN_X86FXSTATE 160
+
+;; Spectre filler for 32-bit mode.
+; Some user space address that points to a 4MB page boundary in the hope that it
+; will somehow make it less useful.
+%define SPECTRE_FILLER32 0x227fffff
+;; Spectre filler for 64-bit mode.
+; Chosen to be an invalid address (also with 5 level paging).
+%define SPECTRE_FILLER64 0x02204204207fffff
+;; Spectre filler for the current CPU mode.
+%ifdef RT_ARCH_AMD64
+ %define SPECTRE_FILLER SPECTRE_FILLER64
+%else
+ %define SPECTRE_FILLER SPECTRE_FILLER32
+%endif
+
+;;
+; Determine whether to skip restoring the GDTR, IDTR and TR across VMX non-root operation.
+;
+%ifdef RT_ARCH_AMD64
+ %define VMX_SKIP_GDTR
+ %define VMX_SKIP_TR
+ %define VBOX_SKIP_RESTORE_SEG
+ %ifdef RT_OS_DARWIN
+ ; Load the NULL selector into DS, ES, FS and GS on 64-bit darwin so we don't
+ ; risk loading a stale LDT value or something invalid.
+ %define HM_64_BIT_USE_NULL_SEL
+        ; Darwin (Mavericks) uses the IDTR limit to store the CPU ID, so we must always restore it.
+ ; See @bugref{6875}.
+ %else
+ %define VMX_SKIP_IDTR
+ %endif
+%endif
+
+;; @def MYPUSHAD
+; Macro generating an equivalent to PUSHAD instruction.
+
+;; @def MYPOPAD
+; Macro generating an equivalent to POPAD instruction.
+
+;; @def MYPUSHSEGS
+; Macro saving all segment registers on the stack.
+; @param 1 Full width register name.
+; @param 2 16-bit register name for \a 1.
+
+;; @def MYPOPSEGS
+; Macro restoring all segment registers on the stack.
+; @param 1 Full width register name.
+; @param 2 16-bit register name for \a 1.
+
+%ifdef ASM_CALL64_GCC
+ %macro MYPUSHAD64 0
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ %endmacro
+ %macro MYPOPAD64 0
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ %endmacro
+
+%else ; ASM_CALL64_MSC
+ %macro MYPUSHAD64 0
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ push rsi
+ push rdi
+ %endmacro
+ %macro MYPOPAD64 0
+ pop rdi
+ pop rsi
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ %endmacro
+%endif
+
+%ifdef VBOX_SKIP_RESTORE_SEG
+ %macro MYPUSHSEGS64 2
+ %endmacro
+
+ %macro MYPOPSEGS64 2
+ %endmacro
+%else ; !VBOX_SKIP_RESTORE_SEG
+    ; Trashes rax, rdx & rcx.
+ %macro MYPUSHSEGS64 2
+ %ifndef HM_64_BIT_USE_NULL_SEL
+ mov %2, es
+ push %1
+ mov %2, ds
+ push %1
+ %endif
+
+        ; Special case for FS: Windows and Linux either don't use it or restore it when leaving kernel mode;
+        ; Solaris, OTOH, doesn't, so we must save it.
+ mov ecx, MSR_K8_FS_BASE
+ rdmsr
+ push rdx
+ push rax
+ %ifndef HM_64_BIT_USE_NULL_SEL
+ push fs
+ %endif
+
+ ; Special case for GS; OSes typically use swapgs to reset the hidden base register for GS on entry into the kernel.
+ ; The same happens on exit.
+ mov ecx, MSR_K8_GS_BASE
+ rdmsr
+ push rdx
+ push rax
+ %ifndef HM_64_BIT_USE_NULL_SEL
+ push gs
+ %endif
+ %endmacro
+
+ ; trashes, rax, rdx & rcx
+ %macro MYPOPSEGS64 2
+ ; Note: do not step through this code with a debugger!
+ %ifndef HM_64_BIT_USE_NULL_SEL
+ xor eax, eax
+ mov ds, ax
+ mov es, ax
+ mov fs, ax
+ mov gs, ax
+ %endif
+
+ %ifndef HM_64_BIT_USE_NULL_SEL
+ pop gs
+ %endif
+ pop rax
+ pop rdx
+ mov ecx, MSR_K8_GS_BASE
+ wrmsr
+
+ %ifndef HM_64_BIT_USE_NULL_SEL
+ pop fs
+ %endif
+ pop rax
+ pop rdx
+ mov ecx, MSR_K8_FS_BASE
+ wrmsr
+ ; Now it's safe to step again
+
+ %ifndef HM_64_BIT_USE_NULL_SEL
+ pop %1
+ mov ds, %2
+ pop %1
+ mov es, %2
+ %endif
+ %endmacro
+%endif ; VBOX_SKIP_RESTORE_SEG
+
+%macro MYPUSHAD32 0
+ pushad
+%endmacro
+%macro MYPOPAD32 0
+ popad
+%endmacro
+
+%macro MYPUSHSEGS32 2
+ push ds
+ push es
+ push fs
+ push gs
+%endmacro
+%macro MYPOPSEGS32 2
+ pop gs
+ pop fs
+ pop es
+ pop ds
+%endmacro
+
+%ifdef RT_ARCH_AMD64
+ %define MYPUSHAD MYPUSHAD64
+ %define MYPOPAD MYPOPAD64
+ %define MYPUSHSEGS MYPUSHSEGS64
+ %define MYPOPSEGS MYPOPSEGS64
+%else
+ %define MYPUSHAD MYPUSHAD32
+ %define MYPOPAD MYPOPAD32
+ %define MYPUSHSEGS MYPUSHSEGS32
+ %define MYPOPSEGS MYPOPSEGS32
+%endif
+
+;;
+; Creates an indirect branch prediction barrier on CPUs that need and support it.
+; @clobbers eax, edx, ecx
+; @param 1 How to address CPUMCTX.
+; @param 2 Which flag to test for (CPUMCTX_WSF_IBPB_ENTRY or CPUMCTX_WSF_IBPB_EXIT)
+%macro INDIRECT_BRANCH_PREDICTION_BARRIER 2
+ test byte [%1 + CPUMCTX.fWorldSwitcher], %2
+ jz %%no_indirect_branch_barrier
+ mov ecx, MSR_IA32_PRED_CMD
+ mov eax, MSR_IA32_PRED_CMD_F_IBPB
+ xor edx, edx
+ wrmsr
+%%no_indirect_branch_barrier:
+%endmacro
+
+;;
+; Creates an indirect branch prediction and L1D barrier on CPUs that need and support it.
+; @clobbers eax, edx, ecx
+; @param 1 How to address CPUMCTX.
+; @param 2 Which IBPB flag to test for (CPUMCTX_WSF_IBPB_ENTRY or CPUMCTX_WSF_IBPB_EXIT)
+; @param 3 Which FLUSH flag to test for (CPUMCTX_WSF_L1D_ENTRY)
+; @param 4 Which MDS flag to test for (CPUMCTX_WSF_MDS_ENTRY)
+%macro INDIRECT_BRANCH_PREDICTION_AND_L1_CACHE_BARRIER 4
+    ; Only a single test+jmp when all of these mitigations are disabled.
+ test byte [%1 + CPUMCTX.fWorldSwitcher], (%2 | %3 | %4)
+ jz %%no_barrier_needed
+
+ ; The eax:edx value is the same for both.
+ AssertCompile(MSR_IA32_PRED_CMD_F_IBPB == MSR_IA32_FLUSH_CMD_F_L1D)
+ mov eax, MSR_IA32_PRED_CMD_F_IBPB
+ xor edx, edx
+
+ ; Indirect branch barrier.
+ test byte [%1 + CPUMCTX.fWorldSwitcher], %2
+ jz %%no_indirect_branch_barrier
+ mov ecx, MSR_IA32_PRED_CMD
+ wrmsr
+%%no_indirect_branch_barrier:
+
+ ; Level 1 data cache flush.
+ test byte [%1 + CPUMCTX.fWorldSwitcher], %3
+ jz %%no_cache_flush_barrier
+ mov ecx, MSR_IA32_FLUSH_CMD
+ wrmsr
+ jmp %%no_mds_buffer_flushing ; MDS flushing is included in L1D_FLUSH
+%%no_cache_flush_barrier:
+
+ ; MDS buffer flushing.
+ test byte [%1 + CPUMCTX.fWorldSwitcher], %4
+ jz %%no_mds_buffer_flushing
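+        ; VERW with a valid, writable data segment selector as the operand clears
+        ; the affected microarchitectural buffers on CPUs advertising MD_CLEAR;
+        ; temporarily reserve a stack slot to hold the DS selector for it.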
+        sub     xSP, xCB
+        mov     [xSP], ds
+        verw    [xSP]
+        add     xSP, xCB
+%%no_mds_buffer_flushing:
+
+%%no_barrier_needed:
+%endmacro
+
+
+;*********************************************************************************************************************************
+;* External Symbols *
+;*********************************************************************************************************************************
+%ifdef VBOX_WITH_KERNEL_USING_XMM
+extern NAME(CPUMIsGuestFPUStateActive)
+%endif
+
+
+BEGINCODE
+
+
+;;
+; Restores host-state fields.
+;
+; @returns VBox status code
+; @param f32RestoreHost x86: [ebp + 08h] msc: ecx gcc: edi RestoreHost flags.
+; @param pRestoreHost x86: [ebp + 0ch] msc: rdx gcc: rsi Pointer to the RestoreHost struct.
+;
+ALIGNCODE(16)
+BEGINPROC VMXRestoreHostState
+%ifdef RT_ARCH_AMD64
+ %ifndef ASM_CALL64_GCC
+ ; Use GCC's input registers since we'll be needing both rcx and rdx further
+ ; down with the wrmsr instruction. Use the R10 and R11 register for saving
+    ; RDI and RSI since MSC preserves the latter two registers.
+ mov r10, rdi
+ mov r11, rsi
+ mov rdi, rcx
+ mov rsi, rdx
+ %endif
+
+ test edi, VMX_RESTORE_HOST_GDTR
+ jz .test_idtr
+ lgdt [rsi + VMXRESTOREHOST.HostGdtr]
+
+.test_idtr:
+ test edi, VMX_RESTORE_HOST_IDTR
+ jz .test_ds
+ lidt [rsi + VMXRESTOREHOST.HostIdtr]
+
+.test_ds:
+ test edi, VMX_RESTORE_HOST_SEL_DS
+ jz .test_es
+ mov ax, [rsi + VMXRESTOREHOST.uHostSelDS]
+ mov ds, eax
+
+.test_es:
+ test edi, VMX_RESTORE_HOST_SEL_ES
+ jz .test_tr
+ mov ax, [rsi + VMXRESTOREHOST.uHostSelES]
+ mov es, eax
+
+.test_tr:
+ test edi, VMX_RESTORE_HOST_SEL_TR
+ jz .test_fs
+ ; When restoring the TR, we must first clear the busy flag or we'll end up faulting.
+ mov dx, [rsi + VMXRESTOREHOST.uHostSelTR]
+ mov ax, dx
+ and eax, X86_SEL_MASK_OFF_RPL ; mask away TI and RPL bits leaving only the descriptor offset
+ test edi, VMX_RESTORE_HOST_GDT_READ_ONLY | VMX_RESTORE_HOST_GDT_NEED_WRITABLE
+ jnz .gdt_readonly
+ add rax, qword [rsi + VMXRESTOREHOST.HostGdtr + 2] ; xAX <- descriptor offset + GDTR.pGdt.
+ and dword [rax + 4], ~RT_BIT(9) ; clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit)
+ ltr dx
+ jmp short .test_fs
+.gdt_readonly:
+ test edi, VMX_RESTORE_HOST_GDT_NEED_WRITABLE
+ jnz .gdt_readonly_need_writable
+ mov rcx, cr0
+ mov r9, rcx
+ add rax, qword [rsi + VMXRESTOREHOST.HostGdtr + 2] ; xAX <- descriptor offset + GDTR.pGdt.
+ and rcx, ~X86_CR0_WP
+ mov cr0, rcx
+ and dword [rax + 4], ~RT_BIT(9) ; clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit)
+ ltr dx
+ mov cr0, r9
+ jmp short .test_fs
+.gdt_readonly_need_writable:
+ add rax, qword [rsi + VMXRESTOREHOST.HostGdtrRw + 2] ; xAX <- descriptor offset + GDTR.pGdtRw
+ and dword [rax + 4], ~RT_BIT(9) ; clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit)
+ lgdt [rsi + VMXRESTOREHOST.HostGdtrRw]
+ ltr dx
+ lgdt [rsi + VMXRESTOREHOST.HostGdtr] ; load the original GDT
+
+.test_fs:
+ ;
+ ; When restoring the selector values for FS and GS, we'll temporarily trash
+    ; the base address (at least the high 32 bits, but quite possibly the
+    ; whole base address); the wrmsr will restore it correctly. (VT-x actually
+ ; restores the base correctly when leaving guest mode, but not the selector
+ ; value, so there is little problem with interrupts being enabled prior to
+ ; this restore job.)
+ ; We'll disable ints once for both FS and GS as that's probably faster.
+ ;
+ test edi, VMX_RESTORE_HOST_SEL_FS | VMX_RESTORE_HOST_SEL_GS
+ jz .restore_success
+ pushfq
+ cli ; (see above)
+
+ test edi, VMX_RESTORE_HOST_SEL_FS
+ jz .test_gs
+ mov ax, word [rsi + VMXRESTOREHOST.uHostSelFS]
+ mov fs, eax
+ mov eax, dword [rsi + VMXRESTOREHOST.uHostFSBase] ; uHostFSBase - Lo
+ mov edx, dword [rsi + VMXRESTOREHOST.uHostFSBase + 4h] ; uHostFSBase - Hi
+ mov ecx, MSR_K8_FS_BASE
+ wrmsr
+
+.test_gs:
+ test edi, VMX_RESTORE_HOST_SEL_GS
+ jz .restore_flags
+ mov ax, word [rsi + VMXRESTOREHOST.uHostSelGS]
+ mov gs, eax
+ mov eax, dword [rsi + VMXRESTOREHOST.uHostGSBase] ; uHostGSBase - Lo
+ mov edx, dword [rsi + VMXRESTOREHOST.uHostGSBase + 4h] ; uHostGSBase - Hi
+ mov ecx, MSR_K8_GS_BASE
+ wrmsr
+
+.restore_flags:
+ popfq
+
+.restore_success:
+ mov eax, VINF_SUCCESS
+ %ifndef ASM_CALL64_GCC
+ ; Restore RDI and RSI on MSC.
+ mov rdi, r10
+ mov rsi, r11
+ %endif
+%else ; RT_ARCH_X86
+ mov eax, VERR_NOT_IMPLEMENTED
+%endif
+ ret
+ENDPROC VMXRestoreHostState
+
+
+;;
+; Dispatches an NMI to the host.
+;
+ALIGNCODE(16)
+BEGINPROC VMXDispatchHostNmi
+ ; NMI is always vector 2. The IDT[2] IRQ handler cannot be anything else. See Intel spec. 6.3.1 "External Interrupts".
+ int 2
+ ret
+ENDPROC VMXDispatchHostNmi
+
+
+;;
+; Executes VMWRITE, 64-bit value.
+;
+; @returns VBox status code.
+; @param idxField x86: [ebp + 08h] msc: rcx gcc: rdi VMCS index.
+; @param u64Data x86: [ebp + 0ch] msc: rdx gcc: rsi VM field value.
+;
+ALIGNCODE(16)
+BEGINPROC VMXWriteVmcs64
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ and edi, 0ffffffffh
+ xor rax, rax
+ vmwrite rdi, rsi
+ %else
+ and ecx, 0ffffffffh
+ xor rax, rax
+ vmwrite rcx, rdx
+ %endif
+%else ; RT_ARCH_X86
+ mov ecx, [esp + 4] ; idxField
+ lea edx, [esp + 8] ; &u64Data
+ vmwrite ecx, [edx] ; low dword
+ jz .done
+ jc .done
+ inc ecx
+ xor eax, eax
+ vmwrite ecx, [edx + 4] ; high dword
+.done:
+%endif ; RT_ARCH_X86
+ jnc .valid_vmcs
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+ ret
+.valid_vmcs:
+ jnz .the_end
+ mov eax, VERR_VMX_INVALID_VMCS_FIELD
+.the_end:
+ ret
+ENDPROC VMXWriteVmcs64
+
+
+;;
+; Executes VMREAD, 64-bit value.
+;
+; @returns VBox status code.
+; @param idxField VMCS index.
+; @param pData Where to store VM field value.
+;
+;DECLASM(int) VMXReadVmcs64(uint32_t idxField, uint64_t *pData);
+ALIGNCODE(16)
+BEGINPROC VMXReadVmcs64
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ and edi, 0ffffffffh
+ xor rax, rax
+ vmread [rsi], rdi
+ %else
+ and ecx, 0ffffffffh
+ xor rax, rax
+ vmread [rdx], rcx
+ %endif
+%else ; RT_ARCH_X86
+ mov ecx, [esp + 4] ; idxField
+ mov edx, [esp + 8] ; pData
+ vmread [edx], ecx ; low dword
+ jz .done
+ jc .done
+ inc ecx
+ xor eax, eax
+ vmread [edx + 4], ecx ; high dword
+.done:
+%endif ; RT_ARCH_X86
+ jnc .valid_vmcs
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+ ret
+.valid_vmcs:
+ jnz .the_end
+ mov eax, VERR_VMX_INVALID_VMCS_FIELD
+.the_end:
+ ret
+ENDPROC VMXReadVmcs64
+
+
+;;
+; Executes VMREAD, 32-bit value.
+;
+; @returns VBox status code.
+; @param idxField VMCS index.
+; @param pu32Data Where to store VM field value.
+;
+;DECLASM(int) VMXReadVmcs32(uint32_t idxField, uint32_t *pu32Data);
+ALIGNCODE(16)
+BEGINPROC VMXReadVmcs32
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ and edi, 0ffffffffh
+ xor rax, rax
+ vmread r10, rdi
+ mov [rsi], r10d
+ %else
+ and ecx, 0ffffffffh
+ xor rax, rax
+ vmread r10, rcx
+ mov [rdx], r10d
+ %endif
+%else ; RT_ARCH_X86
+ mov ecx, [esp + 4] ; idxField
+ mov edx, [esp + 8] ; pu32Data
+ xor eax, eax
+ vmread [edx], ecx
+%endif ; RT_ARCH_X86
+ jnc .valid_vmcs
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+ ret
+.valid_vmcs:
+ jnz .the_end
+ mov eax, VERR_VMX_INVALID_VMCS_FIELD
+.the_end:
+ ret
+ENDPROC VMXReadVmcs32
+
+
+;;
+; Executes VMWRITE, 32-bit value.
+;
+; @returns VBox status code.
+; @param idxField VMCS index.
+; @param    u32Data      The 32-bit value to write.
+;
+;DECLASM(int) VMXWriteVmcs32(uint32_t idxField, uint32_t u32Data);
+ALIGNCODE(16)
+BEGINPROC VMXWriteVmcs32
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ and edi, 0ffffffffh
+ and esi, 0ffffffffh
+ xor rax, rax
+ vmwrite rdi, rsi
+ %else
+ and ecx, 0ffffffffh
+ and edx, 0ffffffffh
+ xor rax, rax
+ vmwrite rcx, rdx
+ %endif
+%else ; RT_ARCH_X86
+ mov ecx, [esp + 4] ; idxField
+ mov edx, [esp + 8] ; u32Data
+ xor eax, eax
+ vmwrite ecx, edx
+%endif ; RT_ARCH_X86
+ jnc .valid_vmcs
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+ ret
+.valid_vmcs:
+ jnz .the_end
+ mov eax, VERR_VMX_INVALID_VMCS_FIELD
+.the_end:
+ ret
+ENDPROC VMXWriteVmcs32
+
+
+;;
+; Executes VMXON.
+;
+; @returns VBox status code.
+; @param HCPhysVMXOn Physical address of VMXON structure.
+;
+;DECLASM(int) VMXEnable(RTHCPHYS HCPhysVMXOn);
+BEGINPROC VMXEnable
+%ifdef RT_ARCH_AMD64
+ xor rax, rax
+ %ifdef ASM_CALL64_GCC
+ push rdi
+ %else
+ push rcx
+ %endif
+ vmxon [rsp]
+%else ; RT_ARCH_X86
+ xor eax, eax
+ vmxon [esp + 4]
+%endif ; RT_ARCH_X86
+ jnc .good
+ mov eax, VERR_VMX_INVALID_VMXON_PTR
+ jmp .the_end
+
+.good:
+ jnz .the_end
+ mov eax, VERR_VMX_VMXON_FAILED
+
+.the_end:
+%ifdef RT_ARCH_AMD64
+ add rsp, 8
+%endif
+ ret
+ENDPROC VMXEnable
+
+
+;;
+; Executes VMXOFF.
+;
+;DECLASM(void) VMXDisable(void);
+BEGINPROC VMXDisable
+ vmxoff
+.the_end:
+ ret
+ENDPROC VMXDisable
+
+
+;;
+; Executes VMCLEAR.
+;
+; @returns VBox status code.
+; @param HCPhysVmcs Physical address of VM control structure.
+;
+;DECLASM(int) VMXClearVmcs(RTHCPHYS HCPhysVmcs);
+ALIGNCODE(16)
+BEGINPROC VMXClearVmcs
+%ifdef RT_ARCH_AMD64
+ xor rax, rax
+ %ifdef ASM_CALL64_GCC
+ push rdi
+ %else
+ push rcx
+ %endif
+ vmclear [rsp]
+%else ; RT_ARCH_X86
+ xor eax, eax
+ vmclear [esp + 4]
+%endif ; RT_ARCH_X86
+ jnc .the_end
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+.the_end:
+%ifdef RT_ARCH_AMD64
+ add rsp, 8
+%endif
+ ret
+ENDPROC VMXClearVmcs
+
+
+;;
+; Executes VMPTRLD.
+;
+; @returns VBox status code.
+; @param HCPhysVmcs Physical address of VMCS structure.
+;
+;DECLASM(int) VMXLoadVmcs(RTHCPHYS HCPhysVmcs);
+ALIGNCODE(16)
+BEGINPROC VMXLoadVmcs
+%ifdef RT_ARCH_AMD64
+ xor rax, rax
+ %ifdef ASM_CALL64_GCC
+ push rdi
+ %else
+ push rcx
+ %endif
+ vmptrld [rsp]
+%else
+ xor eax, eax
+ vmptrld [esp + 4]
+%endif
+ jnc .the_end
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+.the_end:
+%ifdef RT_ARCH_AMD64
+ add rsp, 8
+%endif
+ ret
+ENDPROC VMXLoadVmcs
+
+
+;;
+; Executes VMPTRST.
+;
+; @returns VBox status code.
+; @param    pVMCS    x86: [esp + 04h]  gcc: rdi  msc: rcx   Address that will receive the current VMCS pointer.
+;
+;DECLASM(int) VMXGetCurrentVmcs(RTHCPHYS *pVMCS);
+BEGINPROC VMXGetCurrentVmcs
+%ifdef RT_OS_OS2
+ mov eax, VERR_NOT_SUPPORTED
+ ret
+%else
+ %ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ vmptrst qword [rdi]
+ %else
+ vmptrst qword [rcx]
+ %endif
+ %else
+ vmptrst qword [esp+04h]
+ %endif
+ xor eax, eax
+.the_end:
+ ret
+%endif
+ENDPROC VMXGetCurrentVmcs
+
+;;
+; Invalidate a page using INVEPT.
+;
+; @param enmTlbFlush msc:ecx gcc:edi x86:[esp+04] Type of flush.
+; @param pDescriptor msc:edx gcc:esi x86:[esp+08] Descriptor pointer.
+;
+;DECLASM(int) VMXR0InvEPT(VMXTLBFLUSHEPT enmTlbFlush, uint64_t *pDescriptor);
+BEGINPROC VMXR0InvEPT
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ and edi, 0ffffffffh
+ xor rax, rax
+; invept rdi, qword [rsi]
+ DB 0x66, 0x0F, 0x38, 0x80, 0x3E
+ %else
+ and ecx, 0ffffffffh
+ xor rax, rax
+; invept rcx, qword [rdx]
+ DB 0x66, 0x0F, 0x38, 0x80, 0xA
+ %endif
+%else
+ mov ecx, [esp + 4]
+ mov edx, [esp + 8]
+ xor eax, eax
+; invept ecx, qword [edx]
+ DB 0x66, 0x0F, 0x38, 0x80, 0xA
+%endif
+ jnc .valid_vmcs
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+ ret
+.valid_vmcs:
+ jnz .the_end
+ mov eax, VERR_INVALID_PARAMETER
+.the_end:
+ ret
+ENDPROC VMXR0InvEPT
+
+
+;;
+; Invalidate a page using INVVPID.
+;
+; @param enmTlbFlush msc:ecx gcc:edi x86:[esp+04] Type of flush
+; @param pDescriptor msc:edx gcc:esi x86:[esp+08] Descriptor pointer
+;
+;DECLASM(int) VMXR0InvVPID(VMXTLBFLUSHVPID enmTlbFlush, uint64_t *pDescriptor);
+BEGINPROC VMXR0InvVPID
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ and edi, 0ffffffffh
+ xor rax, rax
+; invvpid rdi, qword [rsi]
+ DB 0x66, 0x0F, 0x38, 0x81, 0x3E
+ %else
+ and ecx, 0ffffffffh
+ xor rax, rax
+; invvpid rcx, qword [rdx]
+ DB 0x66, 0x0F, 0x38, 0x81, 0xA
+ %endif
+%else
+ mov ecx, [esp + 4]
+ mov edx, [esp + 8]
+ xor eax, eax
+; invvpid ecx, qword [edx]
+ DB 0x66, 0x0F, 0x38, 0x81, 0xA
+%endif
+ jnc .valid_vmcs
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+ ret
+.valid_vmcs:
+ jnz .the_end
+ mov eax, VERR_INVALID_PARAMETER
+.the_end:
+ ret
+ENDPROC VMXR0InvVPID
+
+
+%if GC_ARCH_BITS == 64
+;;
+; Executes INVLPGA.
+;
+; @param pPageGC msc:rcx gcc:rdi x86:[esp+04] Virtual page to invalidate
+; @param uASID msc:rdx gcc:rsi x86:[esp+0C] Tagged TLB id
+;
+;DECLASM(void) SVMR0InvlpgA(RTGCPTR pPageGC, uint32_t uASID);
+BEGINPROC SVMR0InvlpgA
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ mov rax, rdi
+ mov rcx, rsi
+ %else
+ mov rax, rcx
+ mov rcx, rdx
+ %endif
+%else
+ mov eax, [esp + 4]
+ mov ecx, [esp + 0Ch]
+%endif
+ invlpga [xAX], ecx
+ ret
+ENDPROC SVMR0InvlpgA
+
+%else ; GC_ARCH_BITS != 64
+;;
+; Executes INVLPGA
+;
+; @param pPageGC msc:ecx gcc:edi x86:[esp+04] Virtual page to invalidate
+; @param uASID msc:edx gcc:esi x86:[esp+08] Tagged TLB id
+;
+;DECLASM(void) SVMR0InvlpgA(RTGCPTR pPageGC, uint32_t uASID);
+BEGINPROC SVMR0InvlpgA
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ movzx rax, edi
+ mov ecx, esi
+ %else
+ ; from http://www.cs.cmu.edu/~fp/courses/15213-s06/misc/asm64-handout.pdf:
+ ; "Perhaps unexpectedly, instructions that move or generate 32-bit register
+ ; values also set the upper 32 bits of the register to zero. Consequently
+ ; there is no need for an instruction movzlq."
+ mov eax, ecx
+ mov ecx, edx
+ %endif
+%else
+ mov eax, [esp + 4]
+ mov ecx, [esp + 8]
+%endif
+ invlpga [xAX], ecx
+ ret
+ENDPROC SVMR0InvlpgA
+
+%endif ; GC_ARCH_BITS != 64
+
+
+%ifdef VBOX_WITH_KERNEL_USING_XMM
+
+;;
+; Wrapper around vmx.pfnStartVM that preserves host XMM registers and
+; loads the guest ones when necessary.
+;
+; @cproto DECLASM(int) hmR0VMXStartVMWrapXMM(RTHCUINT fResume, PCPUMCTX pCtx, void *pvUnused, PVM pVM,
+; PVMCPU pVCpu, PFNHMVMXSTARTVM pfnStartVM);
+;
+; @returns eax
+;
+; @param fResumeVM msc:rcx
+; @param pCtx msc:rdx
+; @param pvUnused msc:r8
+; @param pVM msc:r9
+; @param pVCpu msc:[rbp+30h] The cross context virtual CPU structure of the calling EMT.
+; @param pfnStartVM msc:[rbp+38h]
+;
+; @remarks This is essentially the same code as hmR0SVMRunWrapXMM, only the parameters differ a little bit.
+;
+; @remarks Drivers shouldn't use AVX registers without saving+loading:
+; https://msdn.microsoft.com/en-us/library/windows/hardware/ff545910%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396
+; However, the compiler docs have a different idea:
+; https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
+; We'll go with the former for now.
+;
+; ASSUMING 64-bit and windows for now.
+;
+ALIGNCODE(16)
+BEGINPROC hmR0VMXStartVMWrapXMM
+ push xBP
+ mov xBP, xSP
+ sub xSP, 0b0h + 040h ; Don't bother optimizing the frame size.
+
+ ; Spill input parameters.
+ mov [xBP + 010h], rcx ; fResumeVM
+ mov [xBP + 018h], rdx ; pCtx
+ mov [xBP + 020h], r8 ; pvUnused
+ mov [xBP + 028h], r9 ; pVM
+
+ ; Ask CPUM whether we've started using the FPU yet.
+ mov rcx, [xBP + 30h] ; pVCpu
+ call NAME(CPUMIsGuestFPUStateActive)
+ test al, al
+ jnz .guest_fpu_state_active
+
+    ; No need to mess with XMM registers; just call the start routine and return.
+ mov r11, [xBP + 38h] ; pfnStartVM
+ mov r10, [xBP + 30h] ; pVCpu
+ mov [xSP + 020h], r10
+ mov rcx, [xBP + 010h] ; fResumeVM
+ mov rdx, [xBP + 018h] ; pCtx
+ mov r8, [xBP + 020h] ; pvUnused
+ mov r9, [xBP + 028h] ; pVM
+ call r11
+
+ leave
+ ret
+
+ALIGNCODE(8)
+.guest_fpu_state_active:
+ ; Save the non-volatile host XMM registers.
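+    ; (xmm6 through xmm15 and the MXCSR control bits are non-volatile in the Windows x64
+    ; calling convention, so only these need preserving across the call below.)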
+ movdqa [rsp + 040h + 000h], xmm6
+ movdqa [rsp + 040h + 010h], xmm7
+ movdqa [rsp + 040h + 020h], xmm8
+ movdqa [rsp + 040h + 030h], xmm9
+ movdqa [rsp + 040h + 040h], xmm10
+ movdqa [rsp + 040h + 050h], xmm11
+ movdqa [rsp + 040h + 060h], xmm12
+ movdqa [rsp + 040h + 070h], xmm13
+ movdqa [rsp + 040h + 080h], xmm14
+ movdqa [rsp + 040h + 090h], xmm15
+ stmxcsr [rsp + 040h + 0a0h]
+
+ mov r10, [xBP + 018h] ; pCtx
+ mov eax, [r10 + CPUMCTX.fXStateMask]
+ test eax, eax
+ jz .guest_fpu_state_manually
+
+ ;
+ ; Using XSAVE to load the guest XMM, YMM and ZMM registers.
+ ;
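+    ; edx:eax forms the XSAVE/XRSTOR requested-feature bitmap; only the volatile guest
+    ; components are requested and the high 32 feature bits (edx) stay zero.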
+ and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS
+ xor edx, edx
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ xrstor [r10]
+
+ ; Make the call (same as in the other case).
+ mov r11, [xBP + 38h] ; pfnStartVM
+ mov r10, [xBP + 30h] ; pVCpu
+ mov [xSP + 020h], r10
+ mov rcx, [xBP + 010h] ; fResumeVM
+ mov rdx, [xBP + 018h] ; pCtx
+ mov r8, [xBP + 020h] ; pvUnused
+ mov r9, [xBP + 028h] ; pVM
+ call r11
+
+ mov r11d, eax ; save return value (xsave below uses eax)
+
+ ; Save the guest XMM registers.
+ mov r10, [xBP + 018h] ; pCtx
+ mov eax, [r10 + CPUMCTX.fXStateMask]
+ and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS
+ xor edx, edx
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ xsave [r10]
+
+ mov eax, r11d ; restore return value
+
+.restore_non_volatile_host_xmm_regs:
+ ; Load the non-volatile host XMM registers.
+ movdqa xmm6, [rsp + 040h + 000h]
+ movdqa xmm7, [rsp + 040h + 010h]
+ movdqa xmm8, [rsp + 040h + 020h]
+ movdqa xmm9, [rsp + 040h + 030h]
+ movdqa xmm10, [rsp + 040h + 040h]
+ movdqa xmm11, [rsp + 040h + 050h]
+ movdqa xmm12, [rsp + 040h + 060h]
+ movdqa xmm13, [rsp + 040h + 070h]
+ movdqa xmm14, [rsp + 040h + 080h]
+ movdqa xmm15, [rsp + 040h + 090h]
+ ldmxcsr [rsp + 040h + 0a0h]
+ leave
+ ret
+
+ ;
+ ; No XSAVE, load and save the guest XMM registers manually.
+ ;
+.guest_fpu_state_manually:
+ ; Load the full guest XMM register state.
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ movdqa xmm0, [r10 + XMM_OFF_IN_X86FXSTATE + 000h]
+ movdqa xmm1, [r10 + XMM_OFF_IN_X86FXSTATE + 010h]
+ movdqa xmm2, [r10 + XMM_OFF_IN_X86FXSTATE + 020h]
+ movdqa xmm3, [r10 + XMM_OFF_IN_X86FXSTATE + 030h]
+ movdqa xmm4, [r10 + XMM_OFF_IN_X86FXSTATE + 040h]
+ movdqa xmm5, [r10 + XMM_OFF_IN_X86FXSTATE + 050h]
+ movdqa xmm6, [r10 + XMM_OFF_IN_X86FXSTATE + 060h]
+ movdqa xmm7, [r10 + XMM_OFF_IN_X86FXSTATE + 070h]
+ movdqa xmm8, [r10 + XMM_OFF_IN_X86FXSTATE + 080h]
+ movdqa xmm9, [r10 + XMM_OFF_IN_X86FXSTATE + 090h]
+ movdqa xmm10, [r10 + XMM_OFF_IN_X86FXSTATE + 0a0h]
+ movdqa xmm11, [r10 + XMM_OFF_IN_X86FXSTATE + 0b0h]
+ movdqa xmm12, [r10 + XMM_OFF_IN_X86FXSTATE + 0c0h]
+ movdqa xmm13, [r10 + XMM_OFF_IN_X86FXSTATE + 0d0h]
+ movdqa xmm14, [r10 + XMM_OFF_IN_X86FXSTATE + 0e0h]
+ movdqa xmm15, [r10 + XMM_OFF_IN_X86FXSTATE + 0f0h]
+ ldmxcsr [r10 + X86FXSTATE.MXCSR]
+
+ ; Make the call (same as in the other case).
+ mov r11, [xBP + 38h] ; pfnStartVM
+ mov r10, [xBP + 30h] ; pVCpu
+ mov [xSP + 020h], r10
+ mov rcx, [xBP + 010h] ; fResumeVM
+ mov rdx, [xBP + 018h] ; pCtx
+ mov r8, [xBP + 020h] ; pvUnused
+ mov r9, [xBP + 028h] ; pVM
+ call r11
+
+ ; Save the guest XMM registers.
+ mov r10, [xBP + 018h] ; pCtx
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ stmxcsr [r10 + X86FXSTATE.MXCSR]
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 000h], xmm0
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 010h], xmm1
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 020h], xmm2
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 030h], xmm3
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 040h], xmm4
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 050h], xmm5
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 060h], xmm6
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 070h], xmm7
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 080h], xmm8
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 090h], xmm9
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0a0h], xmm10
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0b0h], xmm11
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0c0h], xmm12
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0d0h], xmm13
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0e0h], xmm14
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0f0h], xmm15
+ jmp .restore_non_volatile_host_xmm_regs
+ENDPROC hmR0VMXStartVMWrapXMM
+
+;;
+; Wrapper around svm.pfnVMRun that preserves host XMM registers and
+; loads the guest ones when necessary.
+;
+; @cproto DECLASM(int) hmR0SVMRunWrapXMM(RTHCPHYS HCPhysVmcbHost, RTHCPHYS HCPhysVmcb, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu,
+; PFNHMSVMVMRUN pfnVMRun);
+;
+; @returns eax
+;
+; @param HCPhysVmcbHost msc:rcx
+; @param HCPhysVmcb msc:rdx
+; @param pCtx msc:r8
+; @param pVM msc:r9
+; @param pVCpu msc:[rbp+30h] The cross context virtual CPU structure of the calling EMT.
+; @param pfnVMRun msc:[rbp+38h]
+;
+; @remarks This is essentially the same code as hmR0VMXStartVMWrapXMM, only the parameters differ a little bit.
+;
+; @remarks Drivers shouldn't use AVX registers without saving+loading:
+; https://msdn.microsoft.com/en-us/library/windows/hardware/ff545910%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396
+; However, the compiler docs have a different idea:
+; https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
+; We'll go with the former for now.
+;
+; ASSUMING 64-bit and windows for now.
+ALIGNCODE(16)
+BEGINPROC hmR0SVMRunWrapXMM
+ push xBP
+ mov xBP, xSP
+ sub xSP, 0b0h + 040h ; don't bother optimizing the frame size
+
+ ; Spill input parameters.
+ mov [xBP + 010h], rcx ; HCPhysVmcbHost
+ mov [xBP + 018h], rdx ; HCPhysVmcb
+ mov [xBP + 020h], r8 ; pCtx
+ mov [xBP + 028h], r9 ; pVM
+
+ ; Ask CPUM whether we've started using the FPU yet.
+ mov rcx, [xBP + 30h] ; pVCpu
+ call NAME(CPUMIsGuestFPUStateActive)
+ test al, al
+ jnz .guest_fpu_state_active
+
+    ; No need to mess with XMM registers; just call the start routine and return.
+ mov r11, [xBP + 38h] ; pfnVMRun
+ mov r10, [xBP + 30h] ; pVCpu
+ mov [xSP + 020h], r10
+ mov rcx, [xBP + 010h] ; HCPhysVmcbHost
+ mov rdx, [xBP + 018h] ; HCPhysVmcb
+ mov r8, [xBP + 020h] ; pCtx
+ mov r9, [xBP + 028h] ; pVM
+ call r11
+
+ leave
+ ret
+
+ALIGNCODE(8)
+.guest_fpu_state_active:
+ ; Save the non-volatile host XMM registers.
+ movdqa [rsp + 040h + 000h], xmm6
+ movdqa [rsp + 040h + 010h], xmm7
+ movdqa [rsp + 040h + 020h], xmm8
+ movdqa [rsp + 040h + 030h], xmm9
+ movdqa [rsp + 040h + 040h], xmm10
+ movdqa [rsp + 040h + 050h], xmm11
+ movdqa [rsp + 040h + 060h], xmm12
+ movdqa [rsp + 040h + 070h], xmm13
+ movdqa [rsp + 040h + 080h], xmm14
+ movdqa [rsp + 040h + 090h], xmm15
+ stmxcsr [rsp + 040h + 0a0h]
+
+ mov r10, [xBP + 020h] ; pCtx
+ mov eax, [r10 + CPUMCTX.fXStateMask]
+ test eax, eax
+ jz .guest_fpu_state_manually
+
+ ;
+ ; Using XSAVE.
+ ;
+ and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS
+ xor edx, edx
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ xrstor [r10]
+
+ ; Make the call (same as in the other case).
+ mov r11, [xBP + 38h] ; pfnVMRun
+ mov r10, [xBP + 30h] ; pVCpu
+ mov [xSP + 020h], r10
+ mov rcx, [xBP + 010h] ; HCPhysVmcbHost
+ mov rdx, [xBP + 018h] ; HCPhysVmcb
+ mov r8, [xBP + 020h] ; pCtx
+ mov r9, [xBP + 028h] ; pVM
+ call r11
+
+ mov r11d, eax ; save return value (xsave below uses eax)
+
+ ; Save the guest XMM registers.
+ mov r10, [xBP + 020h] ; pCtx
+ mov eax, [r10 + CPUMCTX.fXStateMask]
+ and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS
+ xor edx, edx
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ xsave [r10]
+
+ mov eax, r11d ; restore return value
+
+.restore_non_volatile_host_xmm_regs:
+ ; Load the non-volatile host XMM registers.
+ movdqa xmm6, [rsp + 040h + 000h]
+ movdqa xmm7, [rsp + 040h + 010h]
+ movdqa xmm8, [rsp + 040h + 020h]
+ movdqa xmm9, [rsp + 040h + 030h]
+ movdqa xmm10, [rsp + 040h + 040h]
+ movdqa xmm11, [rsp + 040h + 050h]
+ movdqa xmm12, [rsp + 040h + 060h]
+ movdqa xmm13, [rsp + 040h + 070h]
+ movdqa xmm14, [rsp + 040h + 080h]
+ movdqa xmm15, [rsp + 040h + 090h]
+ ldmxcsr [rsp + 040h + 0a0h]
+ leave
+ ret
+
+ ;
+ ; No XSAVE, load and save the guest XMM registers manually.
+ ;
+.guest_fpu_state_manually:
+ ; Load the full guest XMM register state.
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ movdqa xmm0, [r10 + XMM_OFF_IN_X86FXSTATE + 000h]
+ movdqa xmm1, [r10 + XMM_OFF_IN_X86FXSTATE + 010h]
+ movdqa xmm2, [r10 + XMM_OFF_IN_X86FXSTATE + 020h]
+ movdqa xmm3, [r10 + XMM_OFF_IN_X86FXSTATE + 030h]
+ movdqa xmm4, [r10 + XMM_OFF_IN_X86FXSTATE + 040h]
+ movdqa xmm5, [r10 + XMM_OFF_IN_X86FXSTATE + 050h]
+ movdqa xmm6, [r10 + XMM_OFF_IN_X86FXSTATE + 060h]
+ movdqa xmm7, [r10 + XMM_OFF_IN_X86FXSTATE + 070h]
+ movdqa xmm8, [r10 + XMM_OFF_IN_X86FXSTATE + 080h]
+ movdqa xmm9, [r10 + XMM_OFF_IN_X86FXSTATE + 090h]
+ movdqa xmm10, [r10 + XMM_OFF_IN_X86FXSTATE + 0a0h]
+ movdqa xmm11, [r10 + XMM_OFF_IN_X86FXSTATE + 0b0h]
+ movdqa xmm12, [r10 + XMM_OFF_IN_X86FXSTATE + 0c0h]
+ movdqa xmm13, [r10 + XMM_OFF_IN_X86FXSTATE + 0d0h]
+ movdqa xmm14, [r10 + XMM_OFF_IN_X86FXSTATE + 0e0h]
+ movdqa xmm15, [r10 + XMM_OFF_IN_X86FXSTATE + 0f0h]
+ ldmxcsr [r10 + X86FXSTATE.MXCSR]
+
+ ; Make the call (same as in the other case).
+ mov r11, [xBP + 38h] ; pfnVMRun
+ mov r10, [xBP + 30h] ; pVCpu
+ mov [xSP + 020h], r10
+ mov rcx, [xBP + 010h] ; HCPhysVmcbHost
+ mov rdx, [xBP + 018h] ; HCPhysVmcb
+ mov r8, [xBP + 020h] ; pCtx
+ mov r9, [xBP + 028h] ; pVM
+ call r11
+
+ ; Save the guest XMM registers.
+ mov r10, [xBP + 020h] ; pCtx
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ stmxcsr [r10 + X86FXSTATE.MXCSR]
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 000h], xmm0
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 010h], xmm1
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 020h], xmm2
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 030h], xmm3
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 040h], xmm4
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 050h], xmm5
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 060h], xmm6
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 070h], xmm7
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 080h], xmm8
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 090h], xmm9
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0a0h], xmm10
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0b0h], xmm11
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0c0h], xmm12
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0d0h], xmm13
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0e0h], xmm14
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0f0h], xmm15
+ jmp .restore_non_volatile_host_xmm_regs
+ENDPROC hmR0SVMRunWrapXMM
+
+%endif ; VBOX_WITH_KERNEL_USING_XMM
+
+
+%ifdef RT_ARCH_AMD64
+;; @def RESTORE_STATE_VM64
+; Macro restoring essential host state and updating guest state
+; for 64-bit host, 64-bit guest for VT-x.
+;
+%macro RESTORE_STATE_VM64 0
+ ; Restore base and limit of the IDTR & GDTR.
+ %ifndef VMX_SKIP_IDTR
+ lidt [xSP]
+ add xSP, xCB * 2
+ %endif
+ %ifndef VMX_SKIP_GDTR
+ lgdt [xSP]
+ add xSP, xCB * 2
+ %endif
+
+ push xDI
+ %ifndef VMX_SKIP_TR
+ mov xDI, [xSP + xCB * 3] ; pCtx (*3 to skip the saved xDI, TR, LDTR)
+ %else
+ mov xDI, [xSP + xCB * 2] ; pCtx (*2 to skip the saved xDI, LDTR)
+ %endif
+
+ mov qword [xDI + CPUMCTX.eax], rax
+ mov rax, SPECTRE_FILLER64
+ mov qword [xDI + CPUMCTX.ebx], rbx
+ mov rbx, rax
+ mov qword [xDI + CPUMCTX.ecx], rcx
+ mov rcx, rax
+ mov qword [xDI + CPUMCTX.edx], rdx
+ mov rdx, rax
+ mov qword [xDI + CPUMCTX.esi], rsi
+ mov rsi, rax
+ mov qword [xDI + CPUMCTX.ebp], rbp
+ mov rbp, rax
+ mov qword [xDI + CPUMCTX.r8], r8
+ mov r8, rax
+ mov qword [xDI + CPUMCTX.r9], r9
+ mov r9, rax
+ mov qword [xDI + CPUMCTX.r10], r10
+ mov r10, rax
+ mov qword [xDI + CPUMCTX.r11], r11
+ mov r11, rax
+ mov qword [xDI + CPUMCTX.r12], r12
+ mov r12, rax
+ mov qword [xDI + CPUMCTX.r13], r13
+ mov r13, rax
+ mov qword [xDI + CPUMCTX.r14], r14
+ mov r14, rax
+ mov qword [xDI + CPUMCTX.r15], r15
+ mov r15, rax
+ mov rax, cr2
+ mov qword [xDI + CPUMCTX.cr2], rax
+
+ pop xAX ; The guest rdi we pushed above
+ mov qword [xDI + CPUMCTX.edi], rax
+
+ ; Fight spectre.
+ INDIRECT_BRANCH_PREDICTION_BARRIER xDI, CPUMCTX_WSF_IBPB_EXIT
+
+ %ifndef VMX_SKIP_TR
+ ; Restore TSS selector; must mark it as not busy before using ltr!
+ ; ASSUME that this is supposed to be 'BUSY' (saves 20-30 ticks on the T42p).
+ ; @todo get rid of sgdt
+ pop xBX ; Saved TR
+ sub xSP, xCB * 2
+ sgdt [xSP]
+ mov xAX, xBX
+ and eax, X86_SEL_MASK_OFF_RPL ; mask away TI and RPL bits leaving only the descriptor offset
+ add xAX, [xSP + 2] ; eax <- GDTR.address + descriptor offset
+ and dword [xAX + 4], ~RT_BIT(9) ; clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit)
+ ltr bx
+ add xSP, xCB * 2
+ %endif
+
+ pop xAX ; Saved LDTR
+ cmp eax, 0
+ je %%skip_ldt_write64
+ lldt ax
+
+%%skip_ldt_write64:
+ pop xSI ; pCtx (needed in rsi by the macros below)
+
+ ; Restore segment registers.
+ MYPOPSEGS xAX, ax
+
+ ; Restore the host XCR0 if necessary.
+ pop xCX
+ test ecx, ecx
+ jnz %%xcr0_after_skip
+ pop xAX
+ pop xDX
+ xsetbv ; ecx is already zero.
+%%xcr0_after_skip:
+
+ ; Restore general purpose registers.
+ MYPOPAD
+%endmacro
+
+
+;;
+; Prepares for and executes VMLAUNCH/VMRESUME (64-bit guest mode)
+;
+; @returns VBox status code
+; @param    fResume    msc:rcx, gcc:rdi       Whether to use vmlaunch/vmresume.
+; @param pCtx msc:rdx, gcc:rsi Pointer to the guest-CPU context.
+; @param pvUnused msc:r8, gcc:rdx Unused argument.
+; @param pVM msc:r9, gcc:rcx The cross context VM structure.
+; @param    pVCpu      msc:[rbp+30h], gcc:r8  The cross context virtual CPU structure of the calling EMT.
+;
+ALIGNCODE(16)
+BEGINPROC VMXR0StartVM64
+ push xBP
+ mov xBP, xSP
+
+ pushf
+ cli
+
+ ; Save all general purpose host registers.
+ MYPUSHAD
+
+ ; First we have to save some final CPU context registers.
+ lea r10, [.vmlaunch64_done wrt rip]
+ mov rax, VMX_VMCS_HOST_RIP ; return address (too difficult to continue after VMLAUNCH?)
+ vmwrite rax, r10
+ ; Note: ASSUMES success!
+
+ ;
+ ; Unify the input parameter registers.
+ ;
+%ifdef ASM_CALL64_GCC
+ ; fResume already in rdi
+ ; pCtx already in rsi
+ mov rbx, rdx ; pvUnused
+%else
+ mov rdi, rcx ; fResume
+ mov rsi, rdx ; pCtx
+ mov rbx, r8 ; pvUnused
+%endif
+
+ ;
+ ; Save the host XCR0 and load the guest one if necessary.
+ ; Note! Trashes rdx and rcx.
+ ;
+%ifdef ASM_CALL64_MSC
+ mov rax, [xBP + 30h] ; pVCpu
+%else
+ mov rax, r8 ; pVCpu
+%endif
+ test byte [xAX + VMCPU.hm + HMCPU.fLoadSaveGuestXcr0], 1
+ jz .xcr0_before_skip
+
+ xor ecx, ecx
+ xgetbv ; save the host one on the stack
+ push xDX
+ push xAX
+
+ mov eax, [xSI + CPUMCTX.aXcr] ; load the guest one
+ mov edx, [xSI + CPUMCTX.aXcr + 4]
+ xor ecx, ecx ; paranoia
+ xsetbv
+
+ push 0 ; indicate that we must restore XCR0 (popped into ecx, thus 0)
+ jmp .xcr0_before_done
+
+.xcr0_before_skip:
+    push    3fh                                 ; indicate that we need not restore XCR0
+.xcr0_before_done:
+
+ ;
+ ; Save segment registers.
+ ; Note! Trashes rdx & rcx, so we moved it here (amd64 case).
+ ;
+ MYPUSHSEGS xAX, ax
+
+ ; Save the pCtx pointer.
+ push xSI
+
+ ; Save host LDTR.
+ xor eax, eax
+ sldt ax
+ push xAX
+
+%ifndef VMX_SKIP_TR
+ ; The host TR limit is reset to 0x67; save & restore it manually.
+ str eax
+ push xAX
+%endif
+
+%ifndef VMX_SKIP_GDTR
+ ; VT-x only saves the base of the GDTR & IDTR and resets the limit to 0xffff; we must restore the limit correctly!
+ sub xSP, xCB * 2
+ sgdt [xSP]
+%endif
+%ifndef VMX_SKIP_IDTR
+ sub xSP, xCB * 2
+ sidt [xSP]
+%endif
+
+ ; Load CR2 if necessary (may be expensive as writing CR2 is a synchronizing instruction).
+ mov rbx, qword [xSI + CPUMCTX.cr2]
+ mov rdx, cr2
+ cmp rbx, rdx
+ je .skip_cr2_write
+ mov cr2, rbx
+
+.skip_cr2_write:
+ mov eax, VMX_VMCS_HOST_RSP
+ vmwrite xAX, xSP
+ ; Note: ASSUMES success!
+ ; Don't mess with ESP anymore!!!
+
+ ; Fight spectre and similar.
+ INDIRECT_BRANCH_PREDICTION_AND_L1_CACHE_BARRIER xSI, CPUMCTX_WSF_IBPB_ENTRY, CPUMCTX_WSF_L1D_ENTRY, CPUMCTX_WSF_MDS_ENTRY
+
+ ; Load guest general purpose registers.
+ mov rax, qword [xSI + CPUMCTX.eax]
+ mov rbx, qword [xSI + CPUMCTX.ebx]
+ mov rcx, qword [xSI + CPUMCTX.ecx]
+ mov rdx, qword [xSI + CPUMCTX.edx]
+ mov rbp, qword [xSI + CPUMCTX.ebp]
+ mov r8, qword [xSI + CPUMCTX.r8]
+ mov r9, qword [xSI + CPUMCTX.r9]
+ mov r10, qword [xSI + CPUMCTX.r10]
+ mov r11, qword [xSI + CPUMCTX.r11]
+ mov r12, qword [xSI + CPUMCTX.r12]
+ mov r13, qword [xSI + CPUMCTX.r13]
+ mov r14, qword [xSI + CPUMCTX.r14]
+ mov r15, qword [xSI + CPUMCTX.r15]
+
+ ; Resume or start VM?
+ cmp xDI, 0 ; fResume
+
+ ; Load guest rdi & rsi.
+ mov rdi, qword [xSI + CPUMCTX.edi]
+ mov rsi, qword [xSI + CPUMCTX.esi]
+
+ je .vmlaunch64_launch
+
+ vmresume
+ jc near .vmxstart64_invalid_vmcs_ptr
+ jz near .vmxstart64_start_failed
+    jmp     .vmlaunch64_done                    ; here if vmresume detected a failure
+
+.vmlaunch64_launch:
+ vmlaunch
+ jc near .vmxstart64_invalid_vmcs_ptr
+ jz near .vmxstart64_start_failed
+    jmp     .vmlaunch64_done                    ; here if vmlaunch detected a failure
+
+ALIGNCODE(16)
+.vmlaunch64_done:
+ RESTORE_STATE_VM64
+ mov eax, VINF_SUCCESS
+
+.vmstart64_end:
+ popf
+ pop xBP
+ ret
+
+.vmxstart64_invalid_vmcs_ptr:
+ RESTORE_STATE_VM64
+ mov eax, VERR_VMX_INVALID_VMCS_PTR_TO_START_VM
+ jmp .vmstart64_end
+
+.vmxstart64_start_failed:
+ RESTORE_STATE_VM64
+ mov eax, VERR_VMX_UNABLE_TO_START_VM
+ jmp .vmstart64_end
+ENDPROC VMXR0StartVM64
+%endif ; RT_ARCH_AMD64
+
+
+;;
+; Clears the MDS buffers using VERW.
+ALIGNCODE(16)
+BEGINPROC hmR0MdsClear
+ sub xSP, xCB
+ mov [xSP], ds
+ verw [xSP]
+ add xSP, xCB
+ ret
+ENDPROC hmR0MdsClear
+
+
+%ifdef RT_ARCH_AMD64
+;;
+; Prepares for and executes VMRUN (32-bit and 64-bit guests).
+;
+; @returns VBox status code
+; @param HCPhysVmcbHost msc:rcx,gcc:rdi Physical address of host VMCB.
+; @param HCPhysVmcb msc:rdx,gcc:rsi Physical address of guest VMCB.
+; @param pCtx msc:r8,gcc:rdx Pointer to the guest-CPU context.
+; @param pVM msc:r9,gcc:rcx The cross context VM structure.
+; @param pVCpu msc:[rsp+28],gcc:r8 The cross context virtual CPU structure of the calling EMT.
+;
+ALIGNCODE(16)
+BEGINPROC SVMR0VMRun
+ ; Fake a cdecl stack frame
+ %ifdef ASM_CALL64_GCC
+ push r8 ; pVCpu
+ push rcx ; pVM
+ push rdx ; pCtx
+ push rsi ; HCPhysVmcb
+ push rdi ; HCPhysVmcbHost
+ %else
+ mov rax, [rsp + 28h]
+ push rax ; rbp + 30h pVCpu
+ push r9 ; rbp + 28h pVM
+ push r8 ; rbp + 20h pCtx
+ push rdx ; rbp + 18h HCPhysVmcb
+ push rcx ; rbp + 10h HCPhysVmcbHost
+ %endif
+ push 0 ; rbp + 08h "fake ret addr"
+ push rbp ; rbp + 00h
+ mov rbp, rsp
+ pushf
+
+ ; Manual save and restore:
+ ; - General purpose registers except RIP, RSP, RAX
+ ;
+ ; Trashed:
+ ; - CR2 (we don't care)
+ ; - LDTR (reset to 0)
+ ; - DRx (presumably not changed at all)
+ ; - DR7 (reset to 0x400)
+
+ ; Save all general purpose host registers.
+ MYPUSHAD
+
+ ; Load pCtx into xSI.
+ mov xSI, [rbp + xCB * 2 + RTHCPHYS_CB * 2]
+
+ ; Save the host XCR0 and load the guest one if necessary.
+ mov rax, [xBP + 30h] ; pVCpu
+ test byte [xAX + VMCPU.hm + HMCPU.fLoadSaveGuestXcr0], 1
+ jz .xcr0_before_skip
+
+ xor ecx, ecx
+ xgetbv ; save the host XCR0 on the stack
+ push xDX
+ push xAX
+
+ mov xSI, [xBP + xCB * 2 + RTHCPHYS_CB * 2] ; pCtx
+ mov eax, [xSI + CPUMCTX.aXcr] ; load the guest XCR0
+ mov edx, [xSI + CPUMCTX.aXcr + 4]
+ xor ecx, ecx ; paranoia
+ xsetbv
+
+ push 0 ; indicate that we must restore XCR0 (popped into ecx, thus 0)
+ jmp .xcr0_before_done
+
+.xcr0_before_skip:
+ push 3fh ; indicate that we need not restore XCR0
+.xcr0_before_done:
+
+ ; Save guest CPU-context pointer for simplifying saving of the GPRs afterwards.
+ push rsi
+
+ ; Save host fs, gs, sysenter msr etc.
+    mov     rax, [rbp + xCB * 2]                ; HCPhysVmcbHost (64-bit physical address; x86: take low dword only)
+ push rax ; save for the vmload after vmrun
+ vmsave
+
+ ; Fight spectre.
+ INDIRECT_BRANCH_PREDICTION_BARRIER xSI, CPUMCTX_WSF_IBPB_ENTRY
+
+ ; Setup rax for VMLOAD.
+    mov     rax, [rbp + xCB * 2 + RTHCPHYS_CB]  ; HCPhysVmcb (64-bit physical address; take low dword only)
+
+ ; Load guest general purpose registers (rax is loaded from the VMCB by VMRUN).
+ mov rbx, qword [xSI + CPUMCTX.ebx]
+ mov rcx, qword [xSI + CPUMCTX.ecx]
+ mov rdx, qword [xSI + CPUMCTX.edx]
+ mov rdi, qword [xSI + CPUMCTX.edi]
+ mov rbp, qword [xSI + CPUMCTX.ebp]
+ mov r8, qword [xSI + CPUMCTX.r8]
+ mov r9, qword [xSI + CPUMCTX.r9]
+ mov r10, qword [xSI + CPUMCTX.r10]
+ mov r11, qword [xSI + CPUMCTX.r11]
+ mov r12, qword [xSI + CPUMCTX.r12]
+ mov r13, qword [xSI + CPUMCTX.r13]
+ mov r14, qword [xSI + CPUMCTX.r14]
+ mov r15, qword [xSI + CPUMCTX.r15]
+ mov rsi, qword [xSI + CPUMCTX.esi]
+
+ ; Clear the global interrupt flag & execute sti to make sure external interrupts cause a world switch.
+ clgi
+ sti
+
+ ; Load guest FS, GS, Sysenter MSRs etc.
+ vmload
+
+ ; Run the VM.
+ vmrun
+
+ ; Save guest fs, gs, sysenter msr etc.
+ vmsave
+
+ ; Load host fs, gs, sysenter msr etc.
+ pop rax ; load HCPhysVmcbHost (pushed above)
+ vmload
+
+ ; Set the global interrupt flag again, but execute cli to make sure IF=0.
+ cli
+ stgi
+
+ ; Pop the context pointer (pushed above) and save the guest GPRs (sans RSP and RAX).
+ pop rax
+
+ mov qword [rax + CPUMCTX.ebx], rbx
+ mov rbx, SPECTRE_FILLER64
+ mov qword [rax + CPUMCTX.ecx], rcx
+ mov rcx, rbx
+ mov qword [rax + CPUMCTX.edx], rdx
+ mov rdx, rbx
+ mov qword [rax + CPUMCTX.esi], rsi
+ mov rsi, rbx
+ mov qword [rax + CPUMCTX.edi], rdi
+ mov rdi, rbx
+ mov qword [rax + CPUMCTX.ebp], rbp
+ mov rbp, rbx
+ mov qword [rax + CPUMCTX.r8], r8
+ mov r8, rbx
+ mov qword [rax + CPUMCTX.r9], r9
+ mov r9, rbx
+ mov qword [rax + CPUMCTX.r10], r10
+ mov r10, rbx
+ mov qword [rax + CPUMCTX.r11], r11
+ mov r11, rbx
+ mov qword [rax + CPUMCTX.r12], r12
+ mov r12, rbx
+ mov qword [rax + CPUMCTX.r13], r13
+ mov r13, rbx
+ mov qword [rax + CPUMCTX.r14], r14
+ mov r14, rbx
+ mov qword [rax + CPUMCTX.r15], r15
+ mov r15, rbx
+
+ ; Fight spectre. Note! Trashes rax!
+ INDIRECT_BRANCH_PREDICTION_BARRIER rax, CPUMCTX_WSF_IBPB_EXIT
+
+ ; Restore the host xcr0 if necessary.
+ pop xCX
+ test ecx, ecx
+ jnz .xcr0_after_skip
+ pop xAX
+ pop xDX
+ xsetbv ; ecx is already zero
+.xcr0_after_skip:
+
+ ; Restore host general purpose registers.
+ MYPOPAD
+
+ mov eax, VINF_SUCCESS
+
+ popf
+ pop rbp
+ add rsp, 6 * xCB
+ ret
+ENDPROC SVMR0VMRun
+%endif ; RT_ARCH_AMD64
+
diff --git a/src/VBox/VMM/VMMR0/HMSVMR0.cpp b/src/VBox/VMM/VMMR0/HMSVMR0.cpp
new file mode 100644
index 00000000..9b71f272
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/HMSVMR0.cpp
@@ -0,0 +1,7847 @@
+/* $Id: HMSVMR0.cpp $ */
+/** @file
+ * HM SVM (AMD-V) - Host Context Ring-0.
+ */
+
+/*
+ * Copyright (C) 2013-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_HM
+#define VMCPU_INCL_CPUM_GST_CTX
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/thread.h>
+
+#include <VBox/vmm/pdmapi.h>
+#include <VBox/vmm/dbgf.h>
+#include <VBox/vmm/iem.h>
+#include <VBox/vmm/iom.h>
+#include <VBox/vmm/tm.h>
+#include <VBox/vmm/em.h>
+#include <VBox/vmm/gim.h>
+#include <VBox/vmm/apic.h>
+#include "HMInternal.h"
+#include <VBox/vmm/vmcc.h>
+#include <VBox/err.h>
+#include "HMSVMR0.h"
+#include "dtrace/VBoxVMM.h"
+
+#ifdef DEBUG_ramshankar
+# define HMSVM_SYNC_FULL_GUEST_STATE
+# define HMSVM_ALWAYS_TRAP_ALL_XCPTS
+# define HMSVM_ALWAYS_TRAP_PF
+# define HMSVM_ALWAYS_TRAP_TASK_SWITCH
+#endif
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+#ifdef VBOX_WITH_STATISTICS
+# define HMSVM_EXITCODE_STAM_COUNTER_INC(u64ExitCode) do { \
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll); \
+ if ((u64ExitCode) == SVM_EXIT_NPF) \
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf); \
+ else \
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[(u64ExitCode) & MASK_EXITREASON_STAT]); \
+ } while (0)
+
+# ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+# define HMSVM_NESTED_EXITCODE_STAM_COUNTER_INC(u64ExitCode) do { \
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll); \
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll); \
+ if ((u64ExitCode) == SVM_EXIT_NPF) \
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitReasonNpf); \
+ else \
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatNestedExitReasonR0[(u64ExitCode) & MASK_EXITREASON_STAT]); \
+ } while (0)
+# endif
+#else
+# define HMSVM_EXITCODE_STAM_COUNTER_INC(u64ExitCode) do { } while (0)
+# ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+# define HMSVM_NESTED_EXITCODE_STAM_COUNTER_INC(u64ExitCode) do { } while (0)
+# endif
+#endif /* !VBOX_WITH_STATISTICS */
+
+/** If we decide to use a function table approach this can be useful to
+ * switch to a "static DECLCALLBACK(int)". */
+#define HMSVM_EXIT_DECL static int
+
+/**
+ * Subset of the guest-CPU state that is kept by SVM R0 code while executing the
+ * guest using hardware-assisted SVM.
+ *
+ * This excludes state like TSC AUX, GPRs (other than RSP, RAX) which are always
+ * swapped and restored across the world-switch and also registers like
+ * EFER, PAT MSR etc. which cannot be modified by the guest without causing a
+ * \#VMEXIT.
+ */
+#define HMSVM_CPUMCTX_EXTRN_ALL ( CPUMCTX_EXTRN_RIP \
+ | CPUMCTX_EXTRN_RFLAGS \
+ | CPUMCTX_EXTRN_RAX \
+ | CPUMCTX_EXTRN_RSP \
+ | CPUMCTX_EXTRN_SREG_MASK \
+ | CPUMCTX_EXTRN_CR0 \
+ | CPUMCTX_EXTRN_CR2 \
+ | CPUMCTX_EXTRN_CR3 \
+ | CPUMCTX_EXTRN_TABLE_MASK \
+ | CPUMCTX_EXTRN_DR6 \
+ | CPUMCTX_EXTRN_DR7 \
+ | CPUMCTX_EXTRN_KERNEL_GS_BASE \
+ | CPUMCTX_EXTRN_SYSCALL_MSRS \
+ | CPUMCTX_EXTRN_SYSENTER_MSRS \
+ | CPUMCTX_EXTRN_HWVIRT \
+ | CPUMCTX_EXTRN_HM_SVM_MASK)
+
+/**
+ * Subset of the guest-CPU state that is shared between the guest and host.
+ */
+#define HMSVM_CPUMCTX_SHARED_STATE CPUMCTX_EXTRN_DR_MASK
+
+/** Macro for importing guest state from the VMCB back into CPUMCTX. */
+#define HMSVM_CPUMCTX_IMPORT_STATE(a_pVCpu, a_fWhat) \
+ do { \
+ if ((a_pVCpu)->cpum.GstCtx.fExtrn & (a_fWhat)) \
+ hmR0SvmImportGuestState((a_pVCpu), (a_fWhat)); \
+ } while (0)
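+/* Illustrative usage (a sketch, not taken from this file): ensure RIP and RFLAGS are
+   current in CPUMCTX before inspecting them:
+       HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS); */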
+
+/** Assert that the required state bits are fetched. */
+#define HMSVM_CPUMCTX_ASSERT(a_pVCpu, a_fExtrnMbz) AssertMsg(!((a_pVCpu)->cpum.GstCtx.fExtrn & (a_fExtrnMbz)), \
+ ("fExtrn=%#RX64 fExtrnMbz=%#RX64\n", \
+ (a_pVCpu)->cpum.GstCtx.fExtrn, (a_fExtrnMbz)))
+
+/** Assert that preemption is disabled or covered by thread-context hooks. */
+#define HMSVM_ASSERT_PREEMPT_SAFE(a_pVCpu) Assert( VMMR0ThreadCtxHookIsEnabled((a_pVCpu)) \
+ || !RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+/** Assert that we haven't migrated CPUs when thread-context hooks are not
+ * used. */
+#define HMSVM_ASSERT_CPU_SAFE(a_pVCpu) AssertMsg( VMMR0ThreadCtxHookIsEnabled((a_pVCpu)) \
+ || (a_pVCpu)->hm.s.idEnteredCpu == RTMpCpuId(), \
+ ("Illegal migration! Entered on CPU %u Current %u\n", \
+ (a_pVCpu)->hm.s.idEnteredCpu, RTMpCpuId()));
+
+/** Assert that we're not executing a nested-guest. */
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+# define HMSVM_ASSERT_NOT_IN_NESTED_GUEST(a_pCtx) Assert(!CPUMIsGuestInSvmNestedHwVirtMode((a_pCtx)))
+#else
+# define HMSVM_ASSERT_NOT_IN_NESTED_GUEST(a_pCtx) do { NOREF((a_pCtx)); } while (0)
+#endif
+
+/** Assert that we're executing a nested-guest. */
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+# define HMSVM_ASSERT_IN_NESTED_GUEST(a_pCtx) Assert(CPUMIsGuestInSvmNestedHwVirtMode((a_pCtx)))
+#else
+# define HMSVM_ASSERT_IN_NESTED_GUEST(a_pCtx) do { NOREF((a_pCtx)); } while (0)
+#endif
+
+/** Macro for checking and returning from the using function for
+ * \#VMEXIT intercepts that may be caused during delivery of another
+ * event in the guest. */
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+# define HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(a_pVCpu, a_pSvmTransient) \
+ do \
+ { \
+ int rc = hmR0SvmCheckExitDueToEventDelivery((a_pVCpu), (a_pSvmTransient)); \
+ if (RT_LIKELY(rc == VINF_SUCCESS)) { /* continue #VMEXIT handling */ } \
+ else if ( rc == VINF_HM_DOUBLE_FAULT) { return VINF_SUCCESS; } \
+ else if ( rc == VINF_EM_RESET \
+ && CPUMIsGuestSvmCtrlInterceptSet((a_pVCpu), &(a_pVCpu)->cpum.GstCtx, SVM_CTRL_INTERCEPT_SHUTDOWN)) \
+ { \
+ HMSVM_CPUMCTX_IMPORT_STATE((a_pVCpu), HMSVM_CPUMCTX_EXTRN_ALL); \
+ return VBOXSTRICTRC_TODO(IEMExecSvmVmexit((a_pVCpu), SVM_EXIT_SHUTDOWN, 0, 0)); \
+ } \
+ else \
+ return rc; \
+ } while (0)
+#else
+# define HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(a_pVCpu, a_pSvmTransient) \
+ do \
+ { \
+ int rc = hmR0SvmCheckExitDueToEventDelivery((a_pVCpu), (a_pSvmTransient)); \
+ if (RT_LIKELY(rc == VINF_SUCCESS)) { /* continue #VMEXIT handling */ } \
+ else if ( rc == VINF_HM_DOUBLE_FAULT) { return VINF_SUCCESS; } \
+ else \
+ return rc; \
+ } while (0)
+#endif
+
+/** Macro for upgrading a @a a_rc to VINF_EM_DBG_STEPPED after emulating an
+ * instruction that exited. */
+#define HMSVM_CHECK_SINGLE_STEP(a_pVCpu, a_rc) \
+ do { \
+ if ((a_pVCpu)->hm.s.fSingleInstruction && (a_rc) == VINF_SUCCESS) \
+ (a_rc) = VINF_EM_DBG_STEPPED; \
+ } while (0)
+
+/** Validate segment descriptor granularity bit. */
+#ifdef VBOX_STRICT
+# define HMSVM_ASSERT_SEG_GRANULARITY(a_pCtx, reg) \
+ AssertMsg( !(a_pCtx)->reg.Attr.n.u1Present \
+ || ( (a_pCtx)->reg.Attr.n.u1Granularity \
+ ? ((a_pCtx)->reg.u32Limit & 0xfff) == 0xfff \
+ : (a_pCtx)->reg.u32Limit <= UINT32_C(0xfffff)), \
+ ("Invalid Segment Attributes Limit=%#RX32 Attr=%#RX32 Base=%#RX64\n", (a_pCtx)->reg.u32Limit, \
+ (a_pCtx)->reg.Attr.u, (a_pCtx)->reg.u64Base))
+#else
+# define HMSVM_ASSERT_SEG_GRANULARITY(a_pCtx, reg) do { } while (0)
+#endif
+
+/**
+ * Exception bitmap mask for all contributory exceptions.
+ *
+ * Page fault is deliberately excluded here as it's conditional as to whether
+ * it's contributory or benign. Page faults are handled separately.
+ */
+#define HMSVM_CONTRIBUTORY_XCPT_MASK ( RT_BIT(X86_XCPT_GP) | RT_BIT(X86_XCPT_NP) | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_TS) \
+ | RT_BIT(X86_XCPT_DE))
+
+/**
+ * Mandatory/unconditional guest control intercepts.
+ *
+ * SMIs can and do happen in normal operation. We need not intercept them
+ * while executing the guest (or nested-guest).
+ */
+#define HMSVM_MANDATORY_GUEST_CTRL_INTERCEPTS ( SVM_CTRL_INTERCEPT_INTR \
+ | SVM_CTRL_INTERCEPT_NMI \
+ | SVM_CTRL_INTERCEPT_INIT \
+ | SVM_CTRL_INTERCEPT_RDPMC \
+ | SVM_CTRL_INTERCEPT_CPUID \
+ | SVM_CTRL_INTERCEPT_RSM \
+ | SVM_CTRL_INTERCEPT_HLT \
+ | SVM_CTRL_INTERCEPT_IOIO_PROT \
+ | SVM_CTRL_INTERCEPT_MSR_PROT \
+ | SVM_CTRL_INTERCEPT_INVLPGA \
+ | SVM_CTRL_INTERCEPT_SHUTDOWN \
+ | SVM_CTRL_INTERCEPT_FERR_FREEZE \
+ | SVM_CTRL_INTERCEPT_VMRUN \
+ | SVM_CTRL_INTERCEPT_SKINIT \
+ | SVM_CTRL_INTERCEPT_WBINVD \
+ | SVM_CTRL_INTERCEPT_MONITOR \
+ | SVM_CTRL_INTERCEPT_MWAIT \
+ | SVM_CTRL_INTERCEPT_CR0_SEL_WRITE \
+ | SVM_CTRL_INTERCEPT_XSETBV)
+
+/** @name VMCB Clean Bits.
+ *
+ * These flags are used for VMCB-state caching. A set VMCB Clean bit indicates
+ * AMD-V doesn't need to reload the corresponding value(s) from the VMCB in
+ * memory.
+ *
+ * @{ */
+/** All intercepts vectors, TSC offset, PAUSE filter counter. */
+#define HMSVM_VMCB_CLEAN_INTERCEPTS RT_BIT(0)
+/** I/O permission bitmap, MSR permission bitmap. */
+#define HMSVM_VMCB_CLEAN_IOPM_MSRPM RT_BIT(1)
+/** ASID. */
+#define HMSVM_VMCB_CLEAN_ASID RT_BIT(2)
+/** TPR: V_TPR, V_IRQ, V_INTR_PRIO, V_IGN_TPR, V_INTR_MASKING,
+V_INTR_VECTOR. */
+#define HMSVM_VMCB_CLEAN_INT_CTRL RT_BIT(3)
+/** Nested Paging: Nested CR3 (nCR3), PAT. */
+#define HMSVM_VMCB_CLEAN_NP RT_BIT(4)
+/** Control registers (CR0, CR3, CR4, EFER). */
+#define HMSVM_VMCB_CLEAN_CRX_EFER RT_BIT(5)
+/** Debug registers (DR6, DR7). */
+#define HMSVM_VMCB_CLEAN_DRX RT_BIT(6)
+/** GDT, IDT limit and base. */
+#define HMSVM_VMCB_CLEAN_DT RT_BIT(7)
+/** Segment register: CS, SS, DS, ES limit and base. */
+#define HMSVM_VMCB_CLEAN_SEG RT_BIT(8)
+/** CR2.*/
+#define HMSVM_VMCB_CLEAN_CR2 RT_BIT(9)
+/** Last-branch record (DbgCtlMsr, br_from, br_to, lastint_from, lastint_to) */
+#define HMSVM_VMCB_CLEAN_LBR RT_BIT(10)
+/** AVIC (AVIC APIC_BAR; AVIC APIC_BACKING_PAGE, AVIC
+PHYSICAL_TABLE and AVIC LOGICAL_TABLE Pointers). */
+#define HMSVM_VMCB_CLEAN_AVIC RT_BIT(11)
+/** Mask of all valid VMCB Clean bits. */
+#define HMSVM_VMCB_CLEAN_ALL ( HMSVM_VMCB_CLEAN_INTERCEPTS \
+ | HMSVM_VMCB_CLEAN_IOPM_MSRPM \
+ | HMSVM_VMCB_CLEAN_ASID \
+ | HMSVM_VMCB_CLEAN_INT_CTRL \
+ | HMSVM_VMCB_CLEAN_NP \
+ | HMSVM_VMCB_CLEAN_CRX_EFER \
+ | HMSVM_VMCB_CLEAN_DRX \
+ | HMSVM_VMCB_CLEAN_DT \
+ | HMSVM_VMCB_CLEAN_SEG \
+ | HMSVM_VMCB_CLEAN_CR2 \
+ | HMSVM_VMCB_CLEAN_LBR \
+ | HMSVM_VMCB_CLEAN_AVIC)
+/** @} */
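+/* A clean bit may only remain set while the corresponding guest state is unchanged since
+   the CPU last ran this VMCB; clearing a bit forces AMD-V to re-read that state from the
+   VMCB in memory on the next VMRUN. */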
+
+/** @name SVM transient.
+ *
+ * A state structure for holding miscellaneous information across AMD-V
+ * VMRUN/\#VMEXIT operation, restored after the transition.
+ *
+ * @{ */
+typedef struct SVMTRANSIENT
+{
+ /** The host's rflags/eflags. */
+ RTCCUINTREG fEFlags;
+ /** The \#VMEXIT exit code (the EXITCODE field in the VMCB). */
+ uint64_t u64ExitCode;
+
+ /** The guest's TPR value used for TPR shadowing. */
+ uint8_t u8GuestTpr;
+ /** Alignment. */
+ uint8_t abAlignment0[7];
+
+ /** Pointer to the currently executing VMCB. */
+ PSVMVMCB pVmcb;
+
+ /** Whether we are currently executing a nested-guest. */
+ bool fIsNestedGuest;
+ /** Whether the guest debug state was active at the time of \#VMEXIT. */
+ bool fWasGuestDebugStateActive;
+ /** Whether the hyper debug state was active at the time of \#VMEXIT. */
+ bool fWasHyperDebugStateActive;
+ /** Whether the TSC offset mode needs to be updated. */
+ bool fUpdateTscOffsetting;
+ /** Whether the TSC_AUX MSR needs restoring on \#VMEXIT. */
+ bool fRestoreTscAuxMsr;
+ /** Whether the \#VMEXIT was caused by a page-fault during delivery of a
+     * contributory exception or a page-fault. */
+ bool fVectoringDoublePF;
+ /** Whether the \#VMEXIT was caused by a page-fault during delivery of an
+ * external interrupt or NMI. */
+ bool fVectoringPF;
+ /** Padding. */
+ bool afPadding0;
+} SVMTRANSIENT;
+/** Pointer to SVM transient state. */
+typedef SVMTRANSIENT *PSVMTRANSIENT;
+/** Pointer to a const SVM transient state. */
+typedef const SVMTRANSIENT *PCSVMTRANSIENT;
+
+AssertCompileSizeAlignment(SVMTRANSIENT, sizeof(uint64_t));
+AssertCompileMemberAlignment(SVMTRANSIENT, u64ExitCode, sizeof(uint64_t));
+AssertCompileMemberAlignment(SVMTRANSIENT, pVmcb, sizeof(uint64_t));
+/** @} */
+
+/**
+ * MSRPM (MSR permission bitmap) read permissions (for guest RDMSR).
+ */
+typedef enum SVMMSREXITREAD
+{
+ /** Reading this MSR causes a \#VMEXIT. */
+ SVMMSREXIT_INTERCEPT_READ = 0xb,
+ /** Reading this MSR does not cause a \#VMEXIT. */
+ SVMMSREXIT_PASSTHRU_READ
+} SVMMSREXITREAD;
+
+/**
+ * MSRPM (MSR permission bitmap) write permissions (for guest WRMSR).
+ */
+typedef enum SVMMSREXITWRITE
+{
+ /** Writing to this MSR causes a \#VMEXIT. */
+ SVMMSREXIT_INTERCEPT_WRITE = 0xd,
+ /** Writing to this MSR does not cause a \#VMEXIT. */
+ SVMMSREXIT_PASSTHRU_WRITE
+} SVMMSREXITWRITE;
+
+/**
+ * SVM \#VMEXIT handler.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM-transient structure.
+ */
+typedef int FNSVMEXITHANDLER(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient);
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static void hmR0SvmPendingEventToTrpmTrap(PVMCPUCC pVCpu);
+static void hmR0SvmLeave(PVMCPUCC pVCpu, bool fImportState);
+
+
+/** @name \#VMEXIT handlers.
+ * @{
+ */
+static FNSVMEXITHANDLER hmR0SvmExitIntr;
+static FNSVMEXITHANDLER hmR0SvmExitWbinvd;
+static FNSVMEXITHANDLER hmR0SvmExitInvd;
+static FNSVMEXITHANDLER hmR0SvmExitCpuid;
+static FNSVMEXITHANDLER hmR0SvmExitRdtsc;
+static FNSVMEXITHANDLER hmR0SvmExitRdtscp;
+static FNSVMEXITHANDLER hmR0SvmExitRdpmc;
+static FNSVMEXITHANDLER hmR0SvmExitInvlpg;
+static FNSVMEXITHANDLER hmR0SvmExitHlt;
+static FNSVMEXITHANDLER hmR0SvmExitMonitor;
+static FNSVMEXITHANDLER hmR0SvmExitMwait;
+static FNSVMEXITHANDLER hmR0SvmExitShutdown;
+static FNSVMEXITHANDLER hmR0SvmExitUnexpected;
+static FNSVMEXITHANDLER hmR0SvmExitReadCRx;
+static FNSVMEXITHANDLER hmR0SvmExitWriteCRx;
+static FNSVMEXITHANDLER hmR0SvmExitMsr;
+static FNSVMEXITHANDLER hmR0SvmExitReadDRx;
+static FNSVMEXITHANDLER hmR0SvmExitWriteDRx;
+static FNSVMEXITHANDLER hmR0SvmExitXsetbv;
+static FNSVMEXITHANDLER hmR0SvmExitIOInstr;
+static FNSVMEXITHANDLER hmR0SvmExitNestedPF;
+static FNSVMEXITHANDLER hmR0SvmExitVIntr;
+static FNSVMEXITHANDLER hmR0SvmExitTaskSwitch;
+static FNSVMEXITHANDLER hmR0SvmExitVmmCall;
+static FNSVMEXITHANDLER hmR0SvmExitPause;
+static FNSVMEXITHANDLER hmR0SvmExitFerrFreeze;
+static FNSVMEXITHANDLER hmR0SvmExitIret;
+static FNSVMEXITHANDLER hmR0SvmExitXcptPF;
+static FNSVMEXITHANDLER hmR0SvmExitXcptUD;
+static FNSVMEXITHANDLER hmR0SvmExitXcptMF;
+static FNSVMEXITHANDLER hmR0SvmExitXcptDB;
+static FNSVMEXITHANDLER hmR0SvmExitXcptAC;
+static FNSVMEXITHANDLER hmR0SvmExitXcptBP;
+static FNSVMEXITHANDLER hmR0SvmExitXcptGP;
+#if defined(HMSVM_ALWAYS_TRAP_ALL_XCPTS) || defined(VBOX_WITH_NESTED_HWVIRT_SVM)
+static FNSVMEXITHANDLER hmR0SvmExitXcptGeneric;
+#endif
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+static FNSVMEXITHANDLER hmR0SvmExitClgi;
+static FNSVMEXITHANDLER hmR0SvmExitStgi;
+static FNSVMEXITHANDLER hmR0SvmExitVmload;
+static FNSVMEXITHANDLER hmR0SvmExitVmsave;
+static FNSVMEXITHANDLER hmR0SvmExitInvlpga;
+static FNSVMEXITHANDLER hmR0SvmExitVmrun;
+static FNSVMEXITHANDLER hmR0SvmNestedExitXcptDB;
+static FNSVMEXITHANDLER hmR0SvmNestedExitXcptBP;
+#endif
+/** @} */
+
+static int hmR0SvmHandleExit(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+static int hmR0SvmHandleExitNested(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient);
+#endif
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** Ring-0 memory object for the IO bitmap. */
+static RTR0MEMOBJ g_hMemObjIOBitmap = NIL_RTR0MEMOBJ;
+/** Physical address of the IO bitmap. */
+static RTHCPHYS g_HCPhysIOBitmap;
+/** Pointer to the IO bitmap. */
+static R0PTRTYPE(void *) g_pvIOBitmap;
+
+#ifdef VBOX_STRICT
+# define HMSVM_LOG_RBP_RSP RT_BIT_32(0)
+# define HMSVM_LOG_CR_REGS RT_BIT_32(1)
+# define HMSVM_LOG_CS RT_BIT_32(2)
+# define HMSVM_LOG_SS RT_BIT_32(3)
+# define HMSVM_LOG_FS RT_BIT_32(4)
+# define HMSVM_LOG_GS RT_BIT_32(5)
+# define HMSVM_LOG_LBR RT_BIT_32(6)
+# define HMSVM_LOG_ALL ( HMSVM_LOG_RBP_RSP \
+ | HMSVM_LOG_CR_REGS \
+ | HMSVM_LOG_CS \
+ | HMSVM_LOG_SS \
+ | HMSVM_LOG_FS \
+ | HMSVM_LOG_GS \
+ | HMSVM_LOG_LBR)
+
+/**
+ * Dumps the virtual CPU state and additional info to the logger for diagnostics.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ * @param pszPrefix Log prefix.
+ * @param fFlags Log flags, see HMSVM_LOG_XXX.
+ * @param uVerbose The verbosity level, currently unused.
+ */
+static void hmR0SvmLogState(PVMCPUCC pVCpu, PCSVMVMCB pVmcb, const char *pszPrefix, uint32_t fFlags, uint8_t uVerbose)
+{
+ RT_NOREF2(pVCpu, uVerbose);
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
+ Log4(("%s: cs:rip=%04x:%RX64 efl=%#RX64\n", pszPrefix, pCtx->cs.Sel, pCtx->rip, pCtx->rflags.u));
+
+ if (fFlags & HMSVM_LOG_RBP_RSP)
+ {
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_RBP);
+ Log4(("%s: rsp=%#RX64 rbp=%#RX64\n", pszPrefix, pCtx->rsp, pCtx->rbp));
+ }
+
+ if (fFlags & HMSVM_LOG_CR_REGS)
+ {
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR3 | CPUMCTX_EXTRN_CR4);
+ Log4(("%s: cr0=%#RX64 cr3=%#RX64 cr4=%#RX64\n", pszPrefix, pCtx->cr0, pCtx->cr3, pCtx->cr4));
+ }
+
+ if (fFlags & HMSVM_LOG_CS)
+ {
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CS);
+ Log4(("%s: cs={%04x base=%016RX64 limit=%08x flags=%08x}\n", pszPrefix, pCtx->cs.Sel, pCtx->cs.u64Base,
+ pCtx->cs.u32Limit, pCtx->cs.Attr.u));
+ }
+ if (fFlags & HMSVM_LOG_SS)
+ {
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SS);
+ Log4(("%s: ss={%04x base=%016RX64 limit=%08x flags=%08x}\n", pszPrefix, pCtx->ss.Sel, pCtx->ss.u64Base,
+ pCtx->ss.u32Limit, pCtx->ss.Attr.u));
+ }
+ if (fFlags & HMSVM_LOG_FS)
+ {
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_FS);
+ Log4(("%s: fs={%04x base=%016RX64 limit=%08x flags=%08x}\n", pszPrefix, pCtx->fs.Sel, pCtx->fs.u64Base,
+ pCtx->fs.u32Limit, pCtx->fs.Attr.u));
+ }
+ if (fFlags & HMSVM_LOG_GS)
+ {
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_GS);
+ Log4(("%s: gs={%04x base=%016RX64 limit=%08x flags=%08x}\n", pszPrefix, pCtx->gs.Sel, pCtx->gs.u64Base,
+ pCtx->gs.u32Limit, pCtx->gs.Attr.u));
+ }
+
+ PCSVMVMCBSTATESAVE pVmcbGuest = &pVmcb->guest;
+ if (fFlags & HMSVM_LOG_LBR)
+ {
+ Log4(("%s: br_from=%#RX64 br_to=%#RX64 lastxcpt_from=%#RX64 lastxcpt_to=%#RX64\n", pszPrefix, pVmcbGuest->u64BR_FROM,
+ pVmcbGuest->u64BR_TO, pVmcbGuest->u64LASTEXCPFROM, pVmcbGuest->u64LASTEXCPTO));
+ }
+ NOREF(pszPrefix); NOREF(pVmcbGuest); NOREF(pCtx);
+}
+#endif /* VBOX_STRICT */
+
+
+/**
+ * Sets up and activates AMD-V on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVM The cross context VM structure. Can be
+ * NULL after a resume!
+ * @param pvCpuPage Pointer to the global CPU page.
+ * @param HCPhysCpuPage Physical address of the global CPU page.
+ * @param fEnabledByHost Whether the host OS has already initialized AMD-V.
+ * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs (currently
+ * unused).
+ */
+VMMR0DECL(int) SVMR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
+ PCSUPHWVIRTMSRS pHwvirtMsrs)
+{
+ Assert(!fEnabledByHost);
+ Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
+ Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
+ Assert(pvCpuPage); NOREF(pvCpuPage);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ RT_NOREF2(fEnabledByHost, pHwvirtMsrs);
+
+    /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with EFER. */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ /*
+ * We must turn on AMD-V and setup the host state physical address, as those MSRs are per CPU.
+ */
+ uint64_t u64HostEfer = ASMRdMsr(MSR_K6_EFER);
+ if (u64HostEfer & MSR_K6_EFER_SVME)
+ {
+        /* If VBOX_HWVIRTEX_IGNORE_SVM_IN_USE is active, then we blindly use AMD-V. */
+ if ( pVM
+ && pVM->hm.s.svm.fIgnoreInUseError)
+ pHostCpu->fIgnoreAMDVInUseError = true;
+
+ if (!pHostCpu->fIgnoreAMDVInUseError)
+ {
+ ASMSetFlags(fEFlags);
+ return VERR_SVM_IN_USE;
+ }
+ }
+
+ /* Turn on AMD-V in the EFER MSR. */
+ ASMWrMsr(MSR_K6_EFER, u64HostEfer | MSR_K6_EFER_SVME);
+
+ /* Write the physical page address where the CPU will store the host state while executing the VM. */
+ ASMWrMsr(MSR_K8_VM_HSAVE_PA, HCPhysCpuPage);
+
+ /* Restore interrupts. */
+ ASMSetFlags(fEFlags);
+
+ /*
+     * Theoretically, other hypervisors may have used ASIDs; ideally we should flush all
+     * non-zero ASIDs when enabling SVM. However, AMD doesn't have an SVM instruction to
+     * flush all ASIDs (flushing is done upon VMRUN). Therefore, flag that we need to flush
+     * the TLB entirely before executing any guest code.
+ */
+ pHostCpu->fFlushAsidBeforeUse = true;
+
+ /*
+ * Ensure each VCPU scheduled on this CPU gets a new ASID on resume. See @bugref{6255}.
+ */
+ ++pHostCpu->cTlbFlushes;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Deactivates AMD-V on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pvCpuPage Pointer to the global CPU page.
+ * @param HCPhysCpuPage Physical address of the global CPU page.
+ */
+VMMR0DECL(int) SVMR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
+{
+ RT_NOREF1(pHostCpu);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ AssertReturn( HCPhysCpuPage
+ && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
+ AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
+
+ /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with EFER. */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ /* Turn off AMD-V in the EFER MSR. */
+ uint64_t u64HostEfer = ASMRdMsr(MSR_K6_EFER);
+ ASMWrMsr(MSR_K6_EFER, u64HostEfer & ~MSR_K6_EFER_SVME);
+
+ /* Invalidate host state physical address. */
+ ASMWrMsr(MSR_K8_VM_HSAVE_PA, 0);
+
+ /* Restore interrupts. */
+ ASMSetFlags(fEFlags);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does global AMD-V initialization (called during module initialization).
+ *
+ * @returns VBox status code.
+ */
+VMMR0DECL(int) SVMR0GlobalInit(void)
+{
+ /*
+ * Allocate 12 KB (3 pages) for the IO bitmap. Since this is non-optional and we always
+ * intercept all IO accesses, it's done once globally here instead of per-VM.
+ */
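+    /* Size note (based on the AMD architecture manual rather than anything in this file): the
+       IOPM provides one intercept bit per I/O port for ports 0x0000-0xffff (8 KB), plus a
+       trailing page so multi-byte accesses near port 0xffff still fall inside the map, which
+       is presumably why SVM_IOPM_PAGES amounts to 3 pages / 12 KB here. */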
+ Assert(g_hMemObjIOBitmap == NIL_RTR0MEMOBJ);
+ int rc = RTR0MemObjAllocCont(&g_hMemObjIOBitmap, SVM_IOPM_PAGES << X86_PAGE_4K_SHIFT, false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ g_pvIOBitmap = RTR0MemObjAddress(g_hMemObjIOBitmap);
+ g_HCPhysIOBitmap = RTR0MemObjGetPagePhysAddr(g_hMemObjIOBitmap, 0 /* iPage */);
+
+ /* Set all bits to intercept all IO accesses. */
+ ASMMemFill32(g_pvIOBitmap, SVM_IOPM_PAGES << X86_PAGE_4K_SHIFT, UINT32_C(0xffffffff));
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does global AMD-V termination (called during module termination).
+ */
+VMMR0DECL(void) SVMR0GlobalTerm(void)
+{
+ if (g_hMemObjIOBitmap != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(g_hMemObjIOBitmap, true /* fFreeMappings */);
+ g_pvIOBitmap = NULL;
+ g_HCPhysIOBitmap = 0;
+ g_hMemObjIOBitmap = NIL_RTR0MEMOBJ;
+ }
+}
+
+
+/**
+ * Frees any allocated per-VCPU structures for a VM.
+ *
+ * @param pVM The cross context VM structure.
+ */
+DECLINLINE(void) hmR0SvmFreeStructs(PVMCC pVM)
+{
+ for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
+ {
+ PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
+ AssertPtr(pVCpu);
+
+ if (pVCpu->hm.s.svm.hMemObjVmcbHost != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjVmcbHost, false);
+ pVCpu->hm.s.svm.HCPhysVmcbHost = 0;
+ pVCpu->hm.s.svm.hMemObjVmcbHost = NIL_RTR0MEMOBJ;
+ }
+
+ if (pVCpu->hm.s.svm.hMemObjVmcb != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjVmcb, false);
+ pVCpu->hm.s.svm.pVmcb = NULL;
+ pVCpu->hm.s.svm.HCPhysVmcb = 0;
+ pVCpu->hm.s.svm.hMemObjVmcb = NIL_RTR0MEMOBJ;
+ }
+
+ if (pVCpu->hm.s.svm.hMemObjMsrBitmap != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjMsrBitmap, false);
+ pVCpu->hm.s.svm.pvMsrBitmap = NULL;
+ pVCpu->hm.s.svm.HCPhysMsrBitmap = 0;
+ pVCpu->hm.s.svm.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
+ }
+ }
+}
+
+
+/**
+ * Does per-VM AMD-V initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) SVMR0InitVM(PVMCC pVM)
+{
+ int rc = VERR_INTERNAL_ERROR_5;
+
+ /*
+ * Check for an AMD CPU erratum which requires us to flush the TLB before every world-switch.
+ */
+ uint32_t u32Family;
+ uint32_t u32Model;
+ uint32_t u32Stepping;
+ if (HMIsSubjectToSvmErratum170(&u32Family, &u32Model, &u32Stepping))
+ {
+ Log4Func(("AMD cpu with erratum 170 family %#x model %#x stepping %#x\n", u32Family, u32Model, u32Stepping));
+ pVM->hm.s.svm.fAlwaysFlushTLB = true;
+ }
+
+ /*
+ * Initialize the R0 memory objects up-front so we can properly cleanup on allocation failures.
+ */
+ for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
+ {
+ PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
+ pVCpu->hm.s.svm.hMemObjVmcbHost = NIL_RTR0MEMOBJ;
+ pVCpu->hm.s.svm.hMemObjVmcb = NIL_RTR0MEMOBJ;
+ pVCpu->hm.s.svm.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
+ }
+
+ for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
+ {
+ PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
+
+ /*
+ * Initialize the hardware-assisted SVM guest-execution handler.
+ * We now use a single handler for both 32-bit and 64-bit guests, see @bugref{6208#c73}.
+ */
+ pVCpu->hm.s.svm.pfnVMRun = SVMR0VMRun;
+
+ /*
+ * Allocate one page for the host-context VM control block (VMCB). This is used for additional host-state (such as
+ * FS, GS, Kernel GS Base, etc.) apart from the host-state save area specified in MSR_K8_VM_HSAVE_PA.
+ */
+ rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjVmcbHost, SVM_VMCB_PAGES << PAGE_SHIFT, false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ goto failure_cleanup;
+
+ void *pvVmcbHost = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVmcbHost);
+ pVCpu->hm.s.svm.HCPhysVmcbHost = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjVmcbHost, 0 /* iPage */);
+ Assert(pVCpu->hm.s.svm.HCPhysVmcbHost < _4G);
+ ASMMemZeroPage(pvVmcbHost);
+
+ /*
+ * Allocate one page for the guest-state VMCB.
+ */
+ rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjVmcb, SVM_VMCB_PAGES << PAGE_SHIFT, false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ goto failure_cleanup;
+
+ pVCpu->hm.s.svm.pVmcb = (PSVMVMCB)RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVmcb);
+ pVCpu->hm.s.svm.HCPhysVmcb = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjVmcb, 0 /* iPage */);
+ Assert(pVCpu->hm.s.svm.HCPhysVmcb < _4G);
+ ASMMemZeroPage(pVCpu->hm.s.svm.pVmcb);
+
+ /*
+ * Allocate two pages (8 KB) for the MSR permission bitmap. There doesn't seem to be a way to convince
+ * SVM to not require one.
+ */
+ rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjMsrBitmap, SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT,
+ false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ goto failure_cleanup;
+
+ pVCpu->hm.s.svm.pvMsrBitmap = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjMsrBitmap);
+ pVCpu->hm.s.svm.HCPhysMsrBitmap = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjMsrBitmap, 0 /* iPage */);
+ /* Set all bits to intercept all MSR accesses (changed later on). */
+ ASMMemFill32(pVCpu->hm.s.svm.pvMsrBitmap, SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT, UINT32_C(0xffffffff));
+ }
+
+ return VINF_SUCCESS;
+
+failure_cleanup:
+ hmR0SvmFreeStructs(pVM);
+ return rc;
+}
+
+
+/**
+ * Does per-VM AMD-V termination.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) SVMR0TermVM(PVMCC pVM)
+{
+ hmR0SvmFreeStructs(pVM);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Returns whether the VMCB Clean Bits feature is supported.
+ *
+ * @returns @c true if supported, @c false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fIsNestedGuest Whether we are currently executing the nested-guest.
+ */
+DECL_FORCE_INLINE(bool) hmR0SvmSupportsVmcbCleanBits(PVMCPUCC pVCpu, bool fIsNestedGuest)
+{
+ PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ bool const fHostVmcbCleanBits = RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_VMCB_CLEAN);
+ if (!fIsNestedGuest)
+ return fHostVmcbCleanBits;
+ return fHostVmcbCleanBits && pVM->cpum.ro.GuestFeatures.fSvmVmcbClean;
+}
+
+
+/**
+ * Returns whether the decode assists feature is supported.
+ *
+ * @returns @c true if supported, @c false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(bool) hmR0SvmSupportsDecodeAssists(PVMCPUCC pVCpu)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ {
+ return (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_DECODE_ASSISTS)
+ && pVM->cpum.ro.GuestFeatures.fSvmDecodeAssists;
+ }
+#endif
+ return RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_DECODE_ASSISTS);
+}
+
+
+/**
+ * Returns whether the NRIP_SAVE feature is supported.
+ *
+ * @returns @c true if supported, @c false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(bool) hmR0SvmSupportsNextRipSave(PVMCPUCC pVCpu)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ {
+ return (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE)
+ && pVM->cpum.ro.GuestFeatures.fSvmNextRipSave;
+ }
+#endif
+ return RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE);
+}
+
+
+/**
+ * Sets the permission bits for the specified MSR in the MSRPM bitmap.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pbMsrBitmap Pointer to the MSR bitmap.
+ * @param idMsr The MSR for which the permissions are being set.
+ * @param enmRead MSR read permissions.
+ * @param enmWrite MSR write permissions.
+ *
+ * @remarks This function does -not- clear the VMCB clean bits for MSRPM. The
+ * caller needs to take care of this.
+ */
+static void hmR0SvmSetMsrPermission(PVMCPUCC pVCpu, uint8_t *pbMsrBitmap, uint32_t idMsr, SVMMSREXITREAD enmRead,
+ SVMMSREXITWRITE enmWrite)
+{
+ bool const fInNestedGuestMode = CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx);
+ uint16_t offMsrpm;
+ uint8_t uMsrpmBit;
+ int rc = CPUMGetSvmMsrpmOffsetAndBit(idMsr, &offMsrpm, &uMsrpmBit);
+ AssertRC(rc);
+
+ Assert(uMsrpmBit == 0 || uMsrpmBit == 2 || uMsrpmBit == 4 || uMsrpmBit == 6);
+ Assert(offMsrpm < SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT);
+
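+    /* The MSRPM uses two adjacent bits per MSR: the even bit (uMsrpmBit) intercepts reads and
+       the odd bit (uMsrpmBit + 1) intercepts writes, which is what the RT_BIT() arithmetic
+       below relies on. */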
+ pbMsrBitmap += offMsrpm;
+ if (enmRead == SVMMSREXIT_INTERCEPT_READ)
+ *pbMsrBitmap |= RT_BIT(uMsrpmBit);
+ else
+ {
+ if (!fInNestedGuestMode)
+ *pbMsrBitmap &= ~RT_BIT(uMsrpmBit);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ else
+ {
+            /* Only clear the bit if the nested-guest is also not intercepting the MSR read. */
+ uint8_t const *pbNstGstMsrBitmap = (uint8_t *)pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pvMsrBitmap);
+ pbNstGstMsrBitmap += offMsrpm;
+ if (!(*pbNstGstMsrBitmap & RT_BIT(uMsrpmBit)))
+ *pbMsrBitmap &= ~RT_BIT(uMsrpmBit);
+ else
+ Assert(*pbMsrBitmap & RT_BIT(uMsrpmBit));
+ }
+#endif
+ }
+
+ if (enmWrite == SVMMSREXIT_INTERCEPT_WRITE)
+ *pbMsrBitmap |= RT_BIT(uMsrpmBit + 1);
+ else
+ {
+ if (!fInNestedGuestMode)
+ *pbMsrBitmap &= ~RT_BIT(uMsrpmBit + 1);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ else
+ {
+            /* Only clear the bit if the nested-guest is also not intercepting the MSR write. */
+ uint8_t const *pbNstGstMsrBitmap = (uint8_t *)pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pvMsrBitmap);
+ pbNstGstMsrBitmap += offMsrpm;
+ if (!(*pbNstGstMsrBitmap & RT_BIT(uMsrpmBit + 1)))
+ *pbMsrBitmap &= ~RT_BIT(uMsrpmBit + 1);
+ else
+ Assert(*pbMsrBitmap & RT_BIT(uMsrpmBit + 1));
+ }
+#endif
+ }
+}
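+
+/* Illustrative use (a sketch only, mirroring the call sites further down in this file): make an
+ * MSR pass-through for reads while still intercepting writes, then perform the clean-bit update
+ * that the @remarks above leaves to the caller:
+ *
+ *     hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_LSTAR,
+ *                             SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_INTERCEPT_WRITE);
+ *     pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_IOPM_MSRPM;
+ */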
+
+
+/**
+ * Sets up AMD-V for the specified VM.
+ * This function is only called once per VM during initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) SVMR0SetupVM(PVMCC pVM)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ AssertReturn(pVM, VERR_INVALID_PARAMETER);
+ Assert(pVM->hm.s.svm.fSupported);
+
+ bool const fPauseFilter = RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER);
+ bool const fPauseFilterThreshold = RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER_THRESHOLD);
+ bool const fUsePauseFilter = fPauseFilter && pVM->hm.s.svm.cPauseFilter;
+
+ bool const fLbrVirt = RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_LBR_VIRT);
+ bool const fUseLbrVirt = fLbrVirt && pVM->hm.s.svm.fLbrVirt; /** @todo IEM implementation etc. */
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ bool const fVirtVmsaveVmload = RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_VIRT_VMSAVE_VMLOAD);
+ bool const fUseVirtVmsaveVmload = fVirtVmsaveVmload && pVM->hm.s.svm.fVirtVmsaveVmload && pVM->hm.s.fNestedPaging;
+
+ bool const fVGif = RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_VGIF);
+ bool const fUseVGif = fVGif && pVM->hm.s.svm.fVGif;
+#endif
+
+ PVMCPUCC pVCpu0 = VMCC_GET_CPU_0(pVM);
+ PSVMVMCB pVmcb0 = pVCpu0->hm.s.svm.pVmcb;
+ AssertMsgReturn(RT_VALID_PTR(pVmcb0), ("Invalid pVmcb (%p) for vcpu[0]\n", pVmcb0), VERR_SVM_INVALID_PVMCB);
+ PSVMVMCBCTRL pVmcbCtrl0 = &pVmcb0->ctrl;
+
+ /* Always trap #AC for reasons of security. */
+ pVmcbCtrl0->u32InterceptXcpt |= RT_BIT_32(X86_XCPT_AC);
+
+ /* Always trap #DB for reasons of security. */
+ pVmcbCtrl0->u32InterceptXcpt |= RT_BIT_32(X86_XCPT_DB);
+
+ /* Trap exceptions unconditionally (debug purposes). */
+#ifdef HMSVM_ALWAYS_TRAP_PF
+ pVmcbCtrl0->u32InterceptXcpt |= RT_BIT_32(X86_XCPT_PF);
+#endif
+#ifdef HMSVM_ALWAYS_TRAP_ALL_XCPTS
+ /* If you add any exceptions here, make sure to update hmR0SvmHandleExit(). */
+ pVmcbCtrl0->u32InterceptXcpt |= RT_BIT_32(X86_XCPT_BP)
+ | RT_BIT_32(X86_XCPT_DE)
+ | RT_BIT_32(X86_XCPT_NM)
+ | RT_BIT_32(X86_XCPT_UD)
+ | RT_BIT_32(X86_XCPT_NP)
+ | RT_BIT_32(X86_XCPT_SS)
+ | RT_BIT_32(X86_XCPT_GP)
+ | RT_BIT_32(X86_XCPT_PF)
+ | RT_BIT_32(X86_XCPT_MF)
+ ;
+#endif
+
+ /* Apply the exceptions intercepts needed by the GIM provider. */
+ if (pVCpu0->hm.s.fGIMTrapXcptUD)
+ pVmcbCtrl0->u32InterceptXcpt |= RT_BIT(X86_XCPT_UD);
+
+ /* The mesa 3d driver hack needs #GP. */
+ if (pVCpu0->hm.s.fTrapXcptGpForLovelyMesaDrv)
+ pVmcbCtrl0->u32InterceptXcpt |= RT_BIT(X86_XCPT_GP);
+
+ /* Set up unconditional intercepts and conditions. */
+ pVmcbCtrl0->u64InterceptCtrl = HMSVM_MANDATORY_GUEST_CTRL_INTERCEPTS
+ | SVM_CTRL_INTERCEPT_VMMCALL;
+
+#ifdef HMSVM_ALWAYS_TRAP_TASK_SWITCH
+ pVmcbCtrl0->u64InterceptCtrl |= SVM_CTRL_INTERCEPT_TASK_SWITCH;
+#endif
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ /* Virtualized VMSAVE/VMLOAD. */
+ pVmcbCtrl0->LbrVirt.n.u1VirtVmsaveVmload = fUseVirtVmsaveVmload;
+ if (!fUseVirtVmsaveVmload)
+ pVmcbCtrl0->u64InterceptCtrl |= SVM_CTRL_INTERCEPT_VMSAVE
+ | SVM_CTRL_INTERCEPT_VMLOAD;
+
+ /* Virtual GIF. */
+ pVmcbCtrl0->IntCtrl.n.u1VGifEnable = fUseVGif;
+ if (!fUseVGif)
+ pVmcbCtrl0->u64InterceptCtrl |= SVM_CTRL_INTERCEPT_CLGI
+ | SVM_CTRL_INTERCEPT_STGI;
+#endif
+
+ /* CR4 writes must always be intercepted for tracking PGM mode changes. */
+ pVmcbCtrl0->u16InterceptWrCRx = RT_BIT(4);
+
+ /* Intercept all DRx reads and writes by default. Changed later on. */
+ pVmcbCtrl0->u16InterceptRdDRx = 0xffff;
+ pVmcbCtrl0->u16InterceptWrDRx = 0xffff;
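+    /* Note: the u16Intercept{Rd,Wr}CRx and u16Intercept{Rd,Wr}DRx fields are bitmaps with bit n
+       covering CRn/DRn respectively, so RT_BIT(4) above intercepts CR4 writes and 0xffff
+       intercepts all sixteen debug registers. */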
+
+ /* Virtualize masking of INTR interrupts. (reads/writes from/to CR8 go to the V_TPR register) */
+ pVmcbCtrl0->IntCtrl.n.u1VIntrMasking = 1;
+
+    /* Ignore the priority in the virtual TPR. This is necessary for delivering PIC-style (ExtInt) interrupts
+       as we currently deliver both PIC and APIC interrupts alike; see hmR0SvmEvaluatePendingEvent(). */
+ pVmcbCtrl0->IntCtrl.n.u1IgnoreTPR = 1;
+
+ /* Set the IO permission bitmap physical addresses. */
+ pVmcbCtrl0->u64IOPMPhysAddr = g_HCPhysIOBitmap;
+
+ /* LBR virtualization. */
+ pVmcbCtrl0->LbrVirt.n.u1LbrVirt = fUseLbrVirt;
+
+    /* The host ASID MBZ; for the guest, start with 1. */
+ pVmcbCtrl0->TLBCtrl.n.u32ASID = 1;
+
+ /* Setup Nested Paging. This doesn't change throughout the execution time of the VM. */
+ pVmcbCtrl0->NestedPagingCtrl.n.u1NestedPaging = pVM->hm.s.fNestedPaging;
+
+    /* Without Nested Paging, we need additional intercepts. */
+ if (!pVM->hm.s.fNestedPaging)
+ {
+ /* CR3 reads/writes must be intercepted; our shadow values differ from the guest values. */
+ pVmcbCtrl0->u16InterceptRdCRx |= RT_BIT(3);
+ pVmcbCtrl0->u16InterceptWrCRx |= RT_BIT(3);
+
+ /* Intercept INVLPG and task switches (may change CR3, EFLAGS, LDT). */
+ pVmcbCtrl0->u64InterceptCtrl |= SVM_CTRL_INTERCEPT_INVLPG
+ | SVM_CTRL_INTERCEPT_TASK_SWITCH;
+
+ /* Page faults must be intercepted to implement shadow paging. */
+ pVmcbCtrl0->u32InterceptXcpt |= RT_BIT(X86_XCPT_PF);
+ }
+
+ /* Setup Pause Filter for guest pause-loop (spinlock) exiting. */
+ if (fUsePauseFilter)
+ {
+ Assert(pVM->hm.s.svm.cPauseFilter > 0);
+ pVmcbCtrl0->u16PauseFilterCount = pVM->hm.s.svm.cPauseFilter;
+ if (fPauseFilterThreshold)
+ pVmcbCtrl0->u16PauseFilterThreshold = pVM->hm.s.svm.cPauseFilterThresholdTicks;
+ pVmcbCtrl0->u64InterceptCtrl |= SVM_CTRL_INTERCEPT_PAUSE;
+ }
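+    /* How this works (per the AMD spec, summarized here for context): each PAUSE executed by the
+       guest decrements the filter count and a #VMEXIT is raised once it reaches zero; with the
+       threshold feature, PAUSEs spaced further apart (in ticks) than the threshold reload the
+       count, so only tight spin loops end up exiting. */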
+
+ /*
+ * Setup the MSR permission bitmap.
+ * The following MSRs are saved/restored automatically during the world-switch.
+ * Don't intercept guest read/write accesses to these MSRs.
+ */
+ uint8_t *pbMsrBitmap0 = (uint8_t *)pVCpu0->hm.s.svm.pvMsrBitmap;
+ hmR0SvmSetMsrPermission(pVCpu0, pbMsrBitmap0, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu0, pbMsrBitmap0, MSR_K8_CSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu0, pbMsrBitmap0, MSR_K6_STAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu0, pbMsrBitmap0, MSR_K8_SF_MASK, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu0, pbMsrBitmap0, MSR_K8_FS_BASE, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu0, pbMsrBitmap0, MSR_K8_GS_BASE, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu0, pbMsrBitmap0, MSR_K8_KERNEL_GS_BASE, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu0, pbMsrBitmap0, MSR_IA32_SYSENTER_CS, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu0, pbMsrBitmap0, MSR_IA32_SYSENTER_ESP, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu0, pbMsrBitmap0, MSR_IA32_SYSENTER_EIP, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ pVmcbCtrl0->u64MSRPMPhysAddr = pVCpu0->hm.s.svm.HCPhysMsrBitmap;
+
+ /* Initially all VMCB clean bits MBZ indicating that everything should be loaded from the VMCB in memory. */
+ Assert(pVmcbCtrl0->u32VmcbCleanBits == 0);
+
+ for (VMCPUID idCpu = 1; idCpu < pVM->cCpus; idCpu++)
+ {
+ PVMCPUCC pVCpuCur = VMCC_GET_CPU(pVM, idCpu);
+ PSVMVMCB pVmcbCur = pVCpuCur->hm.s.svm.pVmcb;
+ AssertMsgReturn(RT_VALID_PTR(pVmcbCur), ("Invalid pVmcb (%p) for vcpu[%u]\n", pVmcbCur, idCpu), VERR_SVM_INVALID_PVMCB);
+ PSVMVMCBCTRL pVmcbCtrlCur = &pVmcbCur->ctrl;
+
+ /* Copy the VMCB control area. */
+ memcpy(pVmcbCtrlCur, pVmcbCtrl0, sizeof(*pVmcbCtrlCur));
+
+ /* Copy the MSR bitmap and setup the VCPU-specific host physical address. */
+ uint8_t *pbMsrBitmapCur = (uint8_t *)pVCpuCur->hm.s.svm.pvMsrBitmap;
+ memcpy(pbMsrBitmapCur, pbMsrBitmap0, SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT);
+ pVmcbCtrlCur->u64MSRPMPhysAddr = pVCpuCur->hm.s.svm.HCPhysMsrBitmap;
+
+ /* Initially all VMCB clean bits MBZ indicating that everything should be loaded from the VMCB in memory. */
+ Assert(pVmcbCtrlCur->u32VmcbCleanBits == 0);
+
+ /* Verify our assumption that GIM providers trap #UD uniformly across VCPUs initially. */
+ Assert(pVCpuCur->hm.s.fGIMTrapXcptUD == pVCpu0->hm.s.fGIMTrapXcptUD);
+ }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ LogRel(("HM: fUsePauseFilter=%RTbool fUseLbrVirt=%RTbool fUseVGif=%RTbool fUseVirtVmsaveVmload=%RTbool\n", fUsePauseFilter,
+ fUseLbrVirt, fUseVGif, fUseVirtVmsaveVmload));
+#else
+ LogRel(("HM: fUsePauseFilter=%RTbool fUseLbrVirt=%RTbool\n", fUsePauseFilter, fUseLbrVirt));
+#endif
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Gets a pointer to the currently active guest (or nested-guest) VMCB.
+ *
+ * @returns Pointer to the current context VMCB.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(PSVMVMCB) hmR0SvmGetCurrentVmcb(PVMCPUCC pVCpu)
+{
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ return pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pVmcb);
+#endif
+ return pVCpu->hm.s.svm.pVmcb;
+}
+
+
+/**
+ * Gets a pointer to the nested-guest VMCB cache.
+ *
+ * @returns Pointer to the nested-guest VMCB cache.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(PSVMNESTEDVMCBCACHE) hmR0SvmGetNestedVmcbCache(PVMCPUCC pVCpu)
+{
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ Assert(pVCpu->hm.s.svm.NstGstVmcbCache.fCacheValid);
+ return &pVCpu->hm.s.svm.NstGstVmcbCache;
+#else
+ RT_NOREF(pVCpu);
+ return NULL;
+#endif
+}
+
+
+/**
+ * Invalidates a guest page by guest virtual address.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param GCVirt Guest virtual address of the page to invalidate.
+ */
+VMMR0DECL(int) SVMR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
+{
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.svm.fSupported);
+
+ bool const fFlushPending = pVCpu->CTX_SUFF(pVM)->hm.s.svm.fAlwaysFlushTLB || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
+
+ /* Skip it if a TLB flush is already pending. */
+ if (!fFlushPending)
+ {
+ Log4Func(("%#RGv\n", GCVirt));
+
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ AssertMsgReturn(pVmcb, ("Invalid pVmcb!\n"), VERR_SVM_INVALID_PVMCB);
+
+ SVMR0InvlpgA(GCVirt, pVmcb->ctrl.TLBCtrl.n.u32ASID);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Flushes the appropriate tagged-TLB entries.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ */
+static void hmR0SvmFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ /*
+ * Force a TLB flush for the first world switch if the current CPU differs from the one
+ * we ran on last. This can happen both for start & resume due to long jumps back to
+ * ring-3.
+ *
+     * We also force a TLB flush every time we execute a nested-guest VCPU, as there is no
+     * correlation between it and the physical CPU.
+ *
+ * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while
+ * flushing the TLB, so we cannot reuse the ASIDs without flushing.
+ */
+ bool fNewAsid = false;
+ Assert(pHostCpu->idCpu != NIL_RTCPUID);
+ if ( pVCpu->hm.s.idLastCpu != pHostCpu->idCpu
+ || pVCpu->hm.s.cTlbFlushes != pHostCpu->cTlbFlushes
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ || CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx)
+#endif
+ )
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
+ pVCpu->hm.s.fForceTLBFlush = true;
+ fNewAsid = true;
+ }
+
+ /* Set TLB flush state as checked until we return from the world switch. */
+ ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true);
+
+ /* Check for explicit TLB flushes. */
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
+ {
+ pVCpu->hm.s.fForceTLBFlush = true;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
+ }
+
+ /*
+     * If the CPU is affected by AMD erratum 170, we need to flush the entire TLB for each world switch. Sad.
+     * This host CPU requirement takes precedence.
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if (pVM->hm.s.svm.fAlwaysFlushTLB)
+ {
+ pHostCpu->uCurrentAsid = 1;
+ pVCpu->hm.s.uCurrentAsid = 1;
+ pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+ pVCpu->hm.s.idLastCpu = pHostCpu->idCpu;
+ pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
+
+ /* Clear the VMCB Clean Bit for NP while flushing the TLB. See @bugref{7152}. */
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_NP;
+ }
+ else
+ {
+ pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_NOTHING;
+ if (pVCpu->hm.s.fForceTLBFlush)
+ {
+ /* Clear the VMCB Clean Bit for NP while flushing the TLB. See @bugref{7152}. */
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_NP;
+
+ if (fNewAsid)
+ {
+ ++pHostCpu->uCurrentAsid;
+
+ bool fHitASIDLimit = false;
+ if (pHostCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
+ {
+ pHostCpu->uCurrentAsid = 1; /* Wraparound at 1; host uses 0 */
+ pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new ASID. */
+ fHitASIDLimit = true;
+ }
+
+ if ( fHitASIDLimit
+ || pHostCpu->fFlushAsidBeforeUse)
+ {
+ pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
+ pHostCpu->fFlushAsidBeforeUse = false;
+ }
+
+ pVCpu->hm.s.uCurrentAsid = pHostCpu->uCurrentAsid;
+ pVCpu->hm.s.idLastCpu = pHostCpu->idCpu;
+ pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+ }
+ else
+ {
+ if (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
+ pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT;
+ else
+ pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
+ }
+
+ pVCpu->hm.s.fForceTLBFlush = false;
+ }
+ }
+
+ /* Update VMCB with the ASID. */
+ if (pVmcb->ctrl.TLBCtrl.n.u32ASID != pVCpu->hm.s.uCurrentAsid)
+ {
+ pVmcb->ctrl.TLBCtrl.n.u32ASID = pVCpu->hm.s.uCurrentAsid;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_ASID;
+ }
+
+ AssertMsg(pVCpu->hm.s.idLastCpu == pHostCpu->idCpu,
+ ("vcpu idLastCpu=%u hostcpu idCpu=%u\n", pVCpu->hm.s.idLastCpu, pHostCpu->idCpu));
+ AssertMsg(pVCpu->hm.s.cTlbFlushes == pHostCpu->cTlbFlushes,
+ ("Flush count mismatch for cpu %u (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pHostCpu->cTlbFlushes));
+ AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
+ ("cpu%d uCurrentAsid = %x\n", pHostCpu->idCpu, pHostCpu->uCurrentAsid));
+ AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
+ ("cpu%d VM uCurrentAsid = %x\n", pHostCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
+
+#ifdef VBOX_WITH_STATISTICS
+ if (pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_NOTHING)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
+ else if ( pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_SINGLE_CONTEXT
+ || pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_SINGLE_CONTEXT_RETAIN_GLOBALS)
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
+ }
+ else
+ {
+ Assert(pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_ENTIRE);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushEntire);
+ }
+#endif
+}
+
+
+/**
+ * Sets an exception intercept in the specified VMCB.
+ *
+ * @param pVmcb Pointer to the VM control block.
+ * @param uXcpt The exception (X86_XCPT_*).
+ */
+DECLINLINE(void) hmR0SvmSetXcptIntercept(PSVMVMCB pVmcb, uint8_t uXcpt)
+{
+ if (!(pVmcb->ctrl.u32InterceptXcpt & RT_BIT(uXcpt)))
+ {
+ pVmcb->ctrl.u32InterceptXcpt |= RT_BIT(uXcpt);
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+}
+
+
+/**
+ * Clears an exception intercept in the specified VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ * @param uXcpt The exception (X86_XCPT_*).
+ *
+ * @remarks This takes into account if we're executing a nested-guest and only
+ * removes the exception intercept if both the guest -and- nested-guest
+ * are not intercepting it.
+ */
+DECLINLINE(void) hmR0SvmClearXcptIntercept(PVMCPUCC pVCpu, PSVMVMCB pVmcb, uint8_t uXcpt)
+{
+ Assert(uXcpt != X86_XCPT_DB);
+ Assert(uXcpt != X86_XCPT_AC);
+ Assert(uXcpt != X86_XCPT_GP);
+#ifndef HMSVM_ALWAYS_TRAP_ALL_XCPTS
+ if (pVmcb->ctrl.u32InterceptXcpt & RT_BIT(uXcpt))
+ {
+ bool fRemove = true;
+# ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ /* Only remove the intercept if the nested-guest is also not intercepting it! */
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ {
+ PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = hmR0SvmGetNestedVmcbCache(pVCpu);
+ fRemove = !(pVmcbNstGstCache->u32InterceptXcpt & RT_BIT(uXcpt));
+ }
+# else
+ RT_NOREF(pVCpu);
+# endif
+ if (fRemove)
+ {
+ pVmcb->ctrl.u32InterceptXcpt &= ~RT_BIT(uXcpt);
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+ }
+#else
+ RT_NOREF3(pVCpu, pVmcb, uXcpt);
+#endif
+}
+
+
+/**
+ * Sets a control intercept in the specified VMCB.
+ *
+ * @param pVmcb Pointer to the VM control block.
+ * @param fCtrlIntercept The control intercept (SVM_CTRL_INTERCEPT_*).
+ */
+DECLINLINE(void) hmR0SvmSetCtrlIntercept(PSVMVMCB pVmcb, uint64_t fCtrlIntercept)
+{
+ if (!(pVmcb->ctrl.u64InterceptCtrl & fCtrlIntercept))
+ {
+ pVmcb->ctrl.u64InterceptCtrl |= fCtrlIntercept;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+}
+
+
+/**
+ * Clears a control intercept in the specified VMCB.
+ *
+ * @returns @c true if the intercept is still set, @c false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ * @param fCtrlIntercept The control intercept (SVM_CTRL_INTERCEPT_*).
+ *
+ * @remarks This takes into account if we're executing a nested-guest and only
+ * removes the control intercept if both the guest -and- nested-guest
+ * are not intercepting it.
+ */
+static bool hmR0SvmClearCtrlIntercept(PVMCPUCC pVCpu, PSVMVMCB pVmcb, uint64_t fCtrlIntercept)
+{
+ if (pVmcb->ctrl.u64InterceptCtrl & fCtrlIntercept)
+ {
+ bool fRemove = true;
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ /* Only remove the control intercept if the nested-guest is also not intercepting it! */
+ if (CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ {
+ PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = hmR0SvmGetNestedVmcbCache(pVCpu);
+ fRemove = !(pVmcbNstGstCache->u64InterceptCtrl & fCtrlIntercept);
+ }
+#else
+ RT_NOREF(pVCpu);
+#endif
+ if (fRemove)
+ {
+ pVmcb->ctrl.u64InterceptCtrl &= ~fCtrlIntercept;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+ }
+
+ return RT_BOOL(pVmcb->ctrl.u64InterceptCtrl & fCtrlIntercept);
+}
+
+
+/**
+ * Exports the guest (or nested-guest) CR0 into the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks This assumes we always pre-load the guest FPU.
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmExportGuestCR0(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint64_t const uGuestCr0 = pCtx->cr0;
+ uint64_t uShadowCr0 = uGuestCr0;
+
+ /* Always enable caching. */
+ uShadowCr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+
+ /* When Nested Paging is not available use shadow page tables and intercept #PFs (latter done in SVMR0SetupVM()). */
+ if (!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging)
+ {
+ uShadowCr0 |= X86_CR0_PG /* Use shadow page tables. */
+ | X86_CR0_WP; /* Guest CPL 0 writes to its read-only pages should cause a #PF #VMEXIT. */
+ }
+
+ /*
+ * Use the #MF style of legacy-FPU error reporting for now. Although AMD-V has MSRs that
+     * let us isolate the host from it, IEM/REM still needs work to emulate it properly,
+ * see @bugref{7243#c103}.
+ */
+ if (!(uGuestCr0 & X86_CR0_NE))
+ {
+ uShadowCr0 |= X86_CR0_NE;
+ hmR0SvmSetXcptIntercept(pVmcb, X86_XCPT_MF);
+ }
+ else
+ hmR0SvmClearXcptIntercept(pVCpu, pVmcb, X86_XCPT_MF);
+
+ /*
+ * If the shadow and guest CR0 are identical we can avoid intercepting CR0 reads.
+ *
+     * CR0 writes still need interception as PGM requires tracking paging mode changes,
+ * see @bugref{6944}.
+ *
+ * We also don't ever want to honor weird things like cache disable from the guest.
+ * However, we can avoid intercepting changes to the TS & MP bits by clearing the CR0
+ * write intercept below and keeping SVM_CTRL_INTERCEPT_CR0_SEL_WRITE instead.
+ */
+ if (uShadowCr0 == uGuestCr0)
+ {
+ if (!CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ {
+ pVmcb->ctrl.u16InterceptRdCRx &= ~RT_BIT(0);
+ pVmcb->ctrl.u16InterceptWrCRx &= ~RT_BIT(0);
+ Assert(pVmcb->ctrl.u64InterceptCtrl & SVM_CTRL_INTERCEPT_CR0_SEL_WRITE);
+ }
+ else
+ {
+ /* If the nested-hypervisor intercepts CR0 reads/writes, we need to continue intercepting them. */
+ PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = hmR0SvmGetNestedVmcbCache(pVCpu);
+ pVmcb->ctrl.u16InterceptRdCRx = (pVmcb->ctrl.u16InterceptRdCRx & ~RT_BIT(0))
+ | (pVmcbNstGstCache->u16InterceptRdCRx & RT_BIT(0));
+ pVmcb->ctrl.u16InterceptWrCRx = (pVmcb->ctrl.u16InterceptWrCRx & ~RT_BIT(0))
+ | (pVmcbNstGstCache->u16InterceptWrCRx & RT_BIT(0));
+ }
+ }
+ else
+ {
+ pVmcb->ctrl.u16InterceptRdCRx |= RT_BIT(0);
+ pVmcb->ctrl.u16InterceptWrCRx |= RT_BIT(0);
+ }
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+
+ Assert(!RT_HI_U32(uShadowCr0));
+ if (pVmcb->guest.u64CR0 != uShadowCr0)
+ {
+ pVmcb->guest.u64CR0 = uShadowCr0;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
+ }
+}
+
+
+/**
+ * Exports the guest (or nested-guest) CR3 into the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmExportGuestCR3(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (pVM->hm.s.fNestedPaging)
+ {
+ pVmcb->ctrl.u64NestedPagingCR3 = PGMGetHyperCR3(pVCpu);
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_NP;
+ pVmcb->guest.u64CR3 = pCtx->cr3;
+ Assert(pVmcb->ctrl.u64NestedPagingCR3);
+ }
+ else
+ pVmcb->guest.u64CR3 = PGMGetHyperCR3(pVCpu);
+
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
+}
+
+
+/**
+ * Exports the guest (or nested-guest) CR4 into the VMCB.
+ *
+ * @returns VBox status code.
+ * @param   pVCpu       The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0SvmExportGuestCR4(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint64_t uShadowCr4 = pCtx->cr4;
+ if (!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging)
+ {
+ switch (pVCpu->hm.s.enmShadowMode)
+ {
+ case PGMMODE_REAL:
+ case PGMMODE_PROTECTED: /* Protected mode, no paging. */
+ return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
+
+ case PGMMODE_32_BIT: /* 32-bit paging. */
+ uShadowCr4 &= ~X86_CR4_PAE;
+ break;
+
+ case PGMMODE_PAE: /* PAE paging. */
+ case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
+                /* Must use PAE paging as we could use physical memory > 4 GB. */
+ uShadowCr4 |= X86_CR4_PAE;
+ break;
+
+ case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
+ case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
+#ifdef VBOX_WITH_64_BITS_GUESTS
+ break;
+#else
+ return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
+#endif
+
+ default: /* shut up gcc */
+ return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
+ }
+ }
+
+ /* Whether to save/load/restore XCR0 during world switch depends on CR4.OSXSAVE and host+guest XCR0. */
+ pVCpu->hm.s.fLoadSaveGuestXcr0 = (pCtx->cr4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0();
+
+ /* Avoid intercepting CR4 reads if the guest and shadow CR4 values are identical. */
+ if (uShadowCr4 == pCtx->cr4)
+ {
+ if (!CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ pVmcb->ctrl.u16InterceptRdCRx &= ~RT_BIT(4);
+ else
+ {
+ /* If the nested-hypervisor intercepts CR4 reads, we need to continue intercepting them. */
+ PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = hmR0SvmGetNestedVmcbCache(pVCpu);
+ pVmcb->ctrl.u16InterceptRdCRx = (pVmcb->ctrl.u16InterceptRdCRx & ~RT_BIT(4))
+ | (pVmcbNstGstCache->u16InterceptRdCRx & RT_BIT(4));
+ }
+ }
+ else
+ pVmcb->ctrl.u16InterceptRdCRx |= RT_BIT(4);
+
+ /* CR4 writes are always intercepted (both guest, nested-guest) for tracking PGM mode changes. */
+ Assert(pVmcb->ctrl.u16InterceptWrCRx & RT_BIT(4));
+
+    /* Update the VMCB with the shadow CR4 and clear the appropriate VMCB clean bits. */
+ Assert(!RT_HI_U32(uShadowCr4));
+ pVmcb->guest.u64CR4 = uShadowCr4;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~(HMSVM_VMCB_CLEAN_CRX_EFER | HMSVM_VMCB_CLEAN_INTERCEPTS);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest (or nested-guest) control registers into the VMCB.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0SvmExportGuestControlRegs(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_CR_MASK)
+ {
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_CR0)
+ hmR0SvmExportGuestCR0(pVCpu, pVmcb);
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_CR2)
+ {
+ pVmcb->guest.u64CR2 = pVCpu->cpum.GstCtx.cr2;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CR2;
+ }
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_CR3)
+ hmR0SvmExportGuestCR3(pVCpu, pVmcb);
+
+        /* CR4 re-loading is ASSUMED to be done every time we get in from ring-3! (XCR0) */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_CR4)
+ {
+ int rc = hmR0SvmExportGuestCR4(pVCpu, pVmcb);
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_CR_MASK;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest (or nested-guest) segment registers into the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmExportGuestSegmentRegs(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+
+ /* Guest segment registers. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_SREG_MASK)
+ {
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_CS)
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, CS, cs);
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_SS)
+ {
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, SS, ss);
+ pVmcb->guest.u8CPL = pCtx->ss.Attr.n.u2Dpl;
+ }
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DS)
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, DS, ds);
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_ES)
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, ES, es);
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_FS)
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, FS, fs);
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_GS)
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, GS, gs);
+
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_SEG;
+ }
+
+ /* Guest TR. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_TR)
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, TR, tr);
+
+ /* Guest LDTR. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_LDTR)
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, LDTR, ldtr);
+
+ /* Guest GDTR. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_GDTR)
+ {
+ pVmcb->guest.GDTR.u32Limit = pCtx->gdtr.cbGdt;
+ pVmcb->guest.GDTR.u64Base = pCtx->gdtr.pGdt;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DT;
+ }
+
+ /* Guest IDTR. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_IDTR)
+ {
+ pVmcb->guest.IDTR.u32Limit = pCtx->idtr.cbIdt;
+ pVmcb->guest.IDTR.u64Base = pCtx->idtr.pIdt;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DT;
+ }
+
+ pVCpu->hm.s.fCtxChanged &= ~( HM_CHANGED_GUEST_SREG_MASK
+ | HM_CHANGED_GUEST_TABLE_MASK);
+}
+
+
+/**
+ * Exports the guest (or nested-guest) MSRs into the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmExportGuestMsrs(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+
+ /* Guest Sysenter MSRs. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
+ {
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
+ pVmcb->guest.u64SysEnterCS = pCtx->SysEnter.cs;
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
+ pVmcb->guest.u64SysEnterEIP = pCtx->SysEnter.eip;
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
+ pVmcb->guest.u64SysEnterESP = pCtx->SysEnter.esp;
+ }
+
+ /*
+ * Guest EFER MSR.
+ * AMD-V requires guest EFER.SVME to be set. Weird.
+ * See AMD spec. 15.5.1 "Basic Operation" | "Canonicalization and Consistency Checks".
+ */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_EFER_MSR)
+ {
+ pVmcb->guest.u64EFER = pCtx->msrEFER | MSR_K6_EFER_SVME;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
+ }
+
+    /* If the guest isn't in 64-bit mode, clear the MSR_K6_EFER_LME bit; otherwise SVM expects AMD64 shadow paging. */
+ if ( !CPUMIsGuestInLongModeEx(pCtx)
+ && (pCtx->msrEFER & MSR_K6_EFER_LME))
+ {
+ pVmcb->guest.u64EFER &= ~MSR_K6_EFER_LME;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
+ }
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_SYSCALL_MSRS)
+ {
+ pVmcb->guest.u64STAR = pCtx->msrSTAR;
+ pVmcb->guest.u64LSTAR = pCtx->msrLSTAR;
+ pVmcb->guest.u64CSTAR = pCtx->msrCSTAR;
+ pVmcb->guest.u64SFMASK = pCtx->msrSFMASK;
+ }
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_KERNEL_GS_BASE)
+ pVmcb->guest.u64KernelGSBase = pCtx->msrKERNELGSBASE;
+
+ pVCpu->hm.s.fCtxChanged &= ~( HM_CHANGED_GUEST_SYSENTER_MSR_MASK
+ | HM_CHANGED_GUEST_EFER_MSR
+ | HM_CHANGED_GUEST_SYSCALL_MSRS
+ | HM_CHANGED_GUEST_KERNEL_GS_BASE);
+
+ /*
+ * Setup the PAT MSR (applicable for Nested Paging only).
+ *
+ * The default value should be MSR_IA32_CR_PAT_INIT_VAL, but we treat all guest memory
+ * as WB, so choose type 6 for all PAT slots, see @bugref{9634}.
+ *
+ * While guests can modify and see the modified values through the shadow values,
+ * we shall not honor any guest modifications of this MSR to ensure caching is always
+ * enabled similar to how we clear CR0.CD and NW bits.
+ *
+ * For nested-guests this needs to always be set as well, see @bugref{7243#c109}.
+ */
+ pVmcb->guest.u64PAT = UINT64_C(0x0006060606060606);
+
+ /* Enable the last branch record bit if LBR virtualization is enabled. */
+ if (pVmcb->ctrl.LbrVirt.n.u1LbrVirt)
+ pVmcb->guest.u64DBGCTL = MSR_IA32_DEBUGCTL_LBR;
+}
+
+
+/**
+ * Exports the guest (or nested-guest) debug state into the VMCB and programs
+ * the necessary intercepts accordingly.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ * @remarks Requires EFLAGS to be up-to-date in the VMCB!
+ */
+static void hmR0SvmExportSharedDebugState(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+
+ /*
+     * Is anyone single-stepping on the host side? If so, we'll have to use the
+     * trap flag in the guest EFLAGS since AMD-V doesn't have a trap flag at
+     * the VMM level like the VT-x implementation does.
+ */
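+    /* Mechanically: setting X86_EFL_TF below makes the guest take a #DB after every instruction,
+       and the unconditional #DB intercept (asserted further down in this function) hands that back
+       to us; fClearTrapFlag is presumably consumed later on to strip the TF bit we injected so the
+       guest never sees it. */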
+ bool fInterceptMovDRx = false;
+ bool const fStepping = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
+ if (fStepping)
+ {
+ pVCpu->hm.s.fClearTrapFlag = true;
+ pVmcb->guest.u64RFlags |= X86_EFL_TF;
+ fInterceptMovDRx = true; /* Need clean DR6, no guest mess. */
+ }
+
+ if ( fStepping
+ || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
+ {
+ /*
+ * Use the combined guest and host DRx values found in the hypervisor
+ * register set because the debugger has breakpoints active or someone
+ * is single stepping on the host side.
+ *
+ * Note! DBGF expects a clean DR6 state before executing guest code.
+ */
+ if (!CPUMIsHyperDebugStateActive(pVCpu))
+ {
+ CPUMR0LoadHyperDebugState(pVCpu, false /* include DR6 */);
+ Assert(!CPUMIsGuestDebugStateActive(pVCpu));
+ Assert(CPUMIsHyperDebugStateActive(pVCpu));
+ }
+
+ /* Update DR6 & DR7. (The other DRx values are handled by CPUM one way or the other.) */
+ if ( pVmcb->guest.u64DR6 != X86_DR6_INIT_VAL
+ || pVmcb->guest.u64DR7 != CPUMGetHyperDR7(pVCpu))
+ {
+ pVmcb->guest.u64DR7 = CPUMGetHyperDR7(pVCpu);
+ pVmcb->guest.u64DR6 = X86_DR6_INIT_VAL;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX;
+ }
+
+ /** @todo If we cared, we could optimize to allow the guest to read registers
+ * with the same values. */
+ fInterceptMovDRx = true;
+ pVCpu->hm.s.fUsingHyperDR7 = true;
+ Log5(("hmR0SvmExportSharedDebugState: Loaded hyper DRx\n"));
+ }
+ else
+ {
+ /*
+ * Update DR6, DR7 with the guest values if necessary.
+ */
+ if ( pVmcb->guest.u64DR7 != pCtx->dr[7]
+ || pVmcb->guest.u64DR6 != pCtx->dr[6])
+ {
+ pVmcb->guest.u64DR7 = pCtx->dr[7];
+ pVmcb->guest.u64DR6 = pCtx->dr[6];
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX;
+ }
+ pVCpu->hm.s.fUsingHyperDR7 = false;
+
+ /*
+ * If the guest has enabled debug registers, we need to load them prior to
+ * executing guest code so they'll trigger at the right time.
+ */
+ if (pCtx->dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD)) /** @todo Why GD? */
+ {
+ if (!CPUMIsGuestDebugStateActive(pVCpu))
+ {
+ CPUMR0LoadGuestDebugState(pVCpu, false /* include DR6 */);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
+ Assert(!CPUMIsHyperDebugStateActive(pVCpu));
+ Assert(CPUMIsGuestDebugStateActive(pVCpu));
+ }
+ Log5(("hmR0SvmExportSharedDebugState: Loaded guest DRx\n"));
+ }
+ /*
+ * If no debugging enabled, we'll lazy load DR0-3. We don't need to
+ * intercept #DB as DR6 is updated in the VMCB.
+ *
+ * Note! If we cared and dared, we could skip intercepting \#DB here.
+ * However, \#DB shouldn't be performance critical, so we'll play safe
+ * and keep the code similar to the VT-x code and always intercept it.
+ */
+ else if (!CPUMIsGuestDebugStateActive(pVCpu))
+ fInterceptMovDRx = true;
+ }
+
+ Assert(pVmcb->ctrl.u32InterceptXcpt & RT_BIT_32(X86_XCPT_DB));
+ if (fInterceptMovDRx)
+ {
+ if ( pVmcb->ctrl.u16InterceptRdDRx != 0xffff
+ || pVmcb->ctrl.u16InterceptWrDRx != 0xffff)
+ {
+ pVmcb->ctrl.u16InterceptRdDRx = 0xffff;
+ pVmcb->ctrl.u16InterceptWrDRx = 0xffff;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+ }
+ else
+ {
+ if ( pVmcb->ctrl.u16InterceptRdDRx
+ || pVmcb->ctrl.u16InterceptWrDRx)
+ {
+ pVmcb->ctrl.u16InterceptRdDRx = 0;
+ pVmcb->ctrl.u16InterceptWrDRx = 0;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+ }
+ Log4Func(("DR6=%#RX64 DR7=%#RX64\n", pCtx->dr[6], pCtx->dr[7]));
+}
+
+
+/**
+ * Exports the hardware virtualization state into the nested-guest
+ * VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmExportGuestHwvirtState(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_HWVIRT)
+ {
+ if (pVmcb->ctrl.IntCtrl.n.u1VGifEnable)
+ {
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PCVM pVM = pVCpu->CTX_SUFF(pVM);
+
+ HMSVM_ASSERT_NOT_IN_NESTED_GUEST(pCtx); /* Nested VGIF is not supported yet. */
+ Assert(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_VGIF); /* Physical hardware supports VGIF. */
+ Assert(HMIsSvmVGifActive(pVM)); /* Outer VM has enabled VGIF. */
+ NOREF(pVM);
+
+ pVmcb->ctrl.IntCtrl.n.u1VGif = CPUMGetGuestGif(pCtx);
+ }
+
+ /*
+ * Ensure the nested-guest pause-filter counters don't exceed the outer guest values esp.
+ * since SVM doesn't have a preemption timer.
+ *
+         * We do this here rather than in hmR0SvmSetupVmcbNested() as we may have been executing the
+         * nested-guest in IEM (including PAUSE instructions, which update the pause-filter counters)
+         * and may continue execution in SVM R0 without a nested-guest #VMEXIT in between.
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PSVMVMCBCTRL pVmcbCtrl = &pVmcb->ctrl;
+ uint16_t const uGuestPauseFilterCount = pVM->hm.s.svm.cPauseFilter;
+ uint16_t const uGuestPauseFilterThreshold = pVM->hm.s.svm.cPauseFilterThresholdTicks;
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, &pVCpu->cpum.GstCtx, SVM_CTRL_INTERCEPT_PAUSE))
+ {
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ pVmcbCtrl->u16PauseFilterCount = RT_MIN(pCtx->hwvirt.svm.cPauseFilter, uGuestPauseFilterCount);
+ pVmcbCtrl->u16PauseFilterThreshold = RT_MIN(pCtx->hwvirt.svm.cPauseFilterThreshold, uGuestPauseFilterThreshold);
+ }
+ else
+ {
+ /** @todo r=ramshankar: We can turn these assignments into assertions. */
+ pVmcbCtrl->u16PauseFilterCount = uGuestPauseFilterCount;
+ pVmcbCtrl->u16PauseFilterThreshold = uGuestPauseFilterThreshold;
+ }
+ pVmcbCtrl->u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_HWVIRT;
+ }
+}
+
+
+/**
+ * Exports the guest APIC TPR state into the VMCB.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ */
+static int hmR0SvmExportGuestApicTpr(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ HMSVM_ASSERT_NOT_IN_NESTED_GUEST(&pVCpu->cpum.GstCtx);
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_APIC_TPR)
+ {
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if ( PDMHasApic(pVM)
+ && APICIsEnabled(pVCpu))
+ {
+ bool fPendingIntr;
+ uint8_t u8Tpr;
+ int rc = APICGetTpr(pVCpu, &u8Tpr, &fPendingIntr, NULL /* pu8PendingIrq */);
+ AssertRCReturn(rc, rc);
+
+ /* Assume that we need to trap all TPR accesses and thus need not check on
+ every #VMEXIT if we should update the TPR. */
+ Assert(pVmcb->ctrl.IntCtrl.n.u1VIntrMasking);
+ pVCpu->hm.s.svm.fSyncVTpr = false;
+
+ if (!pVM->hm.s.fTPRPatchingActive)
+ {
+ /* Bits 3-0 of the VTPR field correspond to bits 7-4 of the TPR (which is the Task-Priority Class). */
+ pVmcb->ctrl.IntCtrl.n.u8VTPR = (u8Tpr >> 4);
+
+ /* If there are interrupts pending, intercept CR8 writes to evaluate ASAP if we
+ can deliver the interrupt to the guest. */
+ if (fPendingIntr)
+ pVmcb->ctrl.u16InterceptWrCRx |= RT_BIT(8);
+ else
+ {
+ pVmcb->ctrl.u16InterceptWrCRx &= ~RT_BIT(8);
+ pVCpu->hm.s.svm.fSyncVTpr = true;
+ }
+
+ pVmcb->ctrl.u32VmcbCleanBits &= ~(HMSVM_VMCB_CLEAN_INTERCEPTS | HMSVM_VMCB_CLEAN_INT_CTRL);
+ }
+ else
+ {
+            /* 32-bit guests use the LSTAR MSR for patching guest code that touches the TPR. */
+ pVmcb->guest.u64LSTAR = u8Tpr;
+ uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap;
+
+ /* If there are interrupts pending, intercept LSTAR writes, otherwise don't intercept reads or writes. */
+ if (fPendingIntr)
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_INTERCEPT_WRITE);
+ else
+ {
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ pVCpu->hm.s.svm.fSyncVTpr = true;
+ }
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_IOPM_MSRPM;
+ }
+ }
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_APIC_TPR);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up the exception intercepts required for guest execution in the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmExportGuestXcptIntercepts(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ HMSVM_ASSERT_NOT_IN_NESTED_GUEST(&pVCpu->cpum.GstCtx);
+
+ /* If we modify intercepts from here, please check & adjust hmR0SvmMergeVmcbCtrlsNested() if required. */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_SVM_XCPT_INTERCEPTS)
+ {
+ /* Trap #UD for GIM provider (e.g. for hypercalls). */
+ if (pVCpu->hm.s.fGIMTrapXcptUD)
+ hmR0SvmSetXcptIntercept(pVmcb, X86_XCPT_UD);
+ else
+ hmR0SvmClearXcptIntercept(pVCpu, pVmcb, X86_XCPT_UD);
+
+ /* Trap #BP for INT3 debug breakpoints set by the VM debugger. */
+ if (pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
+ hmR0SvmSetXcptIntercept(pVmcb, X86_XCPT_BP);
+ else
+ hmR0SvmClearXcptIntercept(pVCpu, pVmcb, X86_XCPT_BP);
+
+ /* The remaining intercepts are handled elsewhere, e.g. in hmR0SvmExportGuestCR0(). */
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_SVM_XCPT_INTERCEPTS);
+ }
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+/**
+ * Merges guest and nested-guest intercepts for executing the nested-guest using
+ * hardware-assisted SVM.
+ *
+ * This merges the guest and nested-guest intercepts such that anything the outer
+ * guest intercepts is also intercepted while executing the nested-guest.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0SvmMergeVmcbCtrlsNested(PVMCPUCC pVCpu)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+ PSVMVMCB pVmcbNstGst = pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pVmcb);
+ PSVMVMCBCTRL pVmcbNstGstCtrl = &pVmcbNstGst->ctrl;
+
+ /* Merge the guest's CR intercepts into the nested-guest VMCB. */
+ pVmcbNstGstCtrl->u16InterceptRdCRx |= pVmcb->ctrl.u16InterceptRdCRx;
+ pVmcbNstGstCtrl->u16InterceptWrCRx |= pVmcb->ctrl.u16InterceptWrCRx;
+
+ /* Always intercept CR4 writes for tracking PGM mode changes. */
+ pVmcbNstGstCtrl->u16InterceptWrCRx |= RT_BIT(4);
+
+ /* Without nested paging, intercept CR3 reads and writes as we load shadow page tables. */
+ if (!pVM->hm.s.fNestedPaging)
+ {
+ pVmcbNstGstCtrl->u16InterceptRdCRx |= RT_BIT(3);
+ pVmcbNstGstCtrl->u16InterceptWrCRx |= RT_BIT(3);
+ }
+
+ /** @todo Figure out debugging with nested-guests, till then just intercept
+ * all DR[0-15] accesses. */
+ pVmcbNstGstCtrl->u16InterceptRdDRx |= 0xffff;
+ pVmcbNstGstCtrl->u16InterceptWrDRx |= 0xffff;
+
+ /*
+ * Merge the guest's exception intercepts into the nested-guest VMCB.
+ *
+ * - #UD: Exclude these as the outer guest's GIM hypercalls are not applicable
+ * while executing the nested-guest.
+ *
+ * - #BP: Exclude breakpoints set by the VM debugger for the outer guest. This can
+ * be tweaked later depending on how we wish to implement breakpoints.
+ *
+     * - #GP: Exclude these as it's the inner VMM's problem to get vmsvga 3d drivers
+ * loaded into their guests, not ours.
+ *
+ * Warning!! This ASSUMES we only intercept \#UD for hypercall purposes and \#BP
+ * for VM debugger breakpoints, see hmR0SvmExportGuestXcptIntercepts().
+ */
+#ifndef HMSVM_ALWAYS_TRAP_ALL_XCPTS
+ pVmcbNstGstCtrl->u32InterceptXcpt |= pVmcb->ctrl.u32InterceptXcpt
+ & ~( RT_BIT(X86_XCPT_UD)
+ | RT_BIT(X86_XCPT_BP)
+ | (pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv ? RT_BIT(X86_XCPT_GP) : 0));
+#else
+ pVmcbNstGstCtrl->u32InterceptXcpt |= pVmcb->ctrl.u32InterceptXcpt;
+#endif
+
+ /*
+ * Adjust intercepts while executing the nested-guest that differ from the
+ * outer guest intercepts.
+ *
+ * - VINTR: Exclude the outer guest intercept as we don't need to cause VINTR #VMEXITs
+ * that belong to the nested-guest to the outer guest.
+ *
+ * - VMMCALL: Exclude the outer guest intercept as when it's also not intercepted by
+ * the nested-guest, the physical CPU raises a \#UD exception as expected.
+ */
+ pVmcbNstGstCtrl->u64InterceptCtrl |= (pVmcb->ctrl.u64InterceptCtrl & ~( SVM_CTRL_INTERCEPT_VINTR
+ | SVM_CTRL_INTERCEPT_VMMCALL))
+ | HMSVM_MANDATORY_GUEST_CTRL_INTERCEPTS;
+
+ Assert( (pVmcbNstGstCtrl->u64InterceptCtrl & HMSVM_MANDATORY_GUEST_CTRL_INTERCEPTS)
+ == HMSVM_MANDATORY_GUEST_CTRL_INTERCEPTS);
+
+ /* Finally, update the VMCB clean bits. */
+ pVmcbNstGstCtrl->u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+}
+#endif
+
+
+/**
+ * Enters the AMD-V session.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0DECL(int) SVMR0Enter(PVMCPUCC pVCpu)
+{
+ AssertPtr(pVCpu);
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.svm.fSupported);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ LogFlowFunc(("pVCpu=%p\n", pVCpu));
+ Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE))
+ == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE));
+
+ pVCpu->hm.s.fLeaveDone = false;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Thread-context callback for AMD-V.
+ *
+ * @param enmEvent The thread-context event.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fGlobalInit Whether global VT-x/AMD-V init. is used.
+ * @thread EMT(pVCpu)
+ */
+VMMR0DECL(void) SVMR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
+{
+ NOREF(fGlobalInit);
+
+ switch (enmEvent)
+ {
+ case RTTHREADCTXEVENT_OUT:
+ {
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(VMMR0ThreadCtxHookIsEnabled(pVCpu));
+ VMCPU_ASSERT_EMT(pVCpu);
+
+ /* No longjmps (log-flush, locks) in this fragile context. */
+ VMMRZCallRing3Disable(pVCpu);
+
+ if (!pVCpu->hm.s.fLeaveDone)
+ {
+ hmR0SvmLeave(pVCpu, false /* fImportState */);
+ pVCpu->hm.s.fLeaveDone = true;
+ }
+
+ /* Leave HM context, takes care of local init (term). */
+ int rc = HMR0LeaveCpu(pVCpu);
+ AssertRC(rc); NOREF(rc);
+
+ /* Restore longjmp state. */
+ VMMRZCallRing3Enable(pVCpu);
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
+ break;
+ }
+
+ case RTTHREADCTXEVENT_IN:
+ {
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(VMMR0ThreadCtxHookIsEnabled(pVCpu));
+ VMCPU_ASSERT_EMT(pVCpu);
+
+ /* No longjmps (log-flush, locks) in this fragile context. */
+ VMMRZCallRing3Disable(pVCpu);
+
+ /*
+ * Initialize the bare minimum state required for HM. This takes care of
+ * initializing AMD-V if necessary (onlined CPUs, local init etc.)
+ */
+ int rc = hmR0EnterCpu(pVCpu);
+ AssertRC(rc); NOREF(rc);
+ Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE))
+ == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE));
+
+ pVCpu->hm.s.fLeaveDone = false;
+
+ /* Restore longjmp state. */
+ VMMRZCallRing3Enable(pVCpu);
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
+
+/**
+ * Saves the host state.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+VMMR0DECL(int) SVMR0ExportHostState(PVMCPUCC pVCpu)
+{
+ NOREF(pVCpu);
+
+ /* Nothing to do here. AMD-V does this for us automatically during the world-switch. */
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_HOST_CONTEXT);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest or nested-guest state from the virtual-CPU context into the
+ * VMCB.
+ *
+ * Also sets up the appropriate VMRUN function to execute guest or nested-guest
+ * code based on the virtual-CPU mode.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0SvmExportGuestState(PVMCPUCC pVCpu, PCSVMTRANSIENT pSvmTransient)
+{
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
+
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ Assert(pVmcb);
+
+ pVmcb->guest.u64RIP = pCtx->rip;
+ pVmcb->guest.u64RSP = pCtx->rsp;
+ pVmcb->guest.u64RFlags = pCtx->eflags.u32;
+ pVmcb->guest.u64RAX = pCtx->rax;
+
+ bool const fIsNestedGuest = pSvmTransient->fIsNestedGuest;
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ int rc = hmR0SvmExportGuestControlRegs(pVCpu, pVmcb);
+ AssertRCReturnStmt(rc, ASMSetFlags(fEFlags), rc);
+ hmR0SvmExportGuestSegmentRegs(pVCpu, pVmcb);
+ hmR0SvmExportGuestMsrs(pVCpu, pVmcb);
+ hmR0SvmExportGuestHwvirtState(pVCpu, pVmcb);
+
+ ASMSetFlags(fEFlags);
+
+ if (!fIsNestedGuest)
+ {
+ /* hmR0SvmExportGuestApicTpr() must be called -after- hmR0SvmExportGuestMsrs(),
+ otherwise we would overwrite the LSTAR MSR that we use for TPR patching. */
+ hmR0SvmExportGuestApicTpr(pVCpu, pVmcb);
+ hmR0SvmExportGuestXcptIntercepts(pVCpu, pVmcb);
+ }
+
+ /* Clear bits that are either exported unconditionally above or unused/reserved for SVM. */
+ uint64_t fUnusedMask = HM_CHANGED_GUEST_RIP
+ | HM_CHANGED_GUEST_RFLAGS
+ | HM_CHANGED_GUEST_GPRS_MASK
+ | HM_CHANGED_GUEST_X87
+ | HM_CHANGED_GUEST_SSE_AVX
+ | HM_CHANGED_GUEST_OTHER_XSAVE
+ | HM_CHANGED_GUEST_XCRx
+ | HM_CHANGED_GUEST_TSC_AUX
+ | HM_CHANGED_GUEST_OTHER_MSRS;
+ if (fIsNestedGuest)
+ fUnusedMask |= HM_CHANGED_SVM_XCPT_INTERCEPTS
+ | HM_CHANGED_GUEST_APIC_TPR;
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( fUnusedMask
+ | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_SVM_MASK)));
+
+#ifdef VBOX_STRICT
+ /*
+ * All of the guest-CPU state and SVM keeper bits should be exported here by now,
+ * except for the host-context and/or shared host-guest context bits.
+ */
+ uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
+ AssertMsg(!(fCtxChanged & (HM_CHANGED_ALL_GUEST & ~HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE)),
+ ("fCtxChanged=%#RX64\n", fCtxChanged));
+
+ /*
+ * If we need to log state that isn't always imported, we'll need to import it here.
+ * See hmR0SvmPostRunGuest() for which part of the state is imported unconditionally.
+ */
+ hmR0SvmLogState(pVCpu, pVmcb, "hmR0SvmExportGuestState", 0 /* fFlags */, 0 /* uVerbose */);
+#endif
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
+ return VINF_SUCCESS;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+/**
+ * Merges the guest and nested-guest MSR permission bitmap.
+ *
+ * If the guest is intercepting an MSR we need to intercept it regardless of
+ * whether the nested-guest is intercepting it or not.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jmp zone!!!
+ */
+DECLINLINE(void) hmR0SvmMergeMsrpmNested(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
+{
+ uint64_t const *pu64GstMsrpm = (uint64_t const *)pVCpu->hm.s.svm.pvMsrBitmap;
+ uint64_t const *pu64NstGstMsrpm = (uint64_t const *)pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pvMsrBitmap);
+ uint64_t *pu64DstMsrpm = (uint64_t *)pHostCpu->n.svm.pvNstGstMsrpm;
+
+ /* MSRPM bytes from offset 0x1800 are reserved, so we stop merging there. */
+ uint32_t const offRsvdQwords = 0x1800 >> 3;
+ for (uint32_t i = 0; i < offRsvdQwords; i++)
+ pu64DstMsrpm[i] = pu64NstGstMsrpm[i] | pu64GstMsrpm[i];
+}
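+
+
+/*
+ * A minimal standalone sketch of the MSRPM layout the merge above relies on
+ * (see AMD spec. 15.11 "MSR Intercepts"): three 2 KB blocks cover the MSR
+ * ranges 0h-1fffh, c000_0000h-c000_1fffh and c001_0000h-c001_1fffh, with two
+ * permission bits (read, then write) per MSR; bytes from offset 0x1800 onwards
+ * are reserved. The 'sketch' name is an illustrative helper, not a VirtualBox API.
+ */
+DECLINLINE(bool) sketchSvmMsrpmOffset(uint32_t idMsr, uint32_t *poffByte, uint32_t *piBit)
+{
+ uint32_t offBlock;
+ if (idMsr <= UINT32_C(0x00001fff)) offBlock = 0x0000;
+ else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x1fff)) offBlock = 0x0800;
+ else if (idMsr - UINT32_C(0xc0010000) <= UINT32_C(0x1fff)) offBlock = 0x1000;
+ else return false; /* Not covered by the MSRPM; such MSRs are always intercepted. */
+ uint32_t const iBitInBlock = (idMsr & UINT32_C(0x1fff)) * 2; /* 2 bits per MSR. */
+ *poffByte = offBlock + iBitInBlock / 8;
+ *piBit = iBitInBlock % 8; /* Read-intercept bit; the write-intercept bit follows it. */
+ return true;
+}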
+
+
+/**
+ * Caches the nested-guest VMCB fields before we modify them for execution using
+ * hardware-assisted SVM.
+ *
+ * @returns true if the VMCB was previously already cached, false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @sa HMNotifySvmNstGstVmexit.
+ */
+static bool hmR0SvmCacheVmcbNested(PVMCPUCC pVCpu)
+{
+ /*
+ * Cache the nested-guest programmed VMCB fields if we have not cached them yet.
+ * Otherwise we risk re-caching the values we may have modified, see @bugref{7243#c44}.
+ *
+ * Nested-paging CR3 is not saved back into the VMCB on #VMEXIT, hence no need to
+ * cache and restore it, see AMD spec. 15.25.4 "Nested Paging and VMRUN/#VMEXIT".
+ */
+ PSVMNESTEDVMCBCACHE pVmcbNstGstCache = &pVCpu->hm.s.svm.NstGstVmcbCache;
+ bool const fWasCached = pVmcbNstGstCache->fCacheValid;
+ if (!fWasCached)
+ {
+ PCSVMVMCB pVmcbNstGst = pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pVmcb);
+ PCSVMVMCBCTRL pVmcbNstGstCtrl = &pVmcbNstGst->ctrl;
+ pVmcbNstGstCache->u16InterceptRdCRx = pVmcbNstGstCtrl->u16InterceptRdCRx;
+ pVmcbNstGstCache->u16InterceptWrCRx = pVmcbNstGstCtrl->u16InterceptWrCRx;
+ pVmcbNstGstCache->u16InterceptRdDRx = pVmcbNstGstCtrl->u16InterceptRdDRx;
+ pVmcbNstGstCache->u16InterceptWrDRx = pVmcbNstGstCtrl->u16InterceptWrDRx;
+ pVmcbNstGstCache->u16PauseFilterThreshold = pVmcbNstGstCtrl->u16PauseFilterThreshold;
+ pVmcbNstGstCache->u16PauseFilterCount = pVmcbNstGstCtrl->u16PauseFilterCount;
+ pVmcbNstGstCache->u32InterceptXcpt = pVmcbNstGstCtrl->u32InterceptXcpt;
+ pVmcbNstGstCache->u64InterceptCtrl = pVmcbNstGstCtrl->u64InterceptCtrl;
+ pVmcbNstGstCache->u64TSCOffset = pVmcbNstGstCtrl->u64TSCOffset;
+ pVmcbNstGstCache->fVIntrMasking = pVmcbNstGstCtrl->IntCtrl.n.u1VIntrMasking;
+ pVmcbNstGstCache->fNestedPaging = pVmcbNstGstCtrl->NestedPagingCtrl.n.u1NestedPaging;
+ pVmcbNstGstCache->fLbrVirt = pVmcbNstGstCtrl->LbrVirt.n.u1LbrVirt;
+ pVmcbNstGstCache->fCacheValid = true;
+ Log4Func(("Cached VMCB fields\n"));
+ }
+
+ return fWasCached;
+}
+
+
+/**
+ * Sets up the nested-guest VMCB for execution using hardware-assisted SVM.
+ *
+ * This is done the first time we enter nested-guest execution using SVM R0
+ * until the nested-guest \#VMEXIT (not to be confused with physical CPU
+ * \#VMEXITs which may or may not cause a corresponding nested-guest \#VMEXIT).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0SvmSetupVmcbNested(PVMCPUCC pVCpu)
+{
+ PSVMVMCB pVmcbNstGst = pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pVmcb);
+ PSVMVMCBCTRL pVmcbNstGstCtrl = &pVmcbNstGst->ctrl;
+
+ HMSVM_ASSERT_IN_NESTED_GUEST(&pVCpu->cpum.GstCtx);
+
+ /*
+ * First cache the nested-guest VMCB fields we may potentially modify.
+ */
+ bool const fVmcbCached = hmR0SvmCacheVmcbNested(pVCpu);
+ if (!fVmcbCached)
+ {
+ /*
+ * The IOPM of the nested-guest can be ignored because the guest always
+ * intercepts all IO port accesses. Thus, we'll swap to the guest IOPM rather
+ * than the nested-guest IOPM and swap the field back on the #VMEXIT.
+ */
+ pVmcbNstGstCtrl->u64IOPMPhysAddr = g_HCPhysIOBitmap;
+
+ /*
+ * Use the same nested-paging as the outer guest. We can't dynamically switch off
+ * nested-paging suddenly while executing a VM (see assertion at the end of
+ * Trap0eHandler() in PGMAllBth.h).
+ */
+ pVmcbNstGstCtrl->NestedPagingCtrl.n.u1NestedPaging = pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging;
+
+ /* Always enable V_INTR_MASKING as we do not want to allow access to the physical APIC TPR. */
+ pVmcbNstGstCtrl->IntCtrl.n.u1VIntrMasking = 1;
+
+ /*
+ * Turn off TPR syncing on #VMEXIT for nested-guests as CR8 intercepts are subject
+ * to the nested-guest intercepts and we always run with V_INTR_MASKING.
+ */
+ pVCpu->hm.s.svm.fSyncVTpr = false;
+
+#ifdef DEBUG_ramshankar
+ /* For debugging purposes - copy the LBR info. from the outer guest VMCB. */
+ pVmcbNstGstCtrl->LbrVirt.n.u1LbrVirt = pVCpu->hm.s.svm.pVmcb->ctrl.LbrVirt.n.u1LbrVirt;
+#endif
+
+ /*
+ * If we don't expose Virtualized-VMSAVE/VMLOAD feature to the outer guest, we
+ * need to intercept VMSAVE/VMLOAD instructions executed by the nested-guest.
+ */
+ if (!pVCpu->CTX_SUFF(pVM)->cpum.ro.GuestFeatures.fSvmVirtVmsaveVmload)
+ pVmcbNstGstCtrl->u64InterceptCtrl |= SVM_CTRL_INTERCEPT_VMSAVE
+ | SVM_CTRL_INTERCEPT_VMLOAD;
+
+ /*
+ * If we don't expose Virtual GIF feature to the outer guest, we need to intercept
+ * CLGI/STGI instructions executed by the nested-guest.
+ */
+ if (!pVCpu->CTX_SUFF(pVM)->cpum.ro.GuestFeatures.fSvmVGif)
+ pVmcbNstGstCtrl->u64InterceptCtrl |= SVM_CTRL_INTERCEPT_CLGI
+ | SVM_CTRL_INTERCEPT_STGI;
+
+ /* Merge the guest and nested-guest intercepts. */
+ hmR0SvmMergeVmcbCtrlsNested(pVCpu);
+
+ /* Update the VMCB clean bits. */
+ pVmcbNstGstCtrl->u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+ else
+ {
+ Assert(!pVCpu->hm.s.svm.fSyncVTpr);
+ Assert(pVmcbNstGstCtrl->u64IOPMPhysAddr == g_HCPhysIOBitmap);
+ Assert(RT_BOOL(pVmcbNstGstCtrl->NestedPagingCtrl.n.u1NestedPaging) == pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
+ }
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT_SVM */
+
+
+/**
+ * Exports the state shared between the host and guest (or nested-guest) into
+ * the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmExportSharedState(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
+ {
+ /** @todo Figure out stepping with nested-guest. */
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (!CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ hmR0SvmExportSharedDebugState(pVCpu, pVmcb);
+ else
+ {
+ pVmcb->guest.u64DR6 = pCtx->dr[6];
+ pVmcb->guest.u64DR7 = pCtx->dr[7];
+ }
+ }
+
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
+ AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE),
+ ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
+}
+
+
+/**
+ * Worker for SVMR0ImportStateOnDemand.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
+ */
+static void hmR0SvmImportGuestState(PVMCPUCC pVCpu, uint64_t fWhat)
+{
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatImportGuestState, x);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ PCSVMVMCBSTATESAVE pVmcbGuest = &pVmcb->guest;
+ PCSVMVMCBCTRL pVmcbCtrl = &pVmcb->ctrl;
+
+ /*
+ * We disable interrupts to make the updating of the state and in particular
+ * the fExtrn modification atomic with respect to preemption hooks.
+ */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ fWhat &= pCtx->fExtrn;
+ if (fWhat)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (fWhat & CPUMCTX_EXTRN_HWVIRT)
+ {
+ if (pVmcbCtrl->IntCtrl.n.u1VGifEnable)
+ {
+ Assert(!CPUMIsGuestInSvmNestedHwVirtMode(pCtx)); /* We don't yet support passing VGIF feature to the guest. */
+ Assert(HMIsSvmVGifActive(pVCpu->CTX_SUFF(pVM))); /* VM has configured it. */
+ CPUMSetGuestGif(pCtx, pVmcbCtrl->IntCtrl.n.u1VGif);
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_HM_SVM_HWVIRT_VIRQ)
+ {
+ if ( !pVmcbCtrl->IntCtrl.n.u1VIrqPending
+ && VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST);
+ }
+#endif
+
+ if (fWhat & CPUMCTX_EXTRN_HM_SVM_INT_SHADOW)
+ {
+ if (pVmcbCtrl->IntShadow.n.u1IntShadow)
+ EMSetInhibitInterruptsPC(pVCpu, pVmcbGuest->u64RIP);
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_RIP)
+ pCtx->rip = pVmcbGuest->u64RIP;
+
+ if (fWhat & CPUMCTX_EXTRN_RFLAGS)
+ pCtx->eflags.u32 = pVmcbGuest->u64RFlags;
+
+ if (fWhat & CPUMCTX_EXTRN_RSP)
+ pCtx->rsp = pVmcbGuest->u64RSP;
+
+ if (fWhat & CPUMCTX_EXTRN_RAX)
+ pCtx->rax = pVmcbGuest->u64RAX;
+
+ if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CS)
+ {
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, CS, cs);
+ /* Correct the CS granularity bit. Haven't seen it being wrong in any other register (yet). */
+ /** @todo SELM might need to be fixed as it too should not care about the
+ * granularity bit. See @bugref{6785}. */
+ if ( !pCtx->cs.Attr.n.u1Granularity
+ && pCtx->cs.Attr.n.u1Present
+ && pCtx->cs.u32Limit > UINT32_C(0xfffff))
+ {
+ Assert((pCtx->cs.u32Limit & 0xfff) == 0xfff);
+ pCtx->cs.Attr.n.u1Granularity = 1;
+ }
+ HMSVM_ASSERT_SEG_GRANULARITY(pCtx, cs);
+ }
+ if (fWhat & CPUMCTX_EXTRN_SS)
+ {
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, SS, ss);
+ HMSVM_ASSERT_SEG_GRANULARITY(pCtx, ss);
+ /*
+ * Sync the hidden SS DPL field. AMD CPUs have a separate CPL field in the
+ * VMCB and use that, so it's possible that when the CPL changes during
+ * guest execution the SS DPL isn't updated by AMD-V. Observed on some
+ * AMD Fusion CPUs with 64-bit guests.
+ *
+ * See AMD spec. 15.5.1 "Basic operation".
+ */
+ Assert(!(pVmcbGuest->u8CPL & ~0x3));
+ uint8_t const uCpl = pVmcbGuest->u8CPL;
+ if (pCtx->ss.Attr.n.u2Dpl != uCpl)
+ pCtx->ss.Attr.n.u2Dpl = uCpl & 0x3;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DS)
+ {
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, DS, ds);
+ HMSVM_ASSERT_SEG_GRANULARITY(pCtx, ds);
+ }
+ if (fWhat & CPUMCTX_EXTRN_ES)
+ {
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, ES, es);
+ HMSVM_ASSERT_SEG_GRANULARITY(pCtx, es);
+ }
+ if (fWhat & CPUMCTX_EXTRN_FS)
+ {
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, FS, fs);
+ HMSVM_ASSERT_SEG_GRANULARITY(pCtx, fs);
+ }
+ if (fWhat & CPUMCTX_EXTRN_GS)
+ {
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, GS, gs);
+ HMSVM_ASSERT_SEG_GRANULARITY(pCtx, gs);
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_TR)
+ {
+ /*
+ * Fixup TR attributes so they're compatible with Intel. Important when saved-states
+ * are shared between Intel and AMD hosts, see @bugref{6208#c39}.
+ * ASSUME that it's normally correct and that we're in 32-bit or 64-bit mode.
+ */
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, TR, tr);
+ if (pCtx->tr.Attr.n.u4Type != X86_SEL_TYPE_SYS_386_TSS_BUSY)
+ {
+ if ( pCtx->tr.Attr.n.u4Type == X86_SEL_TYPE_SYS_386_TSS_AVAIL
+ || CPUMIsGuestInLongModeEx(pCtx))
+ pCtx->tr.Attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
+ else if (pCtx->tr.Attr.n.u4Type == X86_SEL_TYPE_SYS_286_TSS_AVAIL)
+ pCtx->tr.Attr.n.u4Type = X86_SEL_TYPE_SYS_286_TSS_BUSY;
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_LDTR)
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, LDTR, ldtr);
+
+ if (fWhat & CPUMCTX_EXTRN_GDTR)
+ {
+ pCtx->gdtr.cbGdt = pVmcbGuest->GDTR.u32Limit;
+ pCtx->gdtr.pGdt = pVmcbGuest->GDTR.u64Base;
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_IDTR)
+ {
+ pCtx->idtr.cbIdt = pVmcbGuest->IDTR.u32Limit;
+ pCtx->idtr.pIdt = pVmcbGuest->IDTR.u64Base;
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
+ {
+ pCtx->msrSTAR = pVmcbGuest->u64STAR;
+ pCtx->msrLSTAR = pVmcbGuest->u64LSTAR;
+ pCtx->msrCSTAR = pVmcbGuest->u64CSTAR;
+ pCtx->msrSFMASK = pVmcbGuest->u64SFMASK;
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
+ {
+ pCtx->SysEnter.cs = pVmcbGuest->u64SysEnterCS;
+ pCtx->SysEnter.eip = pVmcbGuest->u64SysEnterEIP;
+ pCtx->SysEnter.esp = pVmcbGuest->u64SysEnterESP;
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
+ pCtx->msrKERNELGSBASE = pVmcbGuest->u64KernelGSBase;
+
+ if (fWhat & CPUMCTX_EXTRN_DR_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_DR6)
+ {
+ if (!pVCpu->hm.s.fUsingHyperDR7)
+ pCtx->dr[6] = pVmcbGuest->u64DR6;
+ else
+ CPUMSetHyperDR6(pVCpu, pVmcbGuest->u64DR6);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_DR7)
+ {
+ if (!pVCpu->hm.s.fUsingHyperDR7)
+ pCtx->dr[7] = pVmcbGuest->u64DR7;
+ else
+ Assert(pVmcbGuest->u64DR7 == CPUMGetHyperDR7(pVCpu));
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_CR_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CR0)
+ {
+ /* We intercept changes to all CR0 bits except maybe TS & MP bits. */
+ uint64_t const uCr0 = (pCtx->cr0 & ~(X86_CR0_TS | X86_CR0_MP))
+ | (pVmcbGuest->u64CR0 & (X86_CR0_TS | X86_CR0_MP));
+ VMMRZCallRing3Disable(pVCpu); /* Calls into PGM which has Log statements. */
+ CPUMSetGuestCR0(pVCpu, uCr0);
+ VMMRZCallRing3Enable(pVCpu);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_CR2)
+ pCtx->cr2 = pVmcbGuest->u64CR2;
+
+ if (fWhat & CPUMCTX_EXTRN_CR3)
+ {
+ if ( pVmcbCtrl->NestedPagingCtrl.n.u1NestedPaging
+ && pCtx->cr3 != pVmcbGuest->u64CR3)
+ {
+ CPUMSetGuestCR3(pVCpu, pVmcbGuest->u64CR3);
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
+ }
+ }
+
+ /* Changes to CR4 are always intercepted. */
+ }
+
+ /* Update fExtrn. */
+ pCtx->fExtrn &= ~fWhat;
+
+ /* If everything has been imported, clear the HM keeper bit. */
+ if (!(pCtx->fExtrn & HMSVM_CPUMCTX_EXTRN_ALL))
+ {
+ pCtx->fExtrn &= ~CPUMCTX_EXTRN_KEEPER_HM;
+ Assert(!pCtx->fExtrn);
+ }
+ }
+ else
+ Assert(!pCtx->fExtrn || (pCtx->fExtrn & HMSVM_CPUMCTX_EXTRN_ALL));
+
+ ASMSetFlags(fEFlags);
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatImportGuestState, x);
+
+ /*
+ * Honor any pending CR3 updates.
+ *
+ * Consider this scenario: #VMEXIT -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp
+ * -> SVMR0CallRing3Callback() -> VMMRZCallRing3Disable() -> hmR0SvmImportGuestState()
+ * -> Sets VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp -> continue with #VMEXIT
+ * handling -> hmR0SvmImportGuestState() and here we are.
+ *
+ * The reason for such complicated handling is because VM-exits that call into PGM expect
+ * CR3 to be up-to-date and thus any CR3-saves -before- the VM-exit (longjmp) would've
+ * postponed the CR3 update via the force-flag and cleared CR3 from fExtrn. Any SVM R0
+ * VM-exit handler that requests CR3 to be saved will end up here and we call PGMUpdateCR3().
+ *
+ * The longjmp exit path can't check these CR3 force-flags or call code that takes a lock again,
+ * and it does not process force-flags like regular exits to ring-3 do, so we cover for it here.
+ */
+ if ( VMMRZCallRing3IsEnabled(pVCpu)
+ && VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
+ {
+ Assert(pCtx->cr3 == pVmcbGuest->u64CR3);
+ PGMUpdateCR3(pVCpu, pCtx->cr3);
+ }
+}
+
+
+/**
+ * Saves the guest (or nested-guest) state from the VMCB into the guest-CPU
+ * context.
+ *
+ * Currently there is no residual state left in the CPU that is not updated in the
+ * VMCB.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
+ */
+VMMR0DECL(int) SVMR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
+{
+ hmR0SvmImportGuestState(pVCpu, fWhat);
+ return VINF_SUCCESS;
+}
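+
+
+/*
+ * A minimal standalone sketch of the on-demand import bookkeeping used above:
+ * a set bit in fExtrn means the corresponding state still lives only in the
+ * VMCB, and each bit is cleared once that state has been copied into the
+ * context. The 'SKETCH' names and bit assignments are illustrative only, not
+ * the real CPUMCTX_EXTRN_XXX definitions.
+ */
+#define SKETCH_EXTRN_RIP RT_BIT_64(0)
+#define SKETCH_EXTRN_RSP RT_BIT_64(1)
+typedef struct SKETCHIMPORTCTX { uint64_t fExtrn, rip, rsp; } SKETCHIMPORTCTX;
+DECLINLINE(void) sketchSvmImportOnDemand(SKETCHIMPORTCTX *pCtx, uint64_t uVmcbRip, uint64_t uVmcbRsp, uint64_t fWhat)
+{
+ fWhat &= pCtx->fExtrn; /* Only import state that hasn't been imported already. */
+ if (fWhat & SKETCH_EXTRN_RIP)
+ pCtx->rip = uVmcbRip;
+ if (fWhat & SKETCH_EXTRN_RSP)
+ pCtx->rsp = uVmcbRsp;
+ pCtx->fExtrn &= ~fWhat; /* The imported state is no longer external. */
+}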
+
+
+/**
+ * Does the necessary state syncing before returning to ring-3 for any reason
+ * (longjmp, preemption, voluntary exits to ring-3) from AMD-V.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fImportState Whether to import the guest state from the VMCB back
+ * to the guest-CPU context.
+ *
+ * @remarks No-long-jmp zone!!!
+ */
+static void hmR0SvmLeave(PVMCPUCC pVCpu, bool fImportState)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(VMMR0IsLogFlushDisabled(pVCpu));
+
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, make sure to check whether SVMR0CallRing3Callback() needs to be updated too.
+ */
+
+ /* Save the guest state if necessary. */
+ if (fImportState)
+ hmR0SvmImportGuestState(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+
+ /* Restore host FPU state if necessary and resync on next R0 reentry. */
+ CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
+ Assert(!CPUMIsGuestFPUStateActive(pVCpu));
+
+ /*
+ * Restore host debug registers if necessary and resync on next R0 reentry.
+ */
+#ifdef VBOX_STRICT
+ if (CPUMIsHyperDebugStateActive(pVCpu))
+ {
+ PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb; /** @todo nested-guest. */
+ Assert(pVmcb->ctrl.u16InterceptRdDRx == 0xffff);
+ Assert(pVmcb->ctrl.u16InterceptWrDRx == 0xffff);
+ }
+#endif
+ CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, false /* save DR6 */);
+ Assert(!CPUMIsHyperDebugStateActive(pVCpu));
+ Assert(!CPUMIsGuestDebugStateActive(pVCpu));
+
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
+
+ VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
+}
+
+
+/**
+ * Leaves the AMD-V session.
+ *
+ * Only used while returning to ring-3, either due to a longjmp or a voluntary
+ * exit to ring-3.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int hmR0SvmLeaveSession(PVMCPUCC pVCpu)
+{
+ HM_DISABLE_PREEMPT(pVCpu);
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* When thread-context hooks are used, we can avoid doing the leave again if we were preempted
+ earlier and already did this from SVMR0ThreadCtxCallback(). */
+ if (!pVCpu->hm.s.fLeaveDone)
+ {
+ hmR0SvmLeave(pVCpu, true /* fImportState */);
+ pVCpu->hm.s.fLeaveDone = true;
+ }
+
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, make sure to check whether SVMR0CallRing3Callback() needs to be updated too.
+ */
+
+ /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
+ /* Deregister hook now that we've left HM context before re-enabling preemption. */
+ VMMR0ThreadCtxHookDisable(pVCpu);
+
+ /* Leave HM context. This takes care of local init (term). */
+ int rc = HMR0LeaveCpu(pVCpu);
+
+ HM_RESTORE_PREEMPT();
+ return rc;
+}
+
+
+/**
+ * Does the necessary state syncing before doing a longjmp to ring-3.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jmp zone!!!
+ */
+static int hmR0SvmLongJmpToRing3(PVMCPUCC pVCpu)
+{
+ return hmR0SvmLeaveSession(pVCpu);
+}
+
+
+/**
+ * VMMRZCallRing3() callback wrapper which saves the guest state (or restores
+ * any remaining host state) before we longjump to ring-3 and possibly get
+ * preempted.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param enmOperation The operation causing the ring-3 longjump.
+ */
+VMMR0DECL(int) SVMR0CallRing3Callback(PVMCPUCC pVCpu, VMMCALLRING3 enmOperation)
+{
+ if (enmOperation == VMMCALLRING3_VM_R0_ASSERTION)
+ {
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, make sure to check whether hmR0SvmLeave() and hmR0SvmLeaveSession() needs
+ * to be updated too. This is a stripped down version which gets out ASAP trying to not trigger any assertion.
+ */
+ VMMRZCallRing3RemoveNotification(pVCpu);
+ VMMRZCallRing3Disable(pVCpu);
+ HM_DISABLE_PREEMPT(pVCpu);
+
+ /* Import the entire guest state. */
+ hmR0SvmImportGuestState(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+
+ /* Restore host FPU state if necessary and resync on next R0 reentry. */
+ CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
+
+ /* Restore host debug registers if necessary and resync on next R0 reentry. */
+ CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, false /* save DR6 */);
+
+ /* Deregister the hook now that we've left HM context before re-enabling preemption. */
+ /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
+ VMMR0ThreadCtxHookDisable(pVCpu);
+
+ /* Leave HM context. This takes care of local init (term). */
+ HMR0LeaveCpu(pVCpu);
+
+ HM_RESTORE_PREEMPT();
+ return VINF_SUCCESS;
+ }
+
+ Assert(pVCpu);
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+ HMSVM_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ VMMRZCallRing3Disable(pVCpu);
+ Assert(VMMR0IsLogFlushDisabled(pVCpu));
+
+ Log4Func(("Calling hmR0SvmLongJmpToRing3\n"));
+ int rc = hmR0SvmLongJmpToRing3(pVCpu);
+ AssertRCReturn(rc, rc);
+
+ VMMRZCallRing3Enable(pVCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Takes necessary actions before going back to ring-3.
+ *
+ * An action requires us to go back to ring-3. This function does the necessary
+ * steps before we can safely return to ring-3. This is a voluntary exit, not
+ * the same as a longjmp to ring-3.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param rcExit The reason for exiting to ring-3. Can be
+ * VINF_VMM_UNKNOWN_RING3_CALL.
+ */
+static int hmR0SvmExitToRing3(PVMCPUCC pVCpu, int rcExit)
+{
+ Assert(pVCpu);
+ HMSVM_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ /* Please, no longjumps here (a log flush must not longjmp back to ring-3). NO LOGGING BEFORE THIS POINT! */
+ VMMRZCallRing3Disable(pVCpu);
+ Log4Func(("rcExit=%d LocalFF=%#RX64 GlobalFF=%#RX32\n", rcExit, (uint64_t)pVCpu->fLocalForcedActions,
+ pVCpu->CTX_SUFF(pVM)->fGlobalForcedActions));
+
+ /* We need to do this only while truly exiting the "inner loop" back to ring-3 and -not- for any longjmp to ring3. */
+ if (pVCpu->hm.s.Event.fPending)
+ {
+ hmR0SvmPendingEventToTrpmTrap(pVCpu);
+ Assert(!pVCpu->hm.s.Event.fPending);
+ }
+
+ /* Sync. the necessary state for going back to ring-3. */
+ hmR0SvmLeaveSession(pVCpu);
+ STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
+
+ /* Thread-context hooks are unregistered at this point!!! */
+ /* Ring-3 callback notifications are unregistered at this point!!! */
+
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
+ CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
+ | CPUM_CHANGED_LDTR
+ | CPUM_CHANGED_GDTR
+ | CPUM_CHANGED_IDTR
+ | CPUM_CHANGED_TR
+ | CPUM_CHANGED_HIDDEN_SEL_REGS);
+ if ( pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging
+ && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
+ {
+ CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
+ }
+
+ /* Update the exit-to-ring-3 reason. */
+ pVCpu->hm.s.rcLastExitToR3 = rcExit;
+
+ /* On our way back from ring-3, reload the guest-CPU state if it may change while in ring-3. */
+ if ( rcExit != VINF_EM_RAW_INTERRUPT
+ || CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ {
+ Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMSVM_CPUMCTX_EXTRN_ALL));
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
+ VMMRZCallRing3Enable(pVCpu);
+
+ /*
+ * If we're emulating an instruction, we shouldn't have any TRPM traps pending
+ * and if we're injecting an event we should have a TRPM trap pending.
+ */
+ AssertReturnStmt(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu),
+ pVCpu->hm.s.u32HMError = rcExit,
+ VERR_SVM_IPE_5);
+ AssertReturnStmt(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu),
+ pVCpu->hm.s.u32HMError = rcExit,
+ VERR_SVM_IPE_4);
+
+ return rcExit;
+}
+
+
+/**
+ * Updates the use of TSC offsetting mode for the CPU and adjusts the necessary
+ * intercepts.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmUpdateTscOffsetting(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ /*
+ * Avoid intercepting RDTSC/RDTSCP if we determined the host TSC (++) is stable
+ * and, in the case of a nested-guest, if the nested VMCB does not intercept
+ * RDTSC/RDTSCP either.
+ */
+ bool fParavirtTsc;
+ uint64_t uTscOffset;
+ bool const fCanUseRealTsc = TMCpuTickCanUseRealTSC(pVCpu->CTX_SUFF(pVM), pVCpu, &uTscOffset, &fParavirtTsc);
+
+ bool fIntercept;
+ if (fCanUseRealTsc)
+ fIntercept = hmR0SvmClearCtrlIntercept(pVCpu, pVmcb, SVM_CTRL_INTERCEPT_RDTSC | SVM_CTRL_INTERCEPT_RDTSCP);
+ else
+ {
+ hmR0SvmSetCtrlIntercept(pVmcb, SVM_CTRL_INTERCEPT_RDTSC | SVM_CTRL_INTERCEPT_RDTSCP);
+ fIntercept = true;
+ }
+
+ if (!fIntercept)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ /* Apply the nested-guest VMCB's TSC offset over the guest TSC offset. */
+ if (CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
+#endif
+
+ /* Update the TSC offset in the VMCB and the relevant clean bits. */
+ pVmcb->ctrl.u64TSCOffset = uTscOffset;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+
+ /* Currently neither Hyper-V nor KVM needs to update its paravirt. TSC
+ information before every VM-entry, hence we have nothing to do here at the moment. */
+ if (fParavirtTsc)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
+}
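+
+
+/*
+ * A minimal standalone sketch of the TSC offsetting arithmetic managed above:
+ * RDTSC inside the guest returns the host TSC plus the VMCB TSC offset, and
+ * for a nested-guest the outer and nested offsets simply add up (which is what
+ * applying the nested-guest offset over the guest offset amounts to). The
+ * 'sketch' names are illustrative helpers, not VirtualBox APIs.
+ */
+DECLINLINE(uint64_t) sketchSvmGuestTsc(uint64_t uHostTsc, uint64_t uTscOffsetGst)
+{
+ return uHostTsc + uTscOffsetGst;
+}
+DECLINLINE(uint64_t) sketchSvmNestedGuestTsc(uint64_t uHostTsc, uint64_t uTscOffsetGst, uint64_t uTscOffsetNstGst)
+{
+ /* host -> guest -> nested-guest: the offsets compose additively. */
+ return sketchSvmGuestTsc(uHostTsc, uTscOffsetGst) + uTscOffsetNstGst;
+}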
+
+
+/**
+ * Sets an event as a pending event to be injected into the guest.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pEvent Pointer to the SVM event.
+ * @param GCPtrFaultAddress The fault-address (CR2) in case it's a
+ * page-fault.
+ *
+ * @remarks Statistics counter assumes this is a guest event being reflected to
+ * the guest, i.e. 'StatInjectPendingReflect' is always incremented.
+ */
+DECLINLINE(void) hmR0SvmSetPendingEvent(PVMCPUCC pVCpu, PSVMEVENT pEvent, RTGCUINTPTR GCPtrFaultAddress)
+{
+ Assert(!pVCpu->hm.s.Event.fPending);
+ Assert(pEvent->n.u1Valid);
+
+ pVCpu->hm.s.Event.u64IntInfo = pEvent->u;
+ pVCpu->hm.s.Event.fPending = true;
+ pVCpu->hm.s.Event.GCPtrFaultAddress = GCPtrFaultAddress;
+
+ Log4Func(("u=%#RX64 u8Vector=%#x Type=%#x ErrorCodeValid=%RTbool ErrorCode=%#RX32\n", pEvent->u, pEvent->n.u8Vector,
+ (uint8_t)pEvent->n.u3Type, !!pEvent->n.u1ErrorCodeValid, pEvent->n.u32ErrorCode));
+}
+
+
+/**
+ * Sets an invalid-opcode (\#UD) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0SvmSetPendingXcptUD(PVMCPUCC pVCpu)
+{
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_UD;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Sets a debug (\#DB) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0SvmSetPendingXcptDB(PVMCPUCC pVCpu)
+{
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_DB;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Sets a page fault (\#PF) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param u32ErrCode The error-code for the page-fault.
+ * @param uFaultAddress The page fault address (CR2).
+ *
+ * @remarks This updates the guest CR2 with @a uFaultAddress!
+ */
+DECLINLINE(void) hmR0SvmSetPendingXcptPF(PVMCPUCC pVCpu, uint32_t u32ErrCode, RTGCUINTPTR uFaultAddress)
+{
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_PF;
+ Event.n.u1ErrorCodeValid = 1;
+ Event.n.u32ErrorCode = u32ErrCode;
+
+ /* Update CR2 of the guest. */
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR2);
+ if (pVCpu->cpum.GstCtx.cr2 != uFaultAddress)
+ {
+ pVCpu->cpum.GstCtx.cr2 = uFaultAddress;
+ /* The VMCB clean bit for CR2 will be updated while re-loading the guest state. */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR2);
+ }
+
+ hmR0SvmSetPendingEvent(pVCpu, &Event, uFaultAddress);
+}
+
+
+/**
+ * Sets a math-fault (\#MF) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0SvmSetPendingXcptMF(PVMCPUCC pVCpu)
+{
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_MF;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Sets a double fault (\#DF) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0SvmSetPendingXcptDF(PVMCPUCC pVCpu)
+{
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_DF;
+ Event.n.u1ErrorCodeValid = 1;
+ Event.n.u32ErrorCode = 0;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Injects an event into the guest upon VMRUN by updating the relevant field
+ * in the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the guest VM control block.
+ * @param pEvent Pointer to the event.
+ *
+ * @remarks No-long-jump zone!!!
+ * @remarks Requires CR0!
+ */
+DECLINLINE(void) hmR0SvmInjectEventVmcb(PVMCPUCC pVCpu, PSVMVMCB pVmcb, PSVMEVENT pEvent)
+{
+ Assert(!pVmcb->ctrl.EventInject.n.u1Valid);
+ pVmcb->ctrl.EventInject.u = pEvent->u;
+ if ( pVmcb->ctrl.EventInject.n.u3Type == SVM_EVENT_EXCEPTION
+ || pVmcb->ctrl.EventInject.n.u3Type == SVM_EVENT_NMI)
+ {
+ Assert(pEvent->n.u8Vector <= X86_XCPT_LAST);
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedXcptsR0[pEvent->n.u8Vector]);
+ }
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[pEvent->n.u8Vector & MASK_INJECT_IRQ_STAT]);
+ RT_NOREF(pVCpu);
+
+ Log4Func(("u=%#RX64 u8Vector=%#x Type=%#x ErrorCodeValid=%RTbool ErrorCode=%#RX32\n", pEvent->u, pEvent->n.u8Vector,
+ (uint8_t)pEvent->n.u3Type, !!pEvent->n.u1ErrorCodeValid, pEvent->n.u32ErrorCode));
+}
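+
+
+/*
+ * A minimal standalone sketch of the 64-bit EVENTINJ encoding written above,
+ * per AMD spec. 15.20 "Event Injection": vector in bits 7:0, type in bits 10:8,
+ * error-code-valid in bit 11, valid in bit 31 and the error code in bits 63:32.
+ * The 'sketch' name is an illustrative helper, not a VirtualBox API.
+ */
+DECLINLINE(uint64_t) sketchSvmMakeEventInject(uint8_t uVector, uint8_t uType, bool fErrCodeValid, uint32_t uErrCode)
+{
+ return (uint64_t)uVector
+ | ((uint64_t)(uType & 0x7) << 8)
+ | ((uint64_t)(fErrCodeValid ? 1 : 0) << 11)
+ | RT_BIT_64(31)
+ | ((uint64_t)uErrCode << 32);
+}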
+
+
+
+/**
+ * Converts any TRPM trap into a pending HM event. This is typically used when
+ * entering from ring-3 (not longjmp returns).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0SvmTrpmTrapToPendingEvent(PVMCPUCC pVCpu)
+{
+ Assert(TRPMHasTrap(pVCpu));
+ Assert(!pVCpu->hm.s.Event.fPending);
+
+ uint8_t uVector;
+ TRPMEVENT enmTrpmEvent;
+ uint32_t uErrCode;
+ RTGCUINTPTR GCPtrFaultAddress;
+ uint8_t cbInstr;
+
+ int rc = TRPMQueryTrapAll(pVCpu, &uVector, &enmTrpmEvent, &uErrCode, &GCPtrFaultAddress, &cbInstr, NULL /* pfIcebp */);
+ AssertRC(rc);
+
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u8Vector = uVector;
+
+ /* See AMD spec. 15.20 "Event Injection" for the format. */
+ if (enmTrpmEvent == TRPM_TRAP)
+ {
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ switch (uVector)
+ {
+ case X86_XCPT_NMI:
+ {
+ Event.n.u3Type = SVM_EVENT_NMI;
+ break;
+ }
+
+ case X86_XCPT_BP:
+ case X86_XCPT_OF:
+ AssertMsgFailed(("Invalid TRPM vector %d for event type %d\n", uVector, enmTrpmEvent));
+ RT_FALL_THRU();
+
+ case X86_XCPT_PF:
+ case X86_XCPT_DF:
+ case X86_XCPT_TS:
+ case X86_XCPT_NP:
+ case X86_XCPT_SS:
+ case X86_XCPT_GP:
+ case X86_XCPT_AC:
+ {
+ Event.n.u1ErrorCodeValid = 1;
+ Event.n.u32ErrorCode = uErrCode;
+ break;
+ }
+ }
+ }
+ else if (enmTrpmEvent == TRPM_HARDWARE_INT)
+ Event.n.u3Type = SVM_EVENT_EXTERNAL_IRQ;
+ else if (enmTrpmEvent == TRPM_SOFTWARE_INT)
+ Event.n.u3Type = SVM_EVENT_SOFTWARE_INT;
+ else
+ AssertMsgFailed(("Invalid TRPM event type %d\n", enmTrpmEvent));
+
+ rc = TRPMResetTrap(pVCpu);
+ AssertRC(rc);
+
+ Log4(("TRPM->HM event: u=%#RX64 u8Vector=%#x uErrorCodeValid=%RTbool uErrorCode=%#RX32\n", Event.u, Event.n.u8Vector,
+ !!Event.n.u1ErrorCodeValid, Event.n.u32ErrorCode));
+
+ hmR0SvmSetPendingEvent(pVCpu, &Event, GCPtrFaultAddress);
+}
+
+
+/**
+ * Converts any pending SVM event into a TRPM trap. Typically used when leaving
+ * AMD-V to execute any instruction.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0SvmPendingEventToTrpmTrap(PVMCPUCC pVCpu)
+{
+ Assert(pVCpu->hm.s.Event.fPending);
+ Assert(TRPMQueryTrap(pVCpu, NULL /* pu8TrapNo */, NULL /* pEnmType */) == VERR_TRPM_NO_ACTIVE_TRAP);
+
+ SVMEVENT Event;
+ Event.u = pVCpu->hm.s.Event.u64IntInfo;
+
+ uint8_t uVector = Event.n.u8Vector;
+ TRPMEVENT enmTrapType = HMSvmEventToTrpmEventType(&Event, uVector);
+
+ Log4(("HM event->TRPM: uVector=%#x enmTrapType=%d\n", uVector, Event.n.u3Type));
+
+ int rc = TRPMAssertTrap(pVCpu, uVector, enmTrapType);
+ AssertRC(rc);
+
+ if (Event.n.u1ErrorCodeValid)
+ TRPMSetErrorCode(pVCpu, Event.n.u32ErrorCode);
+
+ if ( enmTrapType == TRPM_TRAP
+ && uVector == X86_XCPT_PF)
+ {
+ TRPMSetFaultAddress(pVCpu, pVCpu->hm.s.Event.GCPtrFaultAddress);
+ Assert(pVCpu->hm.s.Event.GCPtrFaultAddress == CPUMGetGuestCR2(pVCpu));
+ }
+ else if (enmTrapType == TRPM_SOFTWARE_INT)
+ TRPMSetInstrLength(pVCpu, pVCpu->hm.s.Event.cbInstr);
+ pVCpu->hm.s.Event.fPending = false;
+}
+
+
+/**
+ * Checks if the guest (or nested-guest) has an interrupt shadow active right
+ * now.
+ *
+ * @returns @c true if the interrupt shadow is active, @c false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ * @remarks Has side-effects with VMCPU_FF_INHIBIT_INTERRUPTS force-flag.
+ */
+static bool hmR0SvmIsIntrShadowActive(PVMCPUCC pVCpu)
+{
+ /*
+ * Instructions like STI and MOV SS inhibit interrupts till the next instruction
+ * completes. Check if we should inhibit interrupts or clear any existing
+ * interrupt inhibition.
+ */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ {
+ if (pVCpu->cpum.GstCtx.rip != EMGetInhibitInterruptsPC(pVCpu))
+ {
+ /*
+ * We can clear the inhibit force flag as even if we go back to the recompiler
+ * without executing guest code in AMD-V, the flag's condition to be cleared is
+ * met and thus the cleared state is correct.
+ */
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+
+/**
+ * Sets the virtual interrupt intercept control in the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ */
+static void hmR0SvmSetIntWindowExiting(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ HMSVM_ASSERT_NOT_IN_NESTED_GUEST(&pVCpu->cpum.GstCtx); NOREF(pVCpu);
+
+ /*
+ * When AVIC isn't supported, set up an interrupt window to cause a #VMEXIT when the guest
+ * is ready to accept interrupts. At #VMEXIT, we then get the interrupt from the APIC
+ * (updating ISR at the right time) and inject the interrupt.
+ *
+ * When AVIC is supported, we could make use of asynchronous delivery without
+ * a #VMEXIT and we would be passing the AVIC page to SVM.
+ *
+ * In AMD-V, an interrupt window is achieved using a combination of V_IRQ (an interrupt
+ * is pending), V_IGN_TPR (ignore TPR priorities) and the VINTR intercept all being set.
+ */
+ Assert(pVmcb->ctrl.IntCtrl.n.u1IgnoreTPR);
+ pVmcb->ctrl.IntCtrl.n.u1VIrqPending = 1;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INT_CTRL;
+ hmR0SvmSetCtrlIntercept(pVmcb, SVM_CTRL_INTERCEPT_VINTR);
+ Log4(("Set VINTR intercept\n"));
+}
+
+
+/**
+ * Clears the virtual interrupt intercept control in the VMCB as
+ * we have figured the guest is unable to process any interrupts
+ * at this point in time.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ */
+static void hmR0SvmClearIntWindowExiting(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ HMSVM_ASSERT_NOT_IN_NESTED_GUEST(&pVCpu->cpum.GstCtx); NOREF(pVCpu);
+
+ PSVMVMCBCTRL pVmcbCtrl = &pVmcb->ctrl;
+ if ( pVmcbCtrl->IntCtrl.n.u1VIrqPending
+ || (pVmcbCtrl->u64InterceptCtrl & SVM_CTRL_INTERCEPT_VINTR))
+ {
+ pVmcbCtrl->IntCtrl.n.u1VIrqPending = 0;
+ pVmcbCtrl->u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INT_CTRL;
+ hmR0SvmClearCtrlIntercept(pVCpu, pVmcb, SVM_CTRL_INTERCEPT_VINTR);
+ Log4(("Cleared VINTR intercept\n"));
+ }
+}
+
+
+/**
+ * Evaluates the event to be delivered to the guest and sets it as the pending
+ * event.
+ *
+ * @returns Strict VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM transient structure.
+ */
+static VBOXSTRICTRC hmR0SvmEvaluatePendingEvent(PVMCPUCC pVCpu, PCSVMTRANSIENT pSvmTransient)
+{
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_HWVIRT
+ | CPUMCTX_EXTRN_RFLAGS
+ | CPUMCTX_EXTRN_HM_SVM_INT_SHADOW
+ | CPUMCTX_EXTRN_HM_SVM_HWVIRT_VIRQ);
+
+ Assert(!pVCpu->hm.s.Event.fPending);
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ Assert(pVmcb);
+
+ bool const fGif = CPUMGetGuestGif(pCtx);
+ bool const fIntShadow = hmR0SvmIsIntrShadowActive(pVCpu);
+ bool const fBlockNmi = VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS);
+
+ Log4Func(("fGif=%RTbool fBlockNmi=%RTbool fIntShadow=%RTbool fIntPending=%RTbool fNmiPending=%RTbool\n",
+ fGif, fBlockNmi, fIntShadow, VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC),
+ VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI)));
+
+ /** @todo SMI. SMIs take priority over NMIs. */
+
+ /*
+ * Check if the guest or nested-guest can receive NMIs.
+ * Nested NMIs are not allowed, see AMD spec. 8.1.4 "Masking External Interrupts".
+ * NMIs take priority over maskable interrupts, see AMD spec. 8.5 "Priorities".
+ */
+ if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI)
+ && !fBlockNmi)
+ {
+ if ( fGif
+ && !fIntShadow)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_NMI))
+ {
+ Log4(("Intercepting NMI -> #VMEXIT\n"));
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ return IEMExecSvmVmexit(pVCpu, SVM_EXIT_NMI, 0, 0);
+ }
+#endif
+ Log4(("Setting NMI pending for injection\n"));
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u8Vector = X86_XCPT_NMI;
+ Event.n.u3Type = SVM_EVENT_NMI;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
+ }
+ else if (!fGif)
+ hmR0SvmSetCtrlIntercept(pVmcb, SVM_CTRL_INTERCEPT_STGI);
+ else if (!pSvmTransient->fIsNestedGuest)
+ hmR0SvmSetIntWindowExiting(pVCpu, pVmcb);
+ /* else: for nested-guests, interrupt-window exiting will be picked up when merging VMCB controls. */
+ }
+ /*
+ * Check if the guest can receive external interrupts (PIC/APIC). Once PDMGetInterrupt()
+ * returns a valid interrupt we -must- deliver the interrupt. We can no longer re-request
+ * it from the APIC device.
+ *
+ * For nested-guests, physical interrupts always take priority over virtual interrupts.
+ * We don't need to inject nested-guest virtual interrupts here; we can let the hardware
+ * do that work when we execute nested-guest code, especially since all the required information
+ * is in the VMCB, unlike physical interrupts where we need to fetch the interrupt from
+ * the virtual interrupt controller.
+ *
+ * See AMD spec. 15.21.4 "Injecting Virtual (INTR) Interrupts".
+ */
+ else if ( VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
+ && !pVCpu->hm.s.fSingleInstruction)
+ {
+ bool const fBlockInt = !pSvmTransient->fIsNestedGuest ? !(pCtx->eflags.u32 & X86_EFL_IF)
+ : CPUMIsGuestSvmPhysIntrEnabled(pVCpu, pCtx);
+ if ( fGif
+ && !fBlockInt
+ && !fIntShadow)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_INTR))
+ {
+ Log4(("Intercepting INTR -> #VMEXIT\n"));
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ return IEMExecSvmVmexit(pVCpu, SVM_EXIT_INTR, 0, 0);
+ }
+#endif
+ uint8_t u8Interrupt;
+ int rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
+ if (RT_SUCCESS(rc))
+ {
+ Log4(("Setting external interrupt %#x pending for injection\n", u8Interrupt));
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u8Vector = u8Interrupt;
+ Event.n.u3Type = SVM_EVENT_EXTERNAL_IRQ;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ }
+ else if (rc == VERR_APIC_INTR_MASKED_BY_TPR)
+ {
+ /*
+ * AMD-V has no TPR thresholding feature. TPR and the force-flag will be
+ * updated eventually when the TPR is written by the guest.
+ */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchTprMaskedIrq);
+ }
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
+ }
+ else if (!fGif)
+ hmR0SvmSetCtrlIntercept(pVmcb, SVM_CTRL_INTERCEPT_STGI);
+ else if (!pSvmTransient->fIsNestedGuest)
+ hmR0SvmSetIntWindowExiting(pVCpu, pVmcb);
+ /* else: for nested-guests, interrupt-window exiting will be picked up when merging VMCB controls. */
+ }
+
+ return VINF_SUCCESS;
+}
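+
+
+/*
+ * A minimal standalone sketch of the delivery order evaluated above (SMIs,
+ * which would rank highest, are not handled): nothing is delivered while GIF
+ * is clear or an interrupt shadow is active, NMIs go before maskable external
+ * interrupts, and each source also honours its own blocking condition. The
+ * 'sketch' name is an illustrative helper, not a VirtualBox API.
+ */
+DECLINLINE(int) sketchSvmPickPendingEvent(bool fNmiPending, bool fIntrPending, bool fGif,
+ bool fIntShadow, bool fBlockNmi, bool fBlockInt)
+{
+ if (!fGif || fIntShadow)
+ return 0; /* Nothing can be delivered right now. */
+ if (fNmiPending && !fBlockNmi)
+ return 2; /* NMI. */
+ if (fIntrPending && !fBlockInt)
+ return 1; /* Maskable external interrupt (PIC/APIC). */
+ return 0;
+}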
+
+
+/**
+ * Injects any pending events into the guest (or nested-guest).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks Must only be called when we are guaranteed to enter
+ * hardware-assisted SVM execution and not return to ring-3
+ * prematurely.
+ */
+static void hmR0SvmInjectPendingEvent(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!TRPMHasTrap(pVCpu));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ bool const fIntShadow = hmR0SvmIsIntrShadowActive(pVCpu);
+#ifdef VBOX_STRICT
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ bool const fGif = CPUMGetGuestGif(pCtx);
+ bool fAllowInt = fGif;
+ if (fGif)
+ {
+ /*
+ * For nested-guests we have no way to determine if we're injecting a physical or
+ * virtual interrupt at this point. Hence the partial verification below.
+ */
+ if (CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ fAllowInt = CPUMIsGuestSvmPhysIntrEnabled(pVCpu, pCtx) || CPUMIsGuestSvmVirtIntrEnabled(pVCpu, pCtx);
+ else
+ fAllowInt = RT_BOOL(pCtx->eflags.u32 & X86_EFL_IF);
+ }
+#endif
+
+ if (pVCpu->hm.s.Event.fPending)
+ {
+ SVMEVENT Event;
+ Event.u = pVCpu->hm.s.Event.u64IntInfo;
+ Assert(Event.n.u1Valid);
+
+ /*
+ * Validate event injection pre-conditions.
+ */
+ if (Event.n.u3Type == SVM_EVENT_EXTERNAL_IRQ)
+ {
+ Assert(fAllowInt);
+ Assert(!fIntShadow);
+ }
+ else if (Event.n.u3Type == SVM_EVENT_NMI)
+ {
+ Assert(fGif);
+ Assert(!fIntShadow);
+ }
+
+ /*
+ * Before injecting an NMI we must set VMCPU_FF_BLOCK_NMIS to prevent nested NMIs. We
+ * do this only when we are surely going to inject the NMI as otherwise if we return
+ * to ring-3 prematurely we could leave NMIs blocked indefinitely upon re-entry into
+ * SVM R0.
+ *
+ * With VT-x, this is handled via the guest interruptibility-state VMCS field,
+ * which the CPU updates after actually delivering the NMI and which we read on
+ * VM-exit to determine the state.
+ */
+ if ( Event.n.u3Type == SVM_EVENT_NMI
+ && Event.n.u8Vector == X86_XCPT_NMI
+ && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ {
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_BLOCK_NMIS);
+ }
+
+ /*
+ * Inject it (update VMCB for injection by the hardware).
+ */
+ Log4(("Injecting pending HM event\n"));
+ hmR0SvmInjectEventVmcb(pVCpu, pVmcb, &Event);
+ pVCpu->hm.s.Event.fPending = false;
+
+ if (Event.n.u3Type == SVM_EVENT_EXTERNAL_IRQ)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterrupt);
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectXcpt);
+ }
+ else
+ Assert(pVmcb->ctrl.EventInject.n.u1Valid == 0);
+
+ /*
+ * We could have injected an NMI through IEM and continued guest execution using
+ * hardware-assisted SVM. In that case, we would not have any events pending (above)
+ * but we still need to intercept IRET in order to eventually clear NMI inhibition.
+ */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ hmR0SvmSetCtrlIntercept(pVmcb, SVM_CTRL_INTERCEPT_IRET);
+
+ /*
+ * Update the guest interrupt shadow in the guest (or nested-guest) VMCB.
+ *
+ * For nested-guests: We need to update it too for the scenario where IEM executes
+ * the nested-guest but execution later continues here with an interrupt shadow active.
+ */
+ pVmcb->ctrl.IntShadow.n.u1IntShadow = fIntShadow;
+}
+
+
+/**
+ * Reports world-switch error and dumps some useful debug info.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param rcVMRun The return code from VMRUN (or
+ * VERR_SVM_INVALID_GUEST_STATE for invalid
+ * guest-state).
+ */
+static void hmR0SvmReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun)
+{
+ HMSVM_ASSERT_PREEMPT_SAFE(pVCpu);
+ HMSVM_ASSERT_NOT_IN_NESTED_GUEST(&pVCpu->cpum.GstCtx);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+
+ if (rcVMRun == VERR_SVM_INVALID_GUEST_STATE)
+ {
+#ifdef VBOX_STRICT
+ hmR0DumpRegs(pVCpu, HM_DUMP_REG_FLAGS_ALL);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ Log4(("ctrl.u32VmcbCleanBits %#RX32\n", pVmcb->ctrl.u32VmcbCleanBits));
+ Log4(("ctrl.u16InterceptRdCRx %#x\n", pVmcb->ctrl.u16InterceptRdCRx));
+ Log4(("ctrl.u16InterceptWrCRx %#x\n", pVmcb->ctrl.u16InterceptWrCRx));
+ Log4(("ctrl.u16InterceptRdDRx %#x\n", pVmcb->ctrl.u16InterceptRdDRx));
+ Log4(("ctrl.u16InterceptWrDRx %#x\n", pVmcb->ctrl.u16InterceptWrDRx));
+ Log4(("ctrl.u32InterceptXcpt %#x\n", pVmcb->ctrl.u32InterceptXcpt));
+ Log4(("ctrl.u64InterceptCtrl %#RX64\n", pVmcb->ctrl.u64InterceptCtrl));
+ Log4(("ctrl.u64IOPMPhysAddr %#RX64\n", pVmcb->ctrl.u64IOPMPhysAddr));
+ Log4(("ctrl.u64MSRPMPhysAddr %#RX64\n", pVmcb->ctrl.u64MSRPMPhysAddr));
+ Log4(("ctrl.u64TSCOffset %#RX64\n", pVmcb->ctrl.u64TSCOffset));
+
+ Log4(("ctrl.TLBCtrl.u32ASID %#x\n", pVmcb->ctrl.TLBCtrl.n.u32ASID));
+ Log4(("ctrl.TLBCtrl.u8TLBFlush %#x\n", pVmcb->ctrl.TLBCtrl.n.u8TLBFlush));
+ Log4(("ctrl.TLBCtrl.u24Reserved %#x\n", pVmcb->ctrl.TLBCtrl.n.u24Reserved));
+
+ Log4(("ctrl.IntCtrl.u8VTPR %#x\n", pVmcb->ctrl.IntCtrl.n.u8VTPR));
+ Log4(("ctrl.IntCtrl.u1VIrqPending %#x\n", pVmcb->ctrl.IntCtrl.n.u1VIrqPending));
+ Log4(("ctrl.IntCtrl.u1VGif %#x\n", pVmcb->ctrl.IntCtrl.n.u1VGif));
+ Log4(("ctrl.IntCtrl.u6Reserved0 %#x\n", pVmcb->ctrl.IntCtrl.n.u6Reserved));
+ Log4(("ctrl.IntCtrl.u4VIntrPrio %#x\n", pVmcb->ctrl.IntCtrl.n.u4VIntrPrio));
+ Log4(("ctrl.IntCtrl.u1IgnoreTPR %#x\n", pVmcb->ctrl.IntCtrl.n.u1IgnoreTPR));
+ Log4(("ctrl.IntCtrl.u3Reserved %#x\n", pVmcb->ctrl.IntCtrl.n.u3Reserved));
+ Log4(("ctrl.IntCtrl.u1VIntrMasking %#x\n", pVmcb->ctrl.IntCtrl.n.u1VIntrMasking));
+ Log4(("ctrl.IntCtrl.u1VGifEnable %#x\n", pVmcb->ctrl.IntCtrl.n.u1VGifEnable));
+ Log4(("ctrl.IntCtrl.u5Reserved1 %#x\n", pVmcb->ctrl.IntCtrl.n.u5Reserved));
+ Log4(("ctrl.IntCtrl.u8VIntrVector %#x\n", pVmcb->ctrl.IntCtrl.n.u8VIntrVector));
+ Log4(("ctrl.IntCtrl.u24Reserved %#x\n", pVmcb->ctrl.IntCtrl.n.u24Reserved));
+
+ Log4(("ctrl.IntShadow.u1IntShadow %#x\n", pVmcb->ctrl.IntShadow.n.u1IntShadow));
+ Log4(("ctrl.IntShadow.u1GuestIntMask %#x\n", pVmcb->ctrl.IntShadow.n.u1GuestIntMask));
+ Log4(("ctrl.u64ExitCode %#RX64\n", pVmcb->ctrl.u64ExitCode));
+ Log4(("ctrl.u64ExitInfo1 %#RX64\n", pVmcb->ctrl.u64ExitInfo1));
+ Log4(("ctrl.u64ExitInfo2 %#RX64\n", pVmcb->ctrl.u64ExitInfo2));
+ Log4(("ctrl.ExitIntInfo.u8Vector %#x\n", pVmcb->ctrl.ExitIntInfo.n.u8Vector));
+ Log4(("ctrl.ExitIntInfo.u3Type %#x\n", pVmcb->ctrl.ExitIntInfo.n.u3Type));
+ Log4(("ctrl.ExitIntInfo.u1ErrorCodeValid %#x\n", pVmcb->ctrl.ExitIntInfo.n.u1ErrorCodeValid));
+ Log4(("ctrl.ExitIntInfo.u19Reserved %#x\n", pVmcb->ctrl.ExitIntInfo.n.u19Reserved));
+ Log4(("ctrl.ExitIntInfo.u1Valid %#x\n", pVmcb->ctrl.ExitIntInfo.n.u1Valid));
+ Log4(("ctrl.ExitIntInfo.u32ErrorCode %#x\n", pVmcb->ctrl.ExitIntInfo.n.u32ErrorCode));
+ Log4(("ctrl.NestedPagingCtrl.u1NestedPaging %#x\n", pVmcb->ctrl.NestedPagingCtrl.n.u1NestedPaging));
+ Log4(("ctrl.NestedPagingCtrl.u1Sev %#x\n", pVmcb->ctrl.NestedPagingCtrl.n.u1Sev));
+ Log4(("ctrl.NestedPagingCtrl.u1SevEs %#x\n", pVmcb->ctrl.NestedPagingCtrl.n.u1SevEs));
+ Log4(("ctrl.EventInject.u8Vector %#x\n", pVmcb->ctrl.EventInject.n.u8Vector));
+ Log4(("ctrl.EventInject.u3Type %#x\n", pVmcb->ctrl.EventInject.n.u3Type));
+ Log4(("ctrl.EventInject.u1ErrorCodeValid %#x\n", pVmcb->ctrl.EventInject.n.u1ErrorCodeValid));
+ Log4(("ctrl.EventInject.u19Reserved %#x\n", pVmcb->ctrl.EventInject.n.u19Reserved));
+ Log4(("ctrl.EventInject.u1Valid %#x\n", pVmcb->ctrl.EventInject.n.u1Valid));
+ Log4(("ctrl.EventInject.u32ErrorCode %#x\n", pVmcb->ctrl.EventInject.n.u32ErrorCode));
+
+ Log4(("ctrl.u64NestedPagingCR3 %#RX64\n", pVmcb->ctrl.u64NestedPagingCR3));
+
+ Log4(("ctrl.LbrVirt.u1LbrVirt %#x\n", pVmcb->ctrl.LbrVirt.n.u1LbrVirt));
+ Log4(("ctrl.LbrVirt.u1VirtVmsaveVmload %#x\n", pVmcb->ctrl.LbrVirt.n.u1VirtVmsaveVmload));
+
+ Log4(("guest.CS.u16Sel %RTsel\n", pVmcb->guest.CS.u16Sel));
+ Log4(("guest.CS.u16Attr %#x\n", pVmcb->guest.CS.u16Attr));
+ Log4(("guest.CS.u32Limit %#RX32\n", pVmcb->guest.CS.u32Limit));
+ Log4(("guest.CS.u64Base %#RX64\n", pVmcb->guest.CS.u64Base));
+ Log4(("guest.DS.u16Sel %#RTsel\n", pVmcb->guest.DS.u16Sel));
+ Log4(("guest.DS.u16Attr %#x\n", pVmcb->guest.DS.u16Attr));
+ Log4(("guest.DS.u32Limit %#RX32\n", pVmcb->guest.DS.u32Limit));
+ Log4(("guest.DS.u64Base %#RX64\n", pVmcb->guest.DS.u64Base));
+ Log4(("guest.ES.u16Sel %RTsel\n", pVmcb->guest.ES.u16Sel));
+ Log4(("guest.ES.u16Attr %#x\n", pVmcb->guest.ES.u16Attr));
+ Log4(("guest.ES.u32Limit %#RX32\n", pVmcb->guest.ES.u32Limit));
+ Log4(("guest.ES.u64Base %#RX64\n", pVmcb->guest.ES.u64Base));
+ Log4(("guest.FS.u16Sel %RTsel\n", pVmcb->guest.FS.u16Sel));
+ Log4(("guest.FS.u16Attr %#x\n", pVmcb->guest.FS.u16Attr));
+ Log4(("guest.FS.u32Limit %#RX32\n", pVmcb->guest.FS.u32Limit));
+ Log4(("guest.FS.u64Base %#RX64\n", pVmcb->guest.FS.u64Base));
+ Log4(("guest.GS.u16Sel %RTsel\n", pVmcb->guest.GS.u16Sel));
+ Log4(("guest.GS.u16Attr %#x\n", pVmcb->guest.GS.u16Attr));
+ Log4(("guest.GS.u32Limit %#RX32\n", pVmcb->guest.GS.u32Limit));
+ Log4(("guest.GS.u64Base %#RX64\n", pVmcb->guest.GS.u64Base));
+
+ Log4(("guest.GDTR.u32Limit %#RX32\n", pVmcb->guest.GDTR.u32Limit));
+ Log4(("guest.GDTR.u64Base %#RX64\n", pVmcb->guest.GDTR.u64Base));
+
+ Log4(("guest.LDTR.u16Sel %RTsel\n", pVmcb->guest.LDTR.u16Sel));
+ Log4(("guest.LDTR.u16Attr %#x\n", pVmcb->guest.LDTR.u16Attr));
+ Log4(("guest.LDTR.u32Limit %#RX32\n", pVmcb->guest.LDTR.u32Limit));
+ Log4(("guest.LDTR.u64Base %#RX64\n", pVmcb->guest.LDTR.u64Base));
+
+ Log4(("guest.IDTR.u32Limit %#RX32\n", pVmcb->guest.IDTR.u32Limit));
+ Log4(("guest.IDTR.u64Base %#RX64\n", pVmcb->guest.IDTR.u64Base));
+
+ Log4(("guest.TR.u16Sel %RTsel\n", pVmcb->guest.TR.u16Sel));
+ Log4(("guest.TR.u16Attr %#x\n", pVmcb->guest.TR.u16Attr));
+ Log4(("guest.TR.u32Limit %#RX32\n", pVmcb->guest.TR.u32Limit));
+ Log4(("guest.TR.u64Base %#RX64\n", pVmcb->guest.TR.u64Base));
+
+ Log4(("guest.u8CPL %#x\n", pVmcb->guest.u8CPL));
+ Log4(("guest.u64CR0 %#RX64\n", pVmcb->guest.u64CR0));
+ Log4(("guest.u64CR2 %#RX64\n", pVmcb->guest.u64CR2));
+ Log4(("guest.u64CR3 %#RX64\n", pVmcb->guest.u64CR3));
+ Log4(("guest.u64CR4 %#RX64\n", pVmcb->guest.u64CR4));
+ Log4(("guest.u64DR6 %#RX64\n", pVmcb->guest.u64DR6));
+ Log4(("guest.u64DR7 %#RX64\n", pVmcb->guest.u64DR7));
+
+ Log4(("guest.u64RIP %#RX64\n", pVmcb->guest.u64RIP));
+ Log4(("guest.u64RSP %#RX64\n", pVmcb->guest.u64RSP));
+ Log4(("guest.u64RAX %#RX64\n", pVmcb->guest.u64RAX));
+ Log4(("guest.u64RFlags %#RX64\n", pVmcb->guest.u64RFlags));
+
+ Log4(("guest.u64SysEnterCS %#RX64\n", pVmcb->guest.u64SysEnterCS));
+ Log4(("guest.u64SysEnterEIP %#RX64\n", pVmcb->guest.u64SysEnterEIP));
+ Log4(("guest.u64SysEnterESP %#RX64\n", pVmcb->guest.u64SysEnterESP));
+
+ Log4(("guest.u64EFER %#RX64\n", pVmcb->guest.u64EFER));
+ Log4(("guest.u64STAR %#RX64\n", pVmcb->guest.u64STAR));
+ Log4(("guest.u64LSTAR %#RX64\n", pVmcb->guest.u64LSTAR));
+ Log4(("guest.u64CSTAR %#RX64\n", pVmcb->guest.u64CSTAR));
+ Log4(("guest.u64SFMASK %#RX64\n", pVmcb->guest.u64SFMASK));
+ Log4(("guest.u64KernelGSBase %#RX64\n", pVmcb->guest.u64KernelGSBase));
+ Log4(("guest.u64PAT %#RX64\n", pVmcb->guest.u64PAT));
+ Log4(("guest.u64DBGCTL %#RX64\n", pVmcb->guest.u64DBGCTL));
+ Log4(("guest.u64BR_FROM %#RX64\n", pVmcb->guest.u64BR_FROM));
+ Log4(("guest.u64BR_TO %#RX64\n", pVmcb->guest.u64BR_TO));
+ Log4(("guest.u64LASTEXCPFROM %#RX64\n", pVmcb->guest.u64LASTEXCPFROM));
+ Log4(("guest.u64LASTEXCPTO %#RX64\n", pVmcb->guest.u64LASTEXCPTO));
+
+ NOREF(pVmcb);
+#endif /* VBOX_STRICT */
+ }
+ else
+ Log4Func(("rcVMRun=%d\n", rcVMRun));
+}
+
+
+/**
+ * Checks per-VM and per-VCPU force-flag actions that require us to go back to
+ * ring-3 for one reason or another.
+ *
+ * @returns VBox status code (information status code included).
+ * @retval VINF_SUCCESS if we don't have any actions that require going back to
+ * ring-3.
+ * @retval VINF_PGM_SYNC_CR3 if we have pending PGM CR3 sync.
+ * @retval VINF_EM_PENDING_REQUEST if we have pending requests (like hardware
+ * interrupts)
+ * @retval VINF_PGM_POOL_FLUSH_PENDING if PGM is doing a pool flush and requires
+ * all EMTs to be in ring-3.
+ * @retval  VINF_EM_RAW_TO_R3 if there are pending DMA requests.
+ * @retval VINF_EM_NO_MEMORY PGM is out of memory, we need to return
+ * to the EM loop.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int hmR0SvmCheckForceFlags(PVMCPUCC pVCpu)
+{
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
+
+    /* Could happen as a result of a longjmp. */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
+ PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
+
+ /* Update pending interrupts into the APIC's IRR. */
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(pVCpu);
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if ( VM_FF_IS_ANY_SET(pVM, !pVCpu->hm.s.fSingleInstruction
+ ? VM_FF_HP_R0_PRE_HM_MASK : VM_FF_HP_R0_PRE_HM_STEP_MASK)
+ || VMCPU_FF_IS_ANY_SET(pVCpu, !pVCpu->hm.s.fSingleInstruction
+ ? VMCPU_FF_HP_R0_PRE_HM_MASK : VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) )
+ {
+        /* Pending PGM CR3 sync. */
+ if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
+ {
+ int rc = PGMSyncCR3(pVCpu, pVCpu->cpum.GstCtx.cr0, pVCpu->cpum.GstCtx.cr3, pVCpu->cpum.GstCtx.cr4,
+ VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
+ if (rc != VINF_SUCCESS)
+ {
+ Log4Func(("PGMSyncCR3 forcing us back to ring-3. rc=%d\n", rc));
+ return rc;
+ }
+ }
+
+ /* Pending HM-to-R3 operations (critsects, timers, EMT rendezvous etc.) */
+ /* -XXX- what was that about single stepping? */
+ if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HM_TO_R3_MASK)
+ || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
+ int rc = RT_LIKELY(!VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_RAW_TO_R3 : VINF_EM_NO_MEMORY;
+ Log4Func(("HM_TO_R3 forcing us back to ring-3. rc=%d\n", rc));
+ return rc;
+ }
+
+ /* Pending VM request packets, such as hardware interrupts. */
+ if ( VM_FF_IS_SET(pVM, VM_FF_REQUEST)
+ || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_REQUEST))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchVmReq);
+ Log4Func(("Pending VM request forcing us back to ring-3\n"));
+ return VINF_EM_PENDING_REQUEST;
+ }
+
+ /* Pending PGM pool flushes. */
+ if (VM_FF_IS_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPgmPoolFlush);
+ Log4Func(("PGM pool flush pending forcing us back to ring-3\n"));
+ return VINF_PGM_POOL_FLUSH_PENDING;
+ }
+
+ /* Pending DMA requests. */
+ if (VM_FF_IS_SET(pVM, VM_FF_PDM_DMA))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchDma);
+ Log4Func(("Pending DMA request forcing us back to ring-3\n"));
+ return VINF_EM_RAW_TO_R3;
+ }
+ }
+
+ return VINF_SUCCESS;
+}
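+
+
+/*
+ * Illustrative, self-contained sketch (not taken from the VMM sources) of the
+ * force-flag pattern used by hmR0SvmCheckForceFlags() above: the action flags
+ * live in an atomic 32-bit word, the VM_FF_IS_ANY_SET()-style checks are plain
+ * mask tests, and VMCPU_FF_TEST_AND_CLEAR() is an atomic fetch-and-and.  The
+ * names below and the use of std::atomic (instead of the ASMAtomic* APIs) are
+ * assumptions for this sketch only.
+ */
+#include <atomic>
+#include <cstdint>
+
+struct SKETCHFFS { std::atomic<uint32_t> fFlags; };
+
+/* Mask test: is any of the requested action flags currently set? */
+static bool sketchFFIsAnySet(SKETCHFFS *pFFs, uint32_t fMask)
+{
+    return (pFFs->fFlags.load(std::memory_order_relaxed) & fMask) != 0;
+}
+
+/* Atomically clear one flag and report whether it was set beforehand. */
+static bool sketchFFTestAndClear(SKETCHFFS *pFFs, uint32_t fFlag)
+{
+    uint32_t const fOld = pFFs->fFlags.fetch_and(~fFlag, std::memory_order_relaxed);
+    return (fOld & fFlag) != 0;
+}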
+
+
+/**
+ * Performs the preparations before executing guest code in AMD-V.
+ *
+ * This may cause longjmps to ring-3 and may even result in rescheduling to the
+ * recompiler. We must be cautious about what we do here when committing
+ * guest-state information into the VMCB, on the assumption that we will
+ * definitely execute the guest in AMD-V. If we fall back to the recompiler
+ * after updating the VMCB and clearing the common state (TRPM/force-flags),
+ * we must undo those changes so that the recompiler can (and should) use
+ * them when it resumes guest execution. Otherwise, such operations must be
+ * done when we can no longer exit to ring-3.
+ *
+ * @returns VBox status code (informational status codes included).
+ * @retval VINF_SUCCESS if we can proceed with running the guest.
+ * @retval VINF_* scheduling changes, we have to go back to ring-3.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM transient structure.
+ */
+static int hmR0SvmPreRunGuest(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_ASSERT_PREEMPT_SAFE(pVCpu);
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ if (pSvmTransient->fIsNestedGuest)
+ {
+ Log2(("hmR0SvmPreRunGuest: Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
+ return VINF_EM_RESCHEDULE_REM;
+ }
+#endif
+
+ /* Check force flag actions that might require us to go back to ring-3. */
+ int rc = hmR0SvmCheckForceFlags(pVCpu);
+ if (rc != VINF_SUCCESS)
+ return rc;
+
+ if (TRPMHasTrap(pVCpu))
+ hmR0SvmTrpmTrapToPendingEvent(pVCpu);
+ else if (!pVCpu->hm.s.Event.fPending)
+ {
+ VBOXSTRICTRC rcStrict = hmR0SvmEvaluatePendingEvent(pVCpu, pSvmTransient);
+ if ( rcStrict != VINF_SUCCESS
+ || pSvmTransient->fIsNestedGuest != CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ {
+ /* If a nested-guest VM-exit occurred, bail. */
+ if (pSvmTransient->fIsNestedGuest)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
+ return VBOXSTRICTRC_VAL(rcStrict);
+ }
+ }
+
+ /*
+ * On the oldest AMD-V systems, we may not get enough information to reinject an NMI.
+ * Just do it in software, see @bugref{8411}.
+ * NB: If we could continue a task switch exit we wouldn't need to do this.
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if (RT_UNLIKELY( !pVM->hm.s.svm.u32Features
+ && pVCpu->hm.s.Event.fPending
+ && SVM_EVENT_GET_TYPE(pVCpu->hm.s.Event.u64IntInfo) == SVM_EVENT_NMI))
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+
+#ifdef HMSVM_SYNC_FULL_GUEST_STATE
+ Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMSVM_CPUMCTX_EXTRN_ALL));
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+#endif
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ /*
+ * Set up the nested-guest VMCB for execution using hardware-assisted SVM.
+ */
+ if (pSvmTransient->fIsNestedGuest)
+ hmR0SvmSetupVmcbNested(pVCpu);
+#endif
+
+ /*
+ * Export the guest state bits that are not shared with the host in any way as we can
+ * longjmp or get preempted in the midst of exporting some of the state.
+ */
+ rc = hmR0SvmExportGuestState(pVCpu, pSvmTransient);
+ AssertRCReturn(rc, rc);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
+
+ /* Ensure we've cached (and hopefully modified) the nested-guest VMCB for execution using hardware-assisted SVM. */
+ Assert(!pSvmTransient->fIsNestedGuest || pVCpu->hm.s.svm.NstGstVmcbCache.fCacheValid);
+
+ /*
+ * If we're not intercepting TPR changes in the guest, save the guest TPR before the
+ * world-switch so we can update it on the way back if the guest changed the TPR.
+ */
+ if (pVCpu->hm.s.svm.fSyncVTpr)
+ {
+ Assert(!pSvmTransient->fIsNestedGuest);
+ PCSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+ if (pVM->hm.s.fTPRPatchingActive)
+ pSvmTransient->u8GuestTpr = pVmcb->guest.u64LSTAR;
+ else
+ pSvmTransient->u8GuestTpr = pVmcb->ctrl.IntCtrl.n.u8VTPR;
+ }
+
+ /*
+ * No longjmps to ring-3 from this point on!!!
+ *
+ * Asserts() will still longjmp to ring-3 (but won't return), which is intentional,
+ * better than a kernel panic. This also disables flushing of the R0-logger instance.
+ */
+ VMMRZCallRing3Disable(pVCpu);
+
+ /*
+ * We disable interrupts so that we don't miss any interrupts that would flag preemption
+ * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
+     * preemption disabled for a while. Since this is purely to aid the
+     * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily re-enable and
+     * disable interrupts on NT.
+ *
+     * We need to check for force-flags that could've possibly been altered since we last
+ * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
+ * see @bugref{6398}).
+ *
+ * We also check a couple of other force-flags as a last opportunity to get the EMT back
+ * to ring-3 before executing guest code.
+ */
+ pSvmTransient->fEFlags = ASMIntDisableFlags();
+ if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
+ || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
+ {
+ ASMSetFlags(pSvmTransient->fEFlags);
+ VMMRZCallRing3Enable(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
+ return VINF_EM_RAW_TO_R3;
+ }
+ if (RTThreadPreemptIsPending(NIL_RTTHREAD))
+ {
+ ASMSetFlags(pSvmTransient->fEFlags);
+ VMMRZCallRing3Enable(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
+ return VINF_EM_RAW_INTERRUPT;
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Prepares to run guest (or nested-guest) code in AMD-V once we have committed
+ * to doing so.
+ *
+ * This means there is no backing out to ring-3 or anywhere else at this point.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM transient structure.
+ *
+ * @remarks Called with preemption disabled.
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmPreRunGuestCommitted(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(VMMR0IsLogFlushDisabled(pVCpu));
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); /* Indicate the start of guest execution. */
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PSVMVMCB pVmcb = pSvmTransient->pVmcb;
+
+ hmR0SvmInjectPendingEvent(pVCpu, pVmcb);
+
+ if (!CPUMIsGuestFPUStateActive(pVCpu))
+ {
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
+ CPUMR0LoadGuestFPU(pVM, pVCpu); /* (Ignore rc, no need to set HM_CHANGED_HOST_CONTEXT for SVM.) */
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
+ }
+
+ /* Load the state shared between host and guest (FPU, debug). */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE)
+ hmR0SvmExportSharedState(pVCpu, pVmcb);
+
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT; /* Preemption might set this, nothing to do on AMD-V. */
+ AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
+
+ PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
+ RTCPUID const idHostCpu = pHostCpu->idCpu;
+ bool const fMigratedHostCpu = idHostCpu != pVCpu->hm.s.idLastCpu;
+
+ /* Setup TSC offsetting. */
+ if ( pSvmTransient->fUpdateTscOffsetting
+ || fMigratedHostCpu)
+ {
+ hmR0SvmUpdateTscOffsetting(pVCpu, pVmcb);
+ pSvmTransient->fUpdateTscOffsetting = false;
+ }
+
+ /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
+ if (!(pVmcb->ctrl.u64InterceptCtrl & (SVM_CTRL_INTERCEPT_RDTSC | SVM_CTRL_INTERCEPT_RDTSCP)))
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
+
+    /* If we're migrating CPUs, mark the VMCB Clean bits as dirty. */
+ if (fMigratedHostCpu)
+ pVmcb->ctrl.u32VmcbCleanBits = 0;
+
+ /* Store status of the shared guest-host state at the time of VMRUN. */
+ pSvmTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
+ pSvmTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ uint8_t *pbMsrBitmap;
+ if (!pSvmTransient->fIsNestedGuest)
+ pbMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap;
+ else
+ {
+ hmR0SvmMergeMsrpmNested(pHostCpu, pVCpu);
+
+ /* Update the nested-guest VMCB with the newly merged MSRPM (clean bits updated below). */
+ pVmcb->ctrl.u64MSRPMPhysAddr = pHostCpu->n.svm.HCPhysNstGstMsrpm;
+ pbMsrBitmap = (uint8_t *)pHostCpu->n.svm.pvNstGstMsrpm;
+ }
+#else
+ uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap;
+#endif
+
+ ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
+ /* Flush the appropriate tagged-TLB entries. */
+ hmR0SvmFlushTaggedTlb(pHostCpu, pVCpu, pVmcb);
+ Assert(pVCpu->hm.s.idLastCpu == idHostCpu);
+
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
+
+ TMNotifyStartOfExecution(pVM, pVCpu); /* Finally, notify TM to resume its clocks as we're about
+ to start executing. */
+
+ /*
+ * Save the current Host TSC_AUX and write the guest TSC_AUX to the host, so that RDTSCPs
+     * (that don't cause exits) read the guest MSR, see @bugref{3324}.
+ *
+ * This should be done -after- any RDTSCPs for obtaining the host timestamp (TM, STAM etc).
+ */
+ if ( pVM->cpum.ro.HostFeatures.fRdTscP
+ && !(pVmcb->ctrl.u64InterceptCtrl & SVM_CTRL_INTERCEPT_RDTSCP))
+ {
+ uint64_t const uGuestTscAux = CPUMGetGuestTscAux(pVCpu);
+ pVCpu->hm.s.svm.u64HostTscAux = ASMRdMsr(MSR_K8_TSC_AUX);
+ if (uGuestTscAux != pVCpu->hm.s.svm.u64HostTscAux)
+ ASMWrMsr(MSR_K8_TSC_AUX, uGuestTscAux);
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_TSC_AUX, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ pSvmTransient->fRestoreTscAuxMsr = true;
+ }
+ else
+ {
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_TSC_AUX, SVMMSREXIT_INTERCEPT_READ, SVMMSREXIT_INTERCEPT_WRITE);
+ pSvmTransient->fRestoreTscAuxMsr = false;
+ }
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_IOPM_MSRPM;
+
+ /*
+     * If VMCB Clean bits aren't supported by the CPU, or aren't exposed to the guest in the
+     * nested-virtualization case, mark all state-bits as dirty, indicating to the CPU that it
+     * must re-load everything from the VMCB.
+ */
+ bool const fSupportsVmcbCleanBits = hmR0SvmSupportsVmcbCleanBits(pVCpu, pSvmTransient->fIsNestedGuest);
+ if (!fSupportsVmcbCleanBits)
+ pVmcb->ctrl.u32VmcbCleanBits = 0;
+}
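+
+
+/*
+ * Illustrative sketch (not taken from the VMM sources) of the TSC_AUX handling set
+ * up above and undone in hmR0SvmPostRunGuest(): save the host value, load the guest
+ * value before VMRUN, then read back whatever the guest left behind and restore the
+ * host value after the world switch.  The MSR is modelled with a plain variable and
+ * all names are assumptions for this sketch only (assumes <cstdint>).
+ */
+static uint64_t g_uSketchMsrTscAux;                      /* Stands in for MSR_K8_TSC_AUX. */
+
+/* Before VMRUN: returns the saved host value which the caller keeps for the restore. */
+static uint64_t sketchSwapInGuestTscAux(uint64_t uGuestTscAux)
+{
+    uint64_t const uHostTscAux = g_uSketchMsrTscAux;     /* rdmsr */
+    if (uGuestTscAux != uHostTscAux)
+        g_uSketchMsrTscAux = uGuestTscAux;               /* wrmsr, skipped when the values match */
+    return uHostTscAux;
+}
+
+/* After #VMEXIT: returns the value the guest left behind (to stash in its context). */
+static uint64_t sketchRestoreHostTscAux(uint64_t uHostTscAux)
+{
+    uint64_t const uGuestTscAux = g_uSketchMsrTscAux;    /* rdmsr, the guest may have changed it */
+    if (uGuestTscAux != uHostTscAux)
+        g_uSketchMsrTscAux = uHostTscAux;                /* wrmsr */
+    return uGuestTscAux;
+}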
+
+
+/**
+ * Wrapper for running the guest (or nested-guest) code in AMD-V.
+ *
+ * @returns VBox strict status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param HCPhysVmcb The host physical address of the VMCB.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+DECLINLINE(int) hmR0SvmRunGuest(PVMCPUCC pVCpu, RTHCPHYS HCPhysVmcb)
+{
+ /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ pCtx->fExtrn |= HMSVM_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
+
+ /*
+ * 64-bit Windows uses XMM registers in the kernel as the Microsoft compiler expresses
+ * floating-point operations using SSE instructions. Some XMM registers (XMM6-XMM15) are
+     * callee-saved, hence the need for this XMM wrapper.
+ *
+     * Refer to MSDN "Configuring Programs for 64-bit/x64 Software Conventions / Register Usage".
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+#ifdef VBOX_WITH_KERNEL_USING_XMM
+ return hmR0SVMRunWrapXMM(pVCpu->hm.s.svm.HCPhysVmcbHost, HCPhysVmcb, pCtx, pVM, pVCpu, pVCpu->hm.s.svm.pfnVMRun);
+#else
+ return pVCpu->hm.s.svm.pfnVMRun(pVCpu->hm.s.svm.HCPhysVmcbHost, HCPhysVmcb, pCtx, pVM, pVCpu);
+#endif
+}
+
+
+/**
+ * Performs some essential restoration of state after running guest (or
+ * nested-guest) code in AMD-V.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM transient structure.
+ * @param rcVMRun Return code of VMRUN.
+ *
+ * @remarks Called with interrupts disabled.
+ * @remarks No-long-jump zone!!! This function will however re-enable longjmps
+ * unconditionally when it is safe to do so.
+ */
+static void hmR0SvmPostRunGuest(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient, int rcVMRun)
+{
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ uint64_t const uHostTsc = ASMReadTSC(); /* Read the TSC as soon as possible. */
+ ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
+ ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
+
+ PSVMVMCB pVmcb = pSvmTransient->pVmcb;
+ PSVMVMCBCTRL pVmcbCtrl = &pVmcb->ctrl;
+
+ /* TSC read must be done early for maximum accuracy. */
+ if (!(pVmcbCtrl->u64InterceptCtrl & SVM_CTRL_INTERCEPT_RDTSC))
+ {
+ if (!pSvmTransient->fIsNestedGuest)
+ TMCpuTickSetLastSeen(pVCpu, uHostTsc + pVmcbCtrl->u64TSCOffset);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ else
+ {
+ /* The nested-guest VMCB TSC offset shall eventually be restored on #VMEXIT via HMNotifySvmNstGstVmexit(). */
+ uint64_t const uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uHostTsc + pVmcbCtrl->u64TSCOffset);
+ TMCpuTickSetLastSeen(pVCpu, uGstTsc);
+ }
+#endif
+ }
+
+ if (pSvmTransient->fRestoreTscAuxMsr)
+ {
+ uint64_t u64GuestTscAuxMsr = ASMRdMsr(MSR_K8_TSC_AUX);
+ CPUMSetGuestTscAux(pVCpu, u64GuestTscAuxMsr);
+ if (u64GuestTscAuxMsr != pVCpu->hm.s.svm.u64HostTscAux)
+ ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hm.s.svm.u64HostTscAux);
+ }
+
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ TMNotifyEndOfExecution(pVM, pVCpu); /* Notify TM that the guest is no longer running. */
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
+
+ Assert(!(ASMGetFlags() & X86_EFL_IF));
+ ASMSetFlags(pSvmTransient->fEFlags); /* Enable interrupts. */
+ VMMRZCallRing3Enable(pVCpu); /* It is now safe to do longjmps to ring-3!!! */
+
+ /* If VMRUN failed, we can bail out early. This does -not- cover SVM_EXIT_INVALID. */
+ if (RT_UNLIKELY(rcVMRun != VINF_SUCCESS))
+ {
+ Log4Func(("VMRUN failure: rcVMRun=%Rrc\n", rcVMRun));
+ return;
+ }
+
+ pSvmTransient->u64ExitCode = pVmcbCtrl->u64ExitCode; /* Save the #VMEXIT reason. */
+ pSvmTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
+ pSvmTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
+ pVmcbCtrl->u32VmcbCleanBits = HMSVM_VMCB_CLEAN_ALL; /* Mark the VMCB-state cache as unmodified by VMM. */
+
+#ifdef HMSVM_SYNC_FULL_GUEST_STATE
+ hmR0SvmImportGuestState(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMSVM_CPUMCTX_EXTRN_ALL));
+#else
+ /*
+ * Always import the following:
+ *
+ * - RIP for exit optimizations and evaluating event injection on re-entry.
+ * - RFLAGS for evaluating event injection on VM re-entry and for exporting shared debug
+ * state on preemption.
+ * - Interrupt shadow, GIF for evaluating event injection on VM re-entry.
+ * - CS for exit optimizations.
+ * - RAX, RSP for simplifying assumptions on GPRs. All other GPRs are swapped by the
+ * assembly switcher code.
+ * - Shared state (only DR7 currently) for exporting shared debug state on preemption.
+ */
+ hmR0SvmImportGuestState(pVCpu, CPUMCTX_EXTRN_RIP
+ | CPUMCTX_EXTRN_RFLAGS
+ | CPUMCTX_EXTRN_RAX
+ | CPUMCTX_EXTRN_RSP
+ | CPUMCTX_EXTRN_CS
+ | CPUMCTX_EXTRN_HWVIRT
+ | CPUMCTX_EXTRN_HM_SVM_INT_SHADOW
+ | CPUMCTX_EXTRN_HM_SVM_HWVIRT_VIRQ
+ | HMSVM_CPUMCTX_SHARED_STATE);
+#endif
+
+ if ( pSvmTransient->u64ExitCode != SVM_EXIT_INVALID
+ && pVCpu->hm.s.svm.fSyncVTpr)
+ {
+ Assert(!pSvmTransient->fIsNestedGuest);
+ /* TPR patching (for 32-bit guests) uses LSTAR MSR for holding the TPR value, otherwise uses the VTPR. */
+ if ( pVM->hm.s.fTPRPatchingActive
+ && (pVmcb->guest.u64LSTAR & 0xff) != pSvmTransient->u8GuestTpr)
+ {
+ int rc = APICSetTpr(pVCpu, pVmcb->guest.u64LSTAR & 0xff);
+ AssertRC(rc);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ }
+ /* Sync TPR when we aren't intercepting CR8 writes. */
+ else if (pSvmTransient->u8GuestTpr != pVmcbCtrl->IntCtrl.n.u8VTPR)
+ {
+ int rc = APICSetTpr(pVCpu, pVmcbCtrl->IntCtrl.n.u8VTPR << 4);
+ AssertRC(rc);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ }
+ }
+
+#ifdef DEBUG_ramshankar
+ if (CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ {
+ hmR0SvmImportGuestState(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ hmR0SvmLogState(pVCpu, pVmcb, pVCpu->cpum.GstCtx, "hmR0SvmPostRunGuestNested", HMSVM_LOG_ALL & ~HMSVM_LOG_LBR,
+ 0 /* uVerbose */);
+ }
+#endif
+
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_SVM, pSvmTransient->u64ExitCode & EMEXIT_F_TYPE_MASK),
+ pVCpu->cpum.GstCtx.cs.u64Base + pVCpu->cpum.GstCtx.rip, uHostTsc);
+}
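+
+
+/*
+ * Illustrative sketch (not taken from the VMM sources) of the V_TPR <-> APIC TPR
+ * conversion used by the TPR syncing above: the VMCB's V_TPR field mirrors CR8,
+ * i.e. only the task-priority class (bits 7:4 of the 8-bit APIC TPR), hence the
+ * shifts by 4.  The names are assumptions for this sketch only (assumes <cstdint>).
+ */
+static inline uint8_t sketchApicTprToVTpr(uint8_t uTpr)  { return uTpr >> 4; }
+static inline uint8_t sketchVTprToApicTpr(uint8_t uVTpr) { return (uint8_t)(uVTpr << 4); }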
+
+
+/**
+ * Runs the guest code using AMD-V.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pcLoops Pointer to the number of executed loops.
+ */
+static int hmR0SvmRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
+{
+ uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops;
+ Assert(pcLoops);
+ Assert(*pcLoops <= cMaxResumeLoops);
+
+ SVMTRANSIENT SvmTransient;
+ RT_ZERO(SvmTransient);
+ SvmTransient.fUpdateTscOffsetting = true;
+ SvmTransient.pVmcb = pVCpu->hm.s.svm.pVmcb;
+
+ int rc = VERR_INTERNAL_ERROR_5;
+ for (;;)
+ {
+ Assert(!HMR0SuspendPending());
+ HMSVM_ASSERT_CPU_SAFE(pVCpu);
+
+        /* Preparatory work for running guest code; this may force us to return to
+           ring-3. This bugger disables interrupts on VINF_SUCCESS! */
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+ rc = hmR0SvmPreRunGuest(pVCpu, &SvmTransient);
+ if (rc != VINF_SUCCESS)
+ break;
+
+ /*
+ * No longjmps to ring-3 from this point on!!!
+ *
+ * Asserts() will still longjmp to ring-3 (but won't return), which is intentional,
+ * better than a kernel panic. This also disables flushing of the R0-logger instance.
+ */
+ hmR0SvmPreRunGuestCommitted(pVCpu, &SvmTransient);
+ rc = hmR0SvmRunGuest(pVCpu, pVCpu->hm.s.svm.HCPhysVmcb);
+
+ /* Restore any residual host-state and save any bits shared between host and guest
+ into the guest-CPU state. Re-enables interrupts! */
+ hmR0SvmPostRunGuest(pVCpu, &SvmTransient, rc);
+
+ if (RT_UNLIKELY( rc != VINF_SUCCESS /* Check for VMRUN errors. */
+ || SvmTransient.u64ExitCode == SVM_EXIT_INVALID)) /* Check for invalid guest-state errors. */
+ {
+ if (rc == VINF_SUCCESS)
+ rc = VERR_SVM_INVALID_GUEST_STATE;
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
+ hmR0SvmReportWorldSwitchError(pVCpu, rc);
+ break;
+ }
+
+ /* Handle the #VMEXIT. */
+ HMSVM_EXITCODE_STAM_COUNTER_INC(SvmTransient.u64ExitCode);
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
+ VBOXVMM_R0_HMSVM_VMEXIT(pVCpu, &pVCpu->cpum.GstCtx, SvmTransient.u64ExitCode, pVCpu->hm.s.svm.pVmcb);
+ rc = hmR0SvmHandleExit(pVCpu, &SvmTransient);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
+ if (rc != VINF_SUCCESS)
+ break;
+ if (++(*pcLoops) >= cMaxResumeLoops)
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+ rc = VINF_EM_RAW_INTERRUPT;
+ break;
+ }
+ }
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+ return rc;
+}
+
+
+/**
+ * Runs the guest code using AMD-V in single step mode.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pcLoops Pointer to the number of executed loops.
+ */
+static int hmR0SvmRunGuestCodeStep(PVMCPUCC pVCpu, uint32_t *pcLoops)
+{
+ uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops;
+ Assert(pcLoops);
+ Assert(*pcLoops <= cMaxResumeLoops);
+
+ SVMTRANSIENT SvmTransient;
+ RT_ZERO(SvmTransient);
+ SvmTransient.fUpdateTscOffsetting = true;
+ SvmTransient.pVmcb = pVCpu->hm.s.svm.pVmcb;
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint16_t uCsStart = pCtx->cs.Sel;
+ uint64_t uRipStart = pCtx->rip;
+
+ int rc = VERR_INTERNAL_ERROR_5;
+ for (;;)
+ {
+ Assert(!HMR0SuspendPending());
+ AssertMsg(pVCpu->hm.s.idEnteredCpu == RTMpCpuId(),
+ ("Illegal migration! Entered on CPU %u Current %u cLoops=%u\n", (unsigned)pVCpu->hm.s.idEnteredCpu,
+ (unsigned)RTMpCpuId(), *pcLoops));
+
+        /* Preparatory work for running guest code; this may force us to return to
+           ring-3. This bugger disables interrupts on VINF_SUCCESS! */
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+ rc = hmR0SvmPreRunGuest(pVCpu, &SvmTransient);
+ if (rc != VINF_SUCCESS)
+ break;
+
+ /*
+ * No longjmps to ring-3 from this point on!!!
+ *
+ * Asserts() will still longjmp to ring-3 (but won't return), which is intentional,
+ * better than a kernel panic. This also disables flushing of the R0-logger instance.
+ */
+ hmR0SvmPreRunGuestCommitted(pVCpu, &SvmTransient);
+
+ rc = hmR0SvmRunGuest(pVCpu, pVCpu->hm.s.svm.HCPhysVmcb);
+
+ /* Restore any residual host-state and save any bits shared between host and guest
+ into the guest-CPU state. Re-enables interrupts! */
+ hmR0SvmPostRunGuest(pVCpu, &SvmTransient, rc);
+
+ if (RT_UNLIKELY( rc != VINF_SUCCESS /* Check for VMRUN errors. */
+ || SvmTransient.u64ExitCode == SVM_EXIT_INVALID)) /* Check for invalid guest-state errors. */
+ {
+ if (rc == VINF_SUCCESS)
+ rc = VERR_SVM_INVALID_GUEST_STATE;
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
+ hmR0SvmReportWorldSwitchError(pVCpu, rc);
+ return rc;
+ }
+
+ /* Handle the #VMEXIT. */
+ HMSVM_EXITCODE_STAM_COUNTER_INC(SvmTransient.u64ExitCode);
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
+ VBOXVMM_R0_HMSVM_VMEXIT(pVCpu, pCtx, SvmTransient.u64ExitCode, pVCpu->hm.s.svm.pVmcb);
+ rc = hmR0SvmHandleExit(pVCpu, &SvmTransient);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
+ if (rc != VINF_SUCCESS)
+ break;
+ if (++(*pcLoops) >= cMaxResumeLoops)
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+ rc = VINF_EM_RAW_INTERRUPT;
+ break;
+ }
+
+ /*
+         * Did the RIP change? If so, consider it a single step.
+ * Otherwise, make sure one of the TFs gets set.
+ */
+ if ( pCtx->rip != uRipStart
+ || pCtx->cs.Sel != uCsStart)
+ {
+ rc = VINF_EM_DBG_STEPPED;
+ break;
+ }
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_DR_MASK;
+ }
+
+ /*
+ * Clear the X86_EFL_TF if necessary.
+ */
+ if (pVCpu->hm.s.fClearTrapFlag)
+ {
+ pVCpu->hm.s.fClearTrapFlag = false;
+ pCtx->eflags.Bits.u1TF = 0;
+ }
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+ return rc;
+}
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+/**
+ * Runs the nested-guest code using AMD-V.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pcLoops Pointer to the number of executed loops. If we're switching
+ * from the guest-code execution loop to this nested-guest
+ * execution loop pass the remainder value, else pass 0.
+ */
+static int hmR0SvmRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
+{
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ HMSVM_ASSERT_IN_NESTED_GUEST(pCtx);
+ Assert(pcLoops);
+ Assert(*pcLoops <= pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops);
+
+ SVMTRANSIENT SvmTransient;
+ RT_ZERO(SvmTransient);
+ SvmTransient.fUpdateTscOffsetting = true;
+ SvmTransient.pVmcb = pCtx->hwvirt.svm.CTX_SUFF(pVmcb);
+ SvmTransient.fIsNestedGuest = true;
+
+ int rc = VERR_INTERNAL_ERROR_4;
+ for (;;)
+ {
+ Assert(!HMR0SuspendPending());
+ HMSVM_ASSERT_CPU_SAFE(pVCpu);
+
+        /* Preparatory work for running nested-guest code; this may force us to return to
+           ring-3. This bugger disables interrupts on VINF_SUCCESS! */
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+ rc = hmR0SvmPreRunGuest(pVCpu, &SvmTransient);
+ if ( rc != VINF_SUCCESS
+ || !CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ break;
+
+ /*
+ * No longjmps to ring-3 from this point on!!!
+ *
+ * Asserts() will still longjmp to ring-3 (but won't return), which is intentional,
+ * better than a kernel panic. This also disables flushing of the R0-logger instance.
+ */
+ hmR0SvmPreRunGuestCommitted(pVCpu, &SvmTransient);
+
+ rc = hmR0SvmRunGuest(pVCpu, pCtx->hwvirt.svm.HCPhysVmcb);
+
+ /* Restore any residual host-state and save any bits shared between host and guest
+ into the guest-CPU state. Re-enables interrupts! */
+ hmR0SvmPostRunGuest(pVCpu, &SvmTransient, rc);
+
+ if (RT_LIKELY( rc == VINF_SUCCESS
+ && SvmTransient.u64ExitCode != SVM_EXIT_INVALID))
+ { /* extremely likely */ }
+ else
+ {
+ /* VMRUN failed, shouldn't really happen, Guru. */
+ if (rc != VINF_SUCCESS)
+ break;
+
+ /* Invalid nested-guest state. Cause a #VMEXIT but assert on strict builds. */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ AssertMsgFailed(("Invalid nested-guest state. rc=%Rrc u64ExitCode=%#RX64\n", rc, SvmTransient.u64ExitCode));
+ rc = VBOXSTRICTRC_TODO(IEMExecSvmVmexit(pVCpu, SVM_EXIT_INVALID, 0, 0));
+ break;
+ }
+
+ /* Handle the #VMEXIT. */
+ HMSVM_NESTED_EXITCODE_STAM_COUNTER_INC(SvmTransient.u64ExitCode);
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
+ VBOXVMM_R0_HMSVM_VMEXIT(pVCpu, pCtx, SvmTransient.u64ExitCode, pCtx->hwvirt.svm.CTX_SUFF(pVmcb));
+ rc = hmR0SvmHandleExitNested(pVCpu, &SvmTransient);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
+ if (rc == VINF_SUCCESS)
+ {
+ if (!CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
+ rc = VINF_SVM_VMEXIT;
+ }
+ else
+ {
+ if (++(*pcLoops) <= pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops)
+ continue;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+ rc = VINF_EM_RAW_INTERRUPT;
+ }
+ }
+ else
+ Assert(rc != VINF_SVM_VMEXIT);
+ break;
+ /** @todo NSTSVM: handle single-stepping. */
+ }
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+ return rc;
+}
+#endif
+
+
+/**
+ * Runs the guest code using AMD-V.
+ *
+ * @returns Strict VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0DECL(VBOXSTRICTRC) SVMR0RunGuestCode(PVMCPUCC pVCpu)
+{
+ AssertPtr(pVCpu);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
+ HMSVM_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ uint32_t cLoops = 0;
+ int rc;
+ for (;;)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ bool const fInNestedGuestMode = CPUMIsGuestInSvmNestedHwVirtMode(pCtx);
+#else
+ NOREF(pCtx);
+ bool const fInNestedGuestMode = false;
+#endif
+ if (!fInNestedGuestMode)
+ {
+ if (!pVCpu->hm.s.fSingleInstruction)
+ rc = hmR0SvmRunGuestCodeNormal(pVCpu, &cLoops);
+ else
+ rc = hmR0SvmRunGuestCodeStep(pVCpu, &cLoops);
+ }
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ else
+ rc = hmR0SvmRunGuestCodeNested(pVCpu, &cLoops);
+
+ if (rc == VINF_SVM_VMRUN)
+ {
+ Assert(CPUMIsGuestInSvmNestedHwVirtMode(pCtx));
+ continue;
+ }
+ if (rc == VINF_SVM_VMEXIT)
+ {
+ Assert(!CPUMIsGuestInSvmNestedHwVirtMode(pCtx));
+ continue;
+ }
+#endif
+ break;
+ }
+
+ /* Fixup error codes. */
+ if (rc == VERR_EM_INTERPRETER)
+ rc = VINF_EM_RAW_EMULATE_INSTR;
+ else if (rc == VINF_EM_RESET)
+ rc = VINF_EM_TRIPLE_FAULT;
+
+ /* Prepare to return to ring-3. This will remove longjmp notifications. */
+ rc = hmR0SvmExitToRing3(pVCpu, rc);
+ Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
+ Assert(!VMMRZCallRing3IsNotificationSet(pVCpu));
+ return rc;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+/**
+ * Determines whether the given I/O access should cause a nested-guest \#VMEXIT.
+ *
+ * @param pvIoBitmap Pointer to the nested-guest IO bitmap.
+ * @param pIoExitInfo Pointer to the SVMIOIOEXITINFO.
+ */
+static bool hmR0SvmIsIoInterceptSet(void *pvIoBitmap, PSVMIOIOEXITINFO pIoExitInfo)
+{
+ const uint16_t u16Port = pIoExitInfo->n.u16Port;
+ const SVMIOIOTYPE enmIoType = (SVMIOIOTYPE)pIoExitInfo->n.u1Type;
+ const uint8_t cbReg = (pIoExitInfo->u >> SVM_IOIO_OP_SIZE_SHIFT) & 7;
+ const uint8_t cAddrSizeBits = ((pIoExitInfo->u >> SVM_IOIO_ADDR_SIZE_SHIFT) & 7) << 4;
+ const uint8_t iEffSeg = pIoExitInfo->n.u3Seg;
+ const bool fRep = pIoExitInfo->n.u1Rep;
+ const bool fStrIo = pIoExitInfo->n.u1Str;
+
+ return CPUMIsSvmIoInterceptSet(pvIoBitmap, u16Port, enmIoType, cbReg, cAddrSizeBits, iEffSeg, fRep, fStrIo,
+ NULL /* pIoExitInfo */);
+}
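+
+
+/*
+ * Illustrative, self-contained sketch (not taken from the VMM sources) of the I/O
+ * permission bitmap test that CPUMIsSvmIoInterceptSet() performs for the function
+ * above: the IOPM holds one intercept bit per I/O port, and a multi-byte access is
+ * intercepted if the bit of any covered port is set (the 12 KB bitmap has slack so
+ * that accesses near port 0xffff do not run off the end).  The names are assumptions
+ * for this sketch only (assumes <cstdint>).
+ */
+static bool sketchIsIoPortIntercepted(uint8_t const *pbIopm, uint16_t uPort, uint8_t cbAccess)
+{
+    for (uint32_t uCur = uPort; uCur < (uint32_t)uPort + cbAccess; uCur++)
+        if (pbIopm[uCur / 8] & (1 << (uCur % 8)))
+            return true;
+    return false;
+}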
+
+
+/**
+ * Handles a nested-guest \#VMEXIT (for all EXITCODE values except
+ * SVM_EXIT_INVALID).
+ *
+ * @returns VBox status code (informational status codes included).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM transient structure.
+ */
+static int hmR0SvmHandleExitNested(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_ASSERT_IN_NESTED_GUEST(&pVCpu->cpum.GstCtx);
+ Assert(pSvmTransient->u64ExitCode != SVM_EXIT_INVALID);
+ Assert(pSvmTransient->u64ExitCode <= SVM_EXIT_MAX);
+
+ /*
+ * We import the complete state here because we use separate VMCBs for the guest and the
+     * nested-guest, and the guest's VMCB is used after the #VMEXIT. Saving/restoring only
+     * the #VMEXIT-specific state would suffice only if we used the same VMCB for both.
+ */
+#define NST_GST_VMEXIT_CALL_RET(a_pVCpu, a_uExitCode, a_uExitInfo1, a_uExitInfo2) \
+ do { \
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL); \
+ return VBOXSTRICTRC_TODO(IEMExecSvmVmexit((a_pVCpu), (a_uExitCode), (a_uExitInfo1), (a_uExitInfo2))); \
+ } while (0)
+
+ /*
+ * For all the #VMEXITs here we primarily figure out if the #VMEXIT is expected by the
+ * nested-guest. If it isn't, it should be handled by the (outer) guest.
+ */
+ PSVMVMCB pVmcbNstGst = pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pVmcb);
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PSVMVMCBCTRL pVmcbNstGstCtrl = &pVmcbNstGst->ctrl;
+ uint64_t const uExitCode = pVmcbNstGstCtrl->u64ExitCode;
+ uint64_t const uExitInfo1 = pVmcbNstGstCtrl->u64ExitInfo1;
+ uint64_t const uExitInfo2 = pVmcbNstGstCtrl->u64ExitInfo2;
+
+ Assert(uExitCode == pVmcbNstGstCtrl->u64ExitCode);
+ switch (uExitCode)
+ {
+ case SVM_EXIT_CPUID:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_CPUID))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitCpuid(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_RDTSC:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_RDTSC))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitRdtsc(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_RDTSCP:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_RDTSCP))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitRdtscp(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_MONITOR:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_MONITOR))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitMonitor(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_MWAIT:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_MWAIT))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitMwait(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_HLT:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_HLT))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitHlt(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_MSR:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_MSR_PROT))
+ {
+ uint32_t const idMsr = pVCpu->cpum.GstCtx.ecx;
+ uint16_t offMsrpm;
+ uint8_t uMsrpmBit;
+ int rc = CPUMGetSvmMsrpmOffsetAndBit(idMsr, &offMsrpm, &uMsrpmBit);
+ if (RT_SUCCESS(rc))
+ {
+ Assert(uMsrpmBit == 0 || uMsrpmBit == 2 || uMsrpmBit == 4 || uMsrpmBit == 6);
+ Assert(offMsrpm < SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT);
+
+ uint8_t const *pbMsrBitmap = (uint8_t const *)pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pvMsrBitmap);
+ pbMsrBitmap += offMsrpm;
+ bool const fInterceptRead = RT_BOOL(*pbMsrBitmap & RT_BIT(uMsrpmBit));
+ bool const fInterceptWrite = RT_BOOL(*pbMsrBitmap & RT_BIT(uMsrpmBit + 1));
+
+ if ( (fInterceptWrite && pVmcbNstGstCtrl->u64ExitInfo1 == SVM_EXIT1_MSR_WRITE)
+ || (fInterceptRead && pVmcbNstGstCtrl->u64ExitInfo1 == SVM_EXIT1_MSR_READ))
+ {
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ }
+ }
+ else
+ {
+ /*
+ * MSRs not covered by the MSRPM automatically cause an #VMEXIT.
+ * See AMD-V spec. "15.11 MSR Intercepts".
+ */
+ Assert(rc == VERR_OUT_OF_RANGE);
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ }
+ }
+ return hmR0SvmExitMsr(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_IOIO:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_IOIO_PROT))
+ {
+ void *pvIoBitmap = pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pvIoBitmap);
+ SVMIOIOEXITINFO IoExitInfo;
+ IoExitInfo.u = pVmcbNstGst->ctrl.u64ExitInfo1;
+ bool const fIntercept = hmR0SvmIsIoInterceptSet(pvIoBitmap, &IoExitInfo);
+ if (fIntercept)
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ }
+ return hmR0SvmExitIOInstr(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_XCPT_PF:
+ {
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if (pVM->hm.s.fNestedPaging)
+ {
+ uint32_t const u32ErrCode = pVmcbNstGstCtrl->u64ExitInfo1;
+ uint64_t const uFaultAddress = pVmcbNstGstCtrl->u64ExitInfo2;
+
+ /* If the nested-guest is intercepting #PFs, cause a #PF #VMEXIT. */
+ if (CPUMIsGuestSvmXcptInterceptSet(pVCpu, pCtx, X86_XCPT_PF))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, u32ErrCode, uFaultAddress);
+
+ /* If the nested-guest is not intercepting #PFs, forward the #PF to the guest. */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR2);
+ hmR0SvmSetPendingXcptPF(pVCpu, u32ErrCode, uFaultAddress);
+ return VINF_SUCCESS;
+ }
+ return hmR0SvmExitXcptPF(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_XCPT_UD:
+ {
+ if (CPUMIsGuestSvmXcptInterceptSet(pVCpu, pCtx, X86_XCPT_UD))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ hmR0SvmSetPendingXcptUD(pVCpu);
+ return VINF_SUCCESS;
+ }
+
+ case SVM_EXIT_XCPT_MF:
+ {
+ if (CPUMIsGuestSvmXcptInterceptSet(pVCpu, pCtx, X86_XCPT_MF))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitXcptMF(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_XCPT_DB:
+ {
+ if (CPUMIsGuestSvmXcptInterceptSet(pVCpu, pCtx, X86_XCPT_DB))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmNestedExitXcptDB(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_XCPT_AC:
+ {
+ if (CPUMIsGuestSvmXcptInterceptSet(pVCpu, pCtx, X86_XCPT_AC))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitXcptAC(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_XCPT_BP:
+ {
+ if (CPUMIsGuestSvmXcptInterceptSet(pVCpu, pCtx, X86_XCPT_BP))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmNestedExitXcptBP(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_READ_CR0:
+ case SVM_EXIT_READ_CR3:
+ case SVM_EXIT_READ_CR4:
+ {
+ uint8_t const uCr = uExitCode - SVM_EXIT_READ_CR0;
+ if (CPUMIsGuestSvmReadCRxInterceptSet(pVCpu, pCtx, uCr))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitReadCRx(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_CR0_SEL_WRITE:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_CR0_SEL_WRITE))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitWriteCRx(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_WRITE_CR0:
+ case SVM_EXIT_WRITE_CR3:
+ case SVM_EXIT_WRITE_CR4:
+ case SVM_EXIT_WRITE_CR8: /* CR8 writes would go to the V_TPR rather than here, since we run with V_INTR_MASKING. */
+ {
+ uint8_t const uCr = uExitCode - SVM_EXIT_WRITE_CR0;
+ Log4Func(("Write CR%u: uExitInfo1=%#RX64 uExitInfo2=%#RX64\n", uCr, uExitInfo1, uExitInfo2));
+
+ if (CPUMIsGuestSvmWriteCRxInterceptSet(pVCpu, pCtx, uCr))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitWriteCRx(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_PAUSE:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_PAUSE))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitPause(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_VINTR:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_VINTR))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitUnexpected(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_INTR:
+ case SVM_EXIT_NMI:
+ case SVM_EXIT_SMI:
+ case SVM_EXIT_XCPT_NMI: /* Should not occur, SVM_EXIT_NMI is used instead. */
+ {
+ /*
+ * We shouldn't direct physical interrupts, NMIs, SMIs to the nested-guest.
+ *
+ * Although we don't intercept SMIs, the nested-guest might. Therefore, we might
+ * get an SMI #VMEXIT here so simply ignore rather than causing a corresponding
+ * nested-guest #VMEXIT.
+ *
+ * We shall import the complete state here as we may cause #VMEXITs from ring-3
+ * while trying to inject interrupts, see comment at the top of this function.
+ */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_ALL);
+ return hmR0SvmExitIntr(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_FERR_FREEZE:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_FERR_FREEZE))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitFerrFreeze(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_INVLPG:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_INVLPG))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitInvlpg(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_WBINVD:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_WBINVD))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitWbinvd(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_INVD:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_INVD))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitInvd(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_RDPMC:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_RDPMC))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitRdpmc(pVCpu, pSvmTransient);
+ }
+
+ default:
+ {
+ switch (uExitCode)
+ {
+ case SVM_EXIT_READ_DR0: case SVM_EXIT_READ_DR1: case SVM_EXIT_READ_DR2: case SVM_EXIT_READ_DR3:
+ case SVM_EXIT_READ_DR6: case SVM_EXIT_READ_DR7: case SVM_EXIT_READ_DR8: case SVM_EXIT_READ_DR9:
+ case SVM_EXIT_READ_DR10: case SVM_EXIT_READ_DR11: case SVM_EXIT_READ_DR12: case SVM_EXIT_READ_DR13:
+ case SVM_EXIT_READ_DR14: case SVM_EXIT_READ_DR15:
+ {
+ uint8_t const uDr = uExitCode - SVM_EXIT_READ_DR0;
+ if (CPUMIsGuestSvmReadDRxInterceptSet(pVCpu, pCtx, uDr))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitReadDRx(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_WRITE_DR0: case SVM_EXIT_WRITE_DR1: case SVM_EXIT_WRITE_DR2: case SVM_EXIT_WRITE_DR3:
+ case SVM_EXIT_WRITE_DR6: case SVM_EXIT_WRITE_DR7: case SVM_EXIT_WRITE_DR8: case SVM_EXIT_WRITE_DR9:
+ case SVM_EXIT_WRITE_DR10: case SVM_EXIT_WRITE_DR11: case SVM_EXIT_WRITE_DR12: case SVM_EXIT_WRITE_DR13:
+ case SVM_EXIT_WRITE_DR14: case SVM_EXIT_WRITE_DR15:
+ {
+ uint8_t const uDr = uExitCode - SVM_EXIT_WRITE_DR0;
+ if (CPUMIsGuestSvmWriteDRxInterceptSet(pVCpu, pCtx, uDr))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitWriteDRx(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_XCPT_DE:
+ /* SVM_EXIT_XCPT_DB: */ /* Handled above. */
+ /* SVM_EXIT_XCPT_NMI: */ /* Handled above. */
+ /* SVM_EXIT_XCPT_BP: */ /* Handled above. */
+ case SVM_EXIT_XCPT_OF:
+ case SVM_EXIT_XCPT_BR:
+ /* SVM_EXIT_XCPT_UD: */ /* Handled above. */
+ case SVM_EXIT_XCPT_NM:
+ case SVM_EXIT_XCPT_DF:
+ case SVM_EXIT_XCPT_CO_SEG_OVERRUN:
+ case SVM_EXIT_XCPT_TS:
+ case SVM_EXIT_XCPT_NP:
+ case SVM_EXIT_XCPT_SS:
+ case SVM_EXIT_XCPT_GP:
+ /* SVM_EXIT_XCPT_PF: */ /* Handled above. */
+ case SVM_EXIT_XCPT_15: /* Reserved. */
+ /* SVM_EXIT_XCPT_MF: */ /* Handled above. */
+ /* SVM_EXIT_XCPT_AC: */ /* Handled above. */
+ case SVM_EXIT_XCPT_MC:
+ case SVM_EXIT_XCPT_XF:
+ case SVM_EXIT_XCPT_20: case SVM_EXIT_XCPT_21: case SVM_EXIT_XCPT_22: case SVM_EXIT_XCPT_23:
+ case SVM_EXIT_XCPT_24: case SVM_EXIT_XCPT_25: case SVM_EXIT_XCPT_26: case SVM_EXIT_XCPT_27:
+ case SVM_EXIT_XCPT_28: case SVM_EXIT_XCPT_29: case SVM_EXIT_XCPT_30: case SVM_EXIT_XCPT_31:
+ {
+ uint8_t const uVector = uExitCode - SVM_EXIT_XCPT_0;
+ if (CPUMIsGuestSvmXcptInterceptSet(pVCpu, pCtx, uVector))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitXcptGeneric(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_XSETBV:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_XSETBV))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitXsetbv(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_TASK_SWITCH:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_TASK_SWITCH))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitTaskSwitch(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_IRET:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_IRET))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitIret(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_SHUTDOWN:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_SHUTDOWN))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitShutdown(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_VMMCALL:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_VMMCALL))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitVmmCall(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_CLGI:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_CLGI))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitClgi(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_STGI:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_STGI))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitStgi(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_VMLOAD:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_VMLOAD))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitVmload(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_VMSAVE:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_VMSAVE))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitVmsave(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_INVLPGA:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_INVLPGA))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitInvlpga(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_VMRUN:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_VMRUN))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitVmrun(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_RSM:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_RSM))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ hmR0SvmSetPendingXcptUD(pVCpu);
+ return VINF_SUCCESS;
+ }
+
+ case SVM_EXIT_SKINIT:
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_SKINIT))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ hmR0SvmSetPendingXcptUD(pVCpu);
+ return VINF_SUCCESS;
+ }
+
+ case SVM_EXIT_NPF:
+ {
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
+ return hmR0SvmExitNestedPF(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_INIT: /* We shouldn't get INIT signals while executing a nested-guest. */
+ return hmR0SvmExitUnexpected(pVCpu, pSvmTransient);
+
+ default:
+ {
+ AssertMsgFailed(("hmR0SvmHandleExitNested: Unknown exit code %#x\n", pSvmTransient->u64ExitCode));
+ pVCpu->hm.s.u32HMError = pSvmTransient->u64ExitCode;
+ return VERR_SVM_UNKNOWN_EXIT;
+ }
+ }
+ }
+ }
+ /* not reached */
+
+#undef NST_GST_VMEXIT_CALL_RET
+}
+#endif
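+
+
+/*
+ * Illustrative, self-contained sketch (not taken from the VMM sources) of the MSR
+ * permission map lookup that CPUMGetSvmMsrpmOffsetAndBit() performs for the
+ * SVM_EXIT_MSR case in hmR0SvmHandleExitNested() above: the 8 KB MSRPM covers three
+ * ranges of 0x2000 MSRs each, with two bits per MSR -- the even bit intercepts reads
+ * and the following odd bit intercepts writes.  MSRs outside these ranges always
+ * cause a #VMEXIT, see AMD-V spec. "15.11 MSR Intercepts".  The names and return
+ * convention are assumptions for this sketch only (assumes <cstdint>).
+ */
+static int sketchGetMsrpmOffsetAndBit(uint32_t idMsr, uint16_t *poffMsrpm, uint8_t *piBitRead)
+{
+    uint32_t offRangeBase, offMsr;
+    if (idMsr <= UINT32_C(0x00001fff))
+    {
+        offRangeBase = 0x000;
+        offMsr       = idMsr;                                /* 0x00000000..0x00001fff */
+    }
+    else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x1fff))
+    {
+        offRangeBase = 0x800;
+        offMsr       = idMsr - UINT32_C(0xc0000000);         /* 0xc0000000..0xc0001fff */
+    }
+    else if (idMsr - UINT32_C(0xc0010000) <= UINT32_C(0x1fff))
+    {
+        offRangeBase = 0x1000;
+        offMsr       = idMsr - UINT32_C(0xc0010000);         /* 0xc0010000..0xc0011fff */
+    }
+    else
+        return -1;                       /* Not covered by the MSRPM: always intercepted. */
+    *poffMsrpm = (uint16_t)(offRangeBase + offMsr / 4);      /* 2 bits per MSR => 4 MSRs per byte. */
+    *piBitRead = (uint8_t)((offMsr % 4) * 2);                /* The write-intercept bit is *piBitRead + 1. */
+    return 0;
+}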
+
+
+/**
+ * Handles a guest \#VMEXIT (for all EXITCODE values except SVM_EXIT_INVALID).
+ *
+ * @returns VBox status code (informational status codes included).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM transient structure.
+ */
+static int hmR0SvmHandleExit(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ Assert(pSvmTransient->u64ExitCode != SVM_EXIT_INVALID);
+ Assert(pSvmTransient->u64ExitCode <= SVM_EXIT_MAX);
+
+#ifdef DEBUG_ramshankar
+# define VMEXIT_CALL_RET(a_fDbg, a_CallExpr) \
+ do { \
+ if ((a_fDbg) == 1) \
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL); \
+ int rc = a_CallExpr; \
+ if ((a_fDbg) == 1) \
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); \
+ return rc; \
+ } while (0)
+#else
+# define VMEXIT_CALL_RET(a_fDbg, a_CallExpr) return a_CallExpr
+#endif
+
+ /*
+ * The ordering of the case labels is based on most-frequently-occurring #VMEXITs
+ * for most guests under normal workloads (for some definition of "normal").
+ */
+ uint64_t const uExitCode = pSvmTransient->u64ExitCode;
+ switch (uExitCode)
+ {
+ case SVM_EXIT_NPF: VMEXIT_CALL_RET(0, hmR0SvmExitNestedPF(pVCpu, pSvmTransient));
+ case SVM_EXIT_IOIO: VMEXIT_CALL_RET(0, hmR0SvmExitIOInstr(pVCpu, pSvmTransient));
+ case SVM_EXIT_RDTSC: VMEXIT_CALL_RET(0, hmR0SvmExitRdtsc(pVCpu, pSvmTransient));
+ case SVM_EXIT_RDTSCP: VMEXIT_CALL_RET(0, hmR0SvmExitRdtscp(pVCpu, pSvmTransient));
+ case SVM_EXIT_CPUID: VMEXIT_CALL_RET(0, hmR0SvmExitCpuid(pVCpu, pSvmTransient));
+ case SVM_EXIT_XCPT_PF: VMEXIT_CALL_RET(0, hmR0SvmExitXcptPF(pVCpu, pSvmTransient));
+ case SVM_EXIT_MSR: VMEXIT_CALL_RET(0, hmR0SvmExitMsr(pVCpu, pSvmTransient));
+ case SVM_EXIT_MONITOR: VMEXIT_CALL_RET(0, hmR0SvmExitMonitor(pVCpu, pSvmTransient));
+ case SVM_EXIT_MWAIT: VMEXIT_CALL_RET(0, hmR0SvmExitMwait(pVCpu, pSvmTransient));
+ case SVM_EXIT_HLT: VMEXIT_CALL_RET(0, hmR0SvmExitHlt(pVCpu, pSvmTransient));
+
+ case SVM_EXIT_XCPT_NMI: /* Should not occur, SVM_EXIT_NMI is used instead. */
+ case SVM_EXIT_INTR:
+ case SVM_EXIT_NMI: VMEXIT_CALL_RET(0, hmR0SvmExitIntr(pVCpu, pSvmTransient));
+
+ case SVM_EXIT_READ_CR0:
+ case SVM_EXIT_READ_CR3:
+ case SVM_EXIT_READ_CR4: VMEXIT_CALL_RET(0, hmR0SvmExitReadCRx(pVCpu, pSvmTransient));
+
+ case SVM_EXIT_CR0_SEL_WRITE:
+ case SVM_EXIT_WRITE_CR0:
+ case SVM_EXIT_WRITE_CR3:
+ case SVM_EXIT_WRITE_CR4:
+ case SVM_EXIT_WRITE_CR8: VMEXIT_CALL_RET(0, hmR0SvmExitWriteCRx(pVCpu, pSvmTransient));
+
+ case SVM_EXIT_VINTR: VMEXIT_CALL_RET(0, hmR0SvmExitVIntr(pVCpu, pSvmTransient));
+ case SVM_EXIT_PAUSE: VMEXIT_CALL_RET(0, hmR0SvmExitPause(pVCpu, pSvmTransient));
+ case SVM_EXIT_VMMCALL: VMEXIT_CALL_RET(0, hmR0SvmExitVmmCall(pVCpu, pSvmTransient));
+ case SVM_EXIT_INVLPG: VMEXIT_CALL_RET(0, hmR0SvmExitInvlpg(pVCpu, pSvmTransient));
+ case SVM_EXIT_WBINVD: VMEXIT_CALL_RET(0, hmR0SvmExitWbinvd(pVCpu, pSvmTransient));
+ case SVM_EXIT_INVD: VMEXIT_CALL_RET(0, hmR0SvmExitInvd(pVCpu, pSvmTransient));
+ case SVM_EXIT_RDPMC: VMEXIT_CALL_RET(0, hmR0SvmExitRdpmc(pVCpu, pSvmTransient));
+ case SVM_EXIT_IRET: VMEXIT_CALL_RET(0, hmR0SvmExitIret(pVCpu, pSvmTransient));
+ case SVM_EXIT_XCPT_UD: VMEXIT_CALL_RET(0, hmR0SvmExitXcptUD(pVCpu, pSvmTransient));
+ case SVM_EXIT_XCPT_MF: VMEXIT_CALL_RET(0, hmR0SvmExitXcptMF(pVCpu, pSvmTransient));
+ case SVM_EXIT_XCPT_DB: VMEXIT_CALL_RET(0, hmR0SvmExitXcptDB(pVCpu, pSvmTransient));
+ case SVM_EXIT_XCPT_AC: VMEXIT_CALL_RET(0, hmR0SvmExitXcptAC(pVCpu, pSvmTransient));
+ case SVM_EXIT_XCPT_BP: VMEXIT_CALL_RET(0, hmR0SvmExitXcptBP(pVCpu, pSvmTransient));
+ case SVM_EXIT_XCPT_GP: VMEXIT_CALL_RET(0, hmR0SvmExitXcptGP(pVCpu, pSvmTransient));
+ case SVM_EXIT_XSETBV: VMEXIT_CALL_RET(0, hmR0SvmExitXsetbv(pVCpu, pSvmTransient));
+ case SVM_EXIT_FERR_FREEZE: VMEXIT_CALL_RET(0, hmR0SvmExitFerrFreeze(pVCpu, pSvmTransient));
+
+ default:
+ {
+ switch (pSvmTransient->u64ExitCode)
+ {
+ case SVM_EXIT_READ_DR0: case SVM_EXIT_READ_DR1: case SVM_EXIT_READ_DR2: case SVM_EXIT_READ_DR3:
+ case SVM_EXIT_READ_DR6: case SVM_EXIT_READ_DR7: case SVM_EXIT_READ_DR8: case SVM_EXIT_READ_DR9:
+ case SVM_EXIT_READ_DR10: case SVM_EXIT_READ_DR11: case SVM_EXIT_READ_DR12: case SVM_EXIT_READ_DR13:
+ case SVM_EXIT_READ_DR14: case SVM_EXIT_READ_DR15:
+ VMEXIT_CALL_RET(0, hmR0SvmExitReadDRx(pVCpu, pSvmTransient));
+
+ case SVM_EXIT_WRITE_DR0: case SVM_EXIT_WRITE_DR1: case SVM_EXIT_WRITE_DR2: case SVM_EXIT_WRITE_DR3:
+ case SVM_EXIT_WRITE_DR6: case SVM_EXIT_WRITE_DR7: case SVM_EXIT_WRITE_DR8: case SVM_EXIT_WRITE_DR9:
+ case SVM_EXIT_WRITE_DR10: case SVM_EXIT_WRITE_DR11: case SVM_EXIT_WRITE_DR12: case SVM_EXIT_WRITE_DR13:
+ case SVM_EXIT_WRITE_DR14: case SVM_EXIT_WRITE_DR15:
+ VMEXIT_CALL_RET(0, hmR0SvmExitWriteDRx(pVCpu, pSvmTransient));
+
+ case SVM_EXIT_TASK_SWITCH: VMEXIT_CALL_RET(0, hmR0SvmExitTaskSwitch(pVCpu, pSvmTransient));
+ case SVM_EXIT_SHUTDOWN: VMEXIT_CALL_RET(0, hmR0SvmExitShutdown(pVCpu, pSvmTransient));
+
+ case SVM_EXIT_SMI:
+ case SVM_EXIT_INIT:
+ {
+ /*
+                     * We don't intercept SMIs. As for INIT signals, they really shouldn't ever happen here.
+                     * If one ever does, we want to know about it, so log the exit code and bail.
+ */
+ VMEXIT_CALL_RET(0, hmR0SvmExitUnexpected(pVCpu, pSvmTransient));
+ }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ case SVM_EXIT_CLGI: VMEXIT_CALL_RET(0, hmR0SvmExitClgi(pVCpu, pSvmTransient));
+ case SVM_EXIT_STGI: VMEXIT_CALL_RET(0, hmR0SvmExitStgi(pVCpu, pSvmTransient));
+ case SVM_EXIT_VMLOAD: VMEXIT_CALL_RET(0, hmR0SvmExitVmload(pVCpu, pSvmTransient));
+ case SVM_EXIT_VMSAVE: VMEXIT_CALL_RET(0, hmR0SvmExitVmsave(pVCpu, pSvmTransient));
+ case SVM_EXIT_INVLPGA: VMEXIT_CALL_RET(0, hmR0SvmExitInvlpga(pVCpu, pSvmTransient));
+ case SVM_EXIT_VMRUN: VMEXIT_CALL_RET(0, hmR0SvmExitVmrun(pVCpu, pSvmTransient));
+#else
+ case SVM_EXIT_CLGI:
+ case SVM_EXIT_STGI:
+ case SVM_EXIT_VMLOAD:
+ case SVM_EXIT_VMSAVE:
+ case SVM_EXIT_INVLPGA:
+ case SVM_EXIT_VMRUN:
+#endif
+ case SVM_EXIT_RSM:
+ case SVM_EXIT_SKINIT:
+ {
+ hmR0SvmSetPendingXcptUD(pVCpu);
+ return VINF_SUCCESS;
+ }
+
+#ifdef HMSVM_ALWAYS_TRAP_ALL_XCPTS
+ case SVM_EXIT_XCPT_DE:
+ /* SVM_EXIT_XCPT_DB: */ /* Handled above. */
+ /* SVM_EXIT_XCPT_NMI: */ /* Handled above. */
+ /* SVM_EXIT_XCPT_BP: */ /* Handled above. */
+ case SVM_EXIT_XCPT_OF:
+ case SVM_EXIT_XCPT_BR:
+ /* SVM_EXIT_XCPT_UD: */ /* Handled above. */
+ case SVM_EXIT_XCPT_NM:
+ case SVM_EXIT_XCPT_DF:
+ case SVM_EXIT_XCPT_CO_SEG_OVERRUN:
+ case SVM_EXIT_XCPT_TS:
+ case SVM_EXIT_XCPT_NP:
+ case SVM_EXIT_XCPT_SS:
+ /* SVM_EXIT_XCPT_GP: */ /* Handled above. */
+ /* SVM_EXIT_XCPT_PF: */
+ case SVM_EXIT_XCPT_15: /* Reserved. */
+ /* SVM_EXIT_XCPT_MF: */ /* Handled above. */
+ /* SVM_EXIT_XCPT_AC: */ /* Handled above. */
+ case SVM_EXIT_XCPT_MC:
+ case SVM_EXIT_XCPT_XF:
+ case SVM_EXIT_XCPT_20: case SVM_EXIT_XCPT_21: case SVM_EXIT_XCPT_22: case SVM_EXIT_XCPT_23:
+ case SVM_EXIT_XCPT_24: case SVM_EXIT_XCPT_25: case SVM_EXIT_XCPT_26: case SVM_EXIT_XCPT_27:
+ case SVM_EXIT_XCPT_28: case SVM_EXIT_XCPT_29: case SVM_EXIT_XCPT_30: case SVM_EXIT_XCPT_31:
+ VMEXIT_CALL_RET(0, hmR0SvmExitXcptGeneric(pVCpu, pSvmTransient));
+#endif /* HMSVM_ALWAYS_TRAP_ALL_XCPTS */
+
+ default:
+ {
+ AssertMsgFailed(("hmR0SvmHandleExit: Unknown exit code %#RX64\n", uExitCode));
+ pVCpu->hm.s.u32HMError = uExitCode;
+ return VERR_SVM_UNKNOWN_EXIT;
+ }
+ }
+ }
+ }
+ /* not reached */
+#undef VMEXIT_CALL_RET
+}
+
+
+#ifdef VBOX_STRICT
+/* Is there some generic IPRT define for this that is not in Runtime/internal/\* ?? */
+# define HMSVM_ASSERT_PREEMPT_CPUID_VAR() \
+ RTCPUID const idAssertCpu = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId()
+
+# define HMSVM_ASSERT_PREEMPT_CPUID() \
+ do \
+ { \
+ RTCPUID const idAssertCpuNow = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId(); \
+ AssertMsg(idAssertCpu == idAssertCpuNow, ("SVM %#x, %#x\n", idAssertCpu, idAssertCpuNow)); \
+ } while (0)
+
+# define HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pSvmTransient) \
+ do { \
+ AssertPtr((a_pVCpu)); \
+ AssertPtr((a_pSvmTransient)); \
+ Assert(ASMIntAreEnabled()); \
+ HMSVM_ASSERT_PREEMPT_SAFE((a_pVCpu)); \
+ HMSVM_ASSERT_PREEMPT_CPUID_VAR(); \
+ Log4Func(("vcpu[%u] -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-\n", (a_pVCpu)->idCpu)); \
+ HMSVM_ASSERT_PREEMPT_SAFE((a_pVCpu)); \
+ if (VMMR0IsLogFlushDisabled((a_pVCpu))) \
+ HMSVM_ASSERT_PREEMPT_CPUID(); \
+ } while (0)
+#else
+# define HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pSvmTransient) \
+ do { \
+ RT_NOREF2(a_pVCpu, a_pSvmTransient); \
+ } while (0)
+#endif
+
+
+/**
+ * Gets the IEM exception flags for the specified SVM event.
+ *
+ * @returns The IEM exception flags.
+ * @param pEvent Pointer to the SVM event.
+ *
+ * @remarks This function currently only constructs flags required for
+ * IEMEvaluateRecursiveXcpt and not the complete flags (e.g. error-code
+ * and CR2 aspects of an exception are not included).
+ */
+static uint32_t hmR0SvmGetIemXcptFlags(PCSVMEVENT pEvent)
+{
+ uint8_t const uEventType = pEvent->n.u3Type;
+ uint32_t fIemXcptFlags;
+ switch (uEventType)
+ {
+ case SVM_EVENT_EXCEPTION:
+ /*
+ * Only INT3 and INTO instructions can raise #BP and #OF exceptions.
+ * See AMD spec. Table 8-1. "Interrupt Vector Source and Cause".
+ */
+ if (pEvent->n.u8Vector == X86_XCPT_BP)
+ {
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT | IEM_XCPT_FLAGS_BP_INSTR;
+ break;
+ }
+ if (pEvent->n.u8Vector == X86_XCPT_OF)
+ {
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT | IEM_XCPT_FLAGS_OF_INSTR;
+ break;
+ }
+ /** @todo How do we distinguish ICEBP \#DB from the regular one? */
+ RT_FALL_THRU();
+ case SVM_EVENT_NMI:
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_CPU_XCPT;
+ break;
+
+ case SVM_EVENT_EXTERNAL_IRQ:
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_EXT_INT;
+ break;
+
+ case SVM_EVENT_SOFTWARE_INT:
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT;
+ break;
+
+ default:
+ fIemXcptFlags = 0;
+ AssertMsgFailed(("Unexpected event type! uEventType=%#x uVector=%#x", uEventType, pEvent->n.u8Vector));
+ break;
+ }
+ return fIemXcptFlags;
+}
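+
+/*
+ * Minimal usage sketch (this mirrors how hmR0SvmCheckExitDueToEventDelivery() below
+ * consumes these flags):
+ *     uint32_t const fIdtVectorFlags = hmR0SvmGetIemXcptFlags(&pVmcb->ctrl.ExitIntInfo);
+ *     enmRaise = IEMEvaluateRecursiveXcpt(pVCpu, fIdtVectorFlags, uIdtVector,
+ *                                         IEM_XCPT_FLAGS_T_CPU_XCPT, uExitVector, &fRaiseInfo);
+ */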
+
+
+/**
+ * Handle a condition that occurred while delivering an event through the guest
+ * IDT.
+ *
+ * @returns VBox status code (informational error codes included).
+ * @retval VINF_SUCCESS if we should continue handling the \#VMEXIT.
+ * @retval VINF_HM_DOUBLE_FAULT if a \#DF condition was detected and we ought to
+ *          continue execution of the guest which will deliver the \#DF.
+ * @retval VINF_EM_RESET if we detected a triple-fault condition.
+ * @retval VERR_EM_GUEST_CPU_HANG if we detected a guest CPU hang.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0SvmCheckExitDueToEventDelivery(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ int rc = VINF_SUCCESS;
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR2);
+
+ Log4(("EXITINTINFO: Pending vectoring event %#RX64 Valid=%RTbool ErrValid=%RTbool Err=%#RX32 Type=%u Vector=%u\n",
+ pVmcb->ctrl.ExitIntInfo.u, !!pVmcb->ctrl.ExitIntInfo.n.u1Valid, !!pVmcb->ctrl.ExitIntInfo.n.u1ErrorCodeValid,
+ pVmcb->ctrl.ExitIntInfo.n.u32ErrorCode, pVmcb->ctrl.ExitIntInfo.n.u3Type, pVmcb->ctrl.ExitIntInfo.n.u8Vector));
+
+ /*
+     * The EXITINTINFO field (if valid) contains the prior event (IDT vector) that was being
+     * delivered to the guest when the condition causing this intercepted #VMEXIT (exit vector)
+     * occurred.
+ *
+ * See AMD spec. 15.7.3 "EXITINFO Pseudo-Code".
+ */
+ if (pVmcb->ctrl.ExitIntInfo.n.u1Valid)
+ {
+ IEMXCPTRAISE enmRaise;
+ IEMXCPTRAISEINFO fRaiseInfo;
+ bool const fExitIsHwXcpt = pSvmTransient->u64ExitCode - SVM_EXIT_XCPT_0 <= SVM_EXIT_XCPT_31;
+ uint8_t const uIdtVector = pVmcb->ctrl.ExitIntInfo.n.u8Vector;
+ if (fExitIsHwXcpt)
+ {
+ uint8_t const uExitVector = pSvmTransient->u64ExitCode - SVM_EXIT_XCPT_0;
+ uint32_t const fIdtVectorFlags = hmR0SvmGetIemXcptFlags(&pVmcb->ctrl.ExitIntInfo);
+ uint32_t const fExitVectorFlags = IEM_XCPT_FLAGS_T_CPU_XCPT;
+ enmRaise = IEMEvaluateRecursiveXcpt(pVCpu, fIdtVectorFlags, uIdtVector, fExitVectorFlags, uExitVector, &fRaiseInfo);
+ }
+ else
+ {
+ /*
+ * If delivery of an event caused a #VMEXIT that is not an exception (e.g. #NPF)
+ * then we end up here.
+ *
+ * If the event was:
+ * - a software interrupt, we can re-execute the instruction which will
+ * regenerate the event.
+ * - an NMI, we need to clear NMI blocking and re-inject the NMI.
+ * - a hardware exception or external interrupt, we re-inject it.
+ */
+ fRaiseInfo = IEMXCPTRAISEINFO_NONE;
+ if (pVmcb->ctrl.ExitIntInfo.n.u3Type == SVM_EVENT_SOFTWARE_INT)
+ enmRaise = IEMXCPTRAISE_REEXEC_INSTR;
+ else
+ enmRaise = IEMXCPTRAISE_PREV_EVENT;
+ }
+
+ switch (enmRaise)
+ {
+ case IEMXCPTRAISE_CURRENT_XCPT:
+ case IEMXCPTRAISE_PREV_EVENT:
+ {
+ /* For software interrupts, we shall re-execute the instruction. */
+ if (!(fRaiseInfo & IEMXCPTRAISEINFO_SOFT_INT_XCPT))
+ {
+ RTGCUINTPTR GCPtrFaultAddress = 0;
+
+ /* If we are re-injecting an NMI, clear NMI blocking. */
+ if (pVmcb->ctrl.ExitIntInfo.n.u3Type == SVM_EVENT_NMI)
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS);
+
+ /* Determine a vectoring #PF condition, see comment in hmR0SvmExitXcptPF(). */
+ if (fRaiseInfo & (IEMXCPTRAISEINFO_EXT_INT_PF | IEMXCPTRAISEINFO_NMI_PF))
+ {
+ pSvmTransient->fVectoringPF = true;
+ Log4Func(("IDT: Pending vectoring #PF due to delivery of Ext-Int/NMI. uCR2=%#RX64\n",
+ pVCpu->cpum.GstCtx.cr2));
+ }
+ else if ( pVmcb->ctrl.ExitIntInfo.n.u3Type == SVM_EVENT_EXCEPTION
+ && uIdtVector == X86_XCPT_PF)
+ {
+ /*
+ * If the previous exception was a #PF, we need to recover the CR2 value.
+ * This can't happen with shadow paging.
+ */
+ GCPtrFaultAddress = pVCpu->cpum.GstCtx.cr2;
+ }
+
+ /*
+                     * Without nested paging, when uExitVector is #PF the CR2 value will be updated from the
+                     * VMCB's exit-info fields if it is a guest #PF; see hmR0SvmExitXcptPF().
+ */
+ Assert(pVmcb->ctrl.ExitIntInfo.n.u3Type != SVM_EVENT_SOFTWARE_INT);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectReflect);
+ hmR0SvmSetPendingEvent(pVCpu, &pVmcb->ctrl.ExitIntInfo, GCPtrFaultAddress);
+
+ Log4Func(("IDT: Pending vectoring event %#RX64 ErrValid=%RTbool Err=%#RX32 GCPtrFaultAddress=%#RX64\n",
+ pVmcb->ctrl.ExitIntInfo.u, RT_BOOL(pVmcb->ctrl.ExitIntInfo.n.u1ErrorCodeValid),
+ pVmcb->ctrl.ExitIntInfo.n.u32ErrorCode, GCPtrFaultAddress));
+ }
+ break;
+ }
+
+ case IEMXCPTRAISE_REEXEC_INSTR:
+ {
+ Assert(rc == VINF_SUCCESS);
+ break;
+ }
+
+ case IEMXCPTRAISE_DOUBLE_FAULT:
+ {
+ /*
+                 * Determine a vectoring double #PF condition. It is used later, when PGM evaluates
+                 * the second #PF as a guest #PF (and not a shadow #PF) that then needs to be
+                 * converted into a #DF.
+ */
+ if (fRaiseInfo & IEMXCPTRAISEINFO_PF_PF)
+ {
+ Log4Func(("IDT: Pending vectoring double #PF uCR2=%#RX64\n", pVCpu->cpum.GstCtx.cr2));
+ pSvmTransient->fVectoringDoublePF = true;
+ Assert(rc == VINF_SUCCESS);
+ }
+ else
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectConvertDF);
+ hmR0SvmSetPendingXcptDF(pVCpu);
+ rc = VINF_HM_DOUBLE_FAULT;
+ }
+ break;
+ }
+
+ case IEMXCPTRAISE_TRIPLE_FAULT:
+ {
+ rc = VINF_EM_RESET;
+ break;
+ }
+
+ case IEMXCPTRAISE_CPU_HANG:
+ {
+ rc = VERR_EM_GUEST_CPU_HANG;
+ break;
+ }
+
+ default:
+ AssertMsgFailedBreakStmt(("Bogus enmRaise value: %d (%#x)\n", enmRaise, enmRaise), rc = VERR_SVM_IPE_2);
+ }
+ }
+ Assert(rc == VINF_SUCCESS || rc == VINF_HM_DOUBLE_FAULT || rc == VINF_EM_RESET || rc == VERR_EM_GUEST_CPU_HANG);
+ return rc;
+}
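+
+/*
+ * Note: exit handlers do not call this function directly but go through the
+ * HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY macro (see hmR0SvmExitNestedPF,
+ * hmR0SvmExitTaskSwitch and hmR0SvmExitXcptPF below), which, going by the @retval
+ * documentation above, is expected to stop #VMEXIT handling and propagate any status
+ * other than VINF_SUCCESS.
+ */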
+
+
+/**
+ * Advances the guest RIP by the number of bytes specified in @a cb.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param cb RIP increment value in bytes.
+ */
+DECLINLINE(void) hmR0SvmAdvanceRip(PVMCPUCC pVCpu, uint32_t cb)
+{
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ pCtx->rip += cb;
+
+    /* Update the interrupt shadow: once RIP moves past the instruction that set the inhibition, clear it. */
+ if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
+ && pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+}
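+
+/*
+ * Note: the callers below (the TPR-patched WRMSR path, VMMCALL and PAUSE) pass cb=0 when
+ * they cannot determine the instruction length; RIP is then left unchanged, so the
+ * instruction will typically just trap again instead of RIP being advanced by a bogus
+ * amount.
+ */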
+
+
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- #VMEXIT handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+
+/** @name \#VMEXIT handlers.
+ * @{
+ */
+
+/**
+ * \#VMEXIT handler for external interrupts, NMIs, FPU assertion freeze and INIT
+ * signals (SVM_EXIT_INTR, SVM_EXIT_NMI, SVM_EXIT_FERR_FREEZE, SVM_EXIT_INIT).
+ */
+HMSVM_EXIT_DECL hmR0SvmExitIntr(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ if (pSvmTransient->u64ExitCode == SVM_EXIT_NMI)
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
+ else if (pSvmTransient->u64ExitCode == SVM_EXIT_INTR)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitExtInt);
+
+ /*
+ * AMD-V has no preemption timer and the generic periodic preemption timer has no way to
+     * signal -before- the timer fires whether the current interrupt is our own timer or
+     * some other host interrupt. We also cannot examine what interrupt it is until the
+     * host actually takes the interrupt.
+ *
+ * Going back to executing guest code here unconditionally causes random scheduling
+ * problems (observed on an AMD Phenom 9850 Quad-Core on Windows 64-bit host).
+ */
+ return VINF_EM_RAW_INTERRUPT;
+}
+
+
+/**
+ * \#VMEXIT handler for WBINVD (SVM_EXIT_WBINVD). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitWbinvd(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedWbinvd(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
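+
+/*
+ * Note: the simple instruction intercepts that follow (INVD, CPUID, RDTSC, RDTSCP, RDPMC,
+ * HLT, MWAIT, ...) all share the pattern above: when the CPU saves the next RIP
+ * (NRIP_SAVE), the decoded instruction length is handed to the matching IEMExecDecodedXxx
+ * helper, otherwise we fall back to IEMExecOne().  A VINF_IEM_RAISED_XCPT status is folded
+ * into VINF_SUCCESS after setting HM_CHANGED_RAISED_XCPT_MASK, so the state IEM touched
+ * while raising the exception gets re-exported before the next guest run.
+ */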
+
+
+/**
+ * \#VMEXIT handler for INVD (SVM_EXIT_INVD). Unconditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitInvd(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedInvd(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for CPUID (SVM_EXIT_CPUID). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitCpuid(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_RAX | CPUMCTX_EXTRN_RCX);
+ VBOXSTRICTRC rcStrict;
+ PCEMEXITREC pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
+ EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_CPUID),
+ pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
+ if (!pExitRec)
+ {
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedCpuid(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ }
+ else
+ {
+ /*
+ * Frequent exit or something needing probing. Get state and call EMHistoryExec.
+ */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+
+ Log4(("CpuIdExit/%u: %04x:%08RX64: %#x/%#x -> EMHistoryExec\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ecx));
+
+ rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
+
+ Log4(("CpuIdExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+ VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
+ }
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
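+
+/*
+ * Note: like the I/O and MMIO #NPF handlers further down, CPUID exits are fed into the EM
+ * exit history (EMHistoryUpdateFlagsAndTypeAndPC).  Once an exit record is returned for
+ * this PC, the full guest state is imported and EMHistoryExec() takes over instead of the
+ * single instruction being handled via IEM.
+ */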
+
+
+/**
+ * \#VMEXIT handler for RDTSC (SVM_EXIT_RDTSC). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitRdtsc(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_CR4);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedRdtsc(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_SUCCESS)
+ pSvmTransient->fUpdateTscOffsetting = true;
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for RDTSCP (SVM_EXIT_RDTSCP). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitRdtscp(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_TSC_AUX);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedRdtscp(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_SUCCESS)
+ pSvmTransient->fUpdateTscOffsetting = true;
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for RDPMC (SVM_EXIT_RDPMC). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitRdpmc(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_CR4);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedRdpmc(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for INVLPG (SVM_EXIT_INVLPG). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitInvlpg(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsDecodeAssists = hmR0SvmSupportsDecodeAssists(pVCpu);
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if ( fSupportsDecodeAssists
+ && fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ RTGCPTR const GCPtrPage = pVmcb->ctrl.u64ExitInfo1;
+ rcStrict = IEMExecDecodedInvlpg(pVCpu, cbInstr, GCPtrPage);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_VAL(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for HLT (SVM_EXIT_HLT). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitHlt(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedHlt(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if ( rcStrict == VINF_EM_HALT
+ || rcStrict == VINF_SUCCESS)
+ rcStrict = EMShouldContinueAfterHalt(pVCpu, &pVCpu->cpum.GstCtx) ? VINF_SUCCESS : VINF_EM_HALT;
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ if (rcStrict != VINF_SUCCESS)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHltToR3);
+    return VBOXSTRICTRC_VAL(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for MONITOR (SVM_EXIT_MONITOR). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitMonitor(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ /*
+     * If the instruction length supplied by the CPU is 3 bytes, we can be certain that no
+     * segment override prefix is present (and thus use the default segment DS). Otherwise, a
+     * segment override prefix or other prefixes might be used, in which case we fall back to
+     * IEMExecOne() to figure it out.
+ */
+ VBOXSTRICTRC rcStrict;
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = hmR0SvmSupportsNextRipSave(pVCpu) ? pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip : 0;
+ if (cbInstr)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK | CPUMCTX_EXTRN_DS);
+ rcStrict = IEMExecDecodedMonitor(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for MWAIT (SVM_EXIT_MWAIT). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitMwait(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedMwait(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if ( rcStrict == VINF_EM_HALT
+ && EMMonitorWaitShouldContinue(pVCpu, &pVCpu->cpum.GstCtx))
+ rcStrict = VINF_SUCCESS;
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for shutdown (triple-fault) (SVM_EXIT_SHUTDOWN). Conditional
+ * \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitShutdown(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ return VINF_EM_RESET;
+}
+
+
+/**
+ * \#VMEXIT handler for unexpected exits. Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitUnexpected(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ AssertMsgFailed(("hmR0SvmExitUnexpected: ExitCode=%#RX64 uExitInfo1=%#RX64 uExitInfo2=%#RX64\n", pSvmTransient->u64ExitCode,
+ pVmcb->ctrl.u64ExitInfo1, pVmcb->ctrl.u64ExitInfo2));
+ RT_NOREF(pVmcb);
+ pVCpu->hm.s.u32HMError = (uint32_t)pSvmTransient->u64ExitCode;
+ return VERR_SVM_UNEXPECTED_EXIT;
+}
+
+
+/**
+ * \#VMEXIT handler for CRx reads (SVM_EXIT_READ_CR*). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitReadCRx(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ Log4Func(("CS:RIP=%04x:%#RX64\n", pCtx->cs.Sel, pCtx->rip));
+#ifdef VBOX_WITH_STATISTICS
+ switch (pSvmTransient->u64ExitCode)
+ {
+ case SVM_EXIT_READ_CR0: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR0Read); break;
+ case SVM_EXIT_READ_CR2: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR2Read); break;
+ case SVM_EXIT_READ_CR3: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR3Read); break;
+ case SVM_EXIT_READ_CR4: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR4Read); break;
+ case SVM_EXIT_READ_CR8: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR8Read); break;
+ }
+#endif
+
+ bool const fSupportsDecodeAssists = hmR0SvmSupportsDecodeAssists(pVCpu);
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if ( fSupportsDecodeAssists
+ && fSupportsNextRipSave)
+ {
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ bool const fMovCRx = RT_BOOL(pVmcb->ctrl.u64ExitInfo1 & SVM_EXIT1_MOV_CRX_MASK);
+ if (fMovCRx)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_CR_MASK
+ | CPUMCTX_EXTRN_APIC_TPR);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pCtx->rip;
+ uint8_t const iCrReg = pSvmTransient->u64ExitCode - SVM_EXIT_READ_CR0;
+ uint8_t const iGReg = pVmcb->ctrl.u64ExitInfo1 & SVM_EXIT1_MOV_CRX_GPR_NUMBER;
+ VBOXSTRICTRC rcStrict = IEMExecDecodedMovCRxRead(pVCpu, cbInstr, iGReg, iCrReg);
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_VAL(rcStrict);
+ }
+ /* else: SMSW instruction, fall back below to IEM for this. */
+ }
+
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ VBOXSTRICTRC rcStrict = IEMExecOne(pVCpu);
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_PGM_SYNC_CR3
+ || rcStrict == VINF_IEM_RAISED_XCPT,
+ ("hmR0SvmExitReadCRx: IEMExecOne failed rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+ Assert((pSvmTransient->u64ExitCode - SVM_EXIT_READ_CR0) <= 15);
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for CRx writes (SVM_EXIT_WRITE_CR*). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitWriteCRx(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ uint64_t const uExitCode = pSvmTransient->u64ExitCode;
+ uint8_t const iCrReg = uExitCode == SVM_EXIT_CR0_SEL_WRITE ? 0 : (pSvmTransient->u64ExitCode - SVM_EXIT_WRITE_CR0);
+ Assert(iCrReg <= 15);
+
+ VBOXSTRICTRC rcStrict = VERR_SVM_IPE_5;
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ bool fDecodedInstr = false;
+ bool const fSupportsDecodeAssists = hmR0SvmSupportsDecodeAssists(pVCpu);
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if ( fSupportsDecodeAssists
+ && fSupportsNextRipSave)
+ {
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ bool const fMovCRx = RT_BOOL(pVmcb->ctrl.u64ExitInfo1 & SVM_EXIT1_MOV_CRX_MASK);
+ if (fMovCRx)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK | CPUMCTX_EXTRN_CR3 | CPUMCTX_EXTRN_CR4
+ | CPUMCTX_EXTRN_APIC_TPR);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pCtx->rip;
+ uint8_t const iGReg = pVmcb->ctrl.u64ExitInfo1 & SVM_EXIT1_MOV_CRX_GPR_NUMBER;
+ Log4Func(("Mov CR%u w/ iGReg=%#x\n", iCrReg, iGReg));
+ rcStrict = IEMExecDecodedMovCRxWrite(pVCpu, cbInstr, iCrReg, iGReg);
+ fDecodedInstr = true;
+ }
+ /* else: LMSW or CLTS instruction, fall back below to IEM for this. */
+ }
+
+ if (!fDecodedInstr)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ Log4Func(("iCrReg=%#x\n", iCrReg));
+ rcStrict = IEMExecOne(pVCpu);
+ if (RT_UNLIKELY( rcStrict == VERR_IEM_ASPECT_NOT_IMPLEMENTED
+ || rcStrict == VERR_IEM_INSTR_NOT_IMPLEMENTED))
+ rcStrict = VERR_EM_INTERPRETER;
+ }
+
+ if (rcStrict == VINF_SUCCESS)
+ {
+ switch (iCrReg)
+ {
+ case 0:
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR0);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR0Write);
+ break;
+
+ case 2:
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR2);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR2Write);
+ break;
+
+ case 3:
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR3);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR3Write);
+ break;
+
+ case 4:
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR4);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR4Write);
+ break;
+
+ case 8:
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR8Write);
+ break;
+
+ default:
+ {
+ AssertMsgFailed(("hmR0SvmExitWriteCRx: Invalid/Unexpected Write-CRx exit. u64ExitCode=%#RX64 %#x\n",
+ pSvmTransient->u64ExitCode, iCrReg));
+ break;
+ }
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ }
+ else
+ Assert(rcStrict == VERR_EM_INTERPRETER || rcStrict == VINF_PGM_SYNC_CR3);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT helper for read MSRs, see hmR0SvmExitMsr.
+ *
+ * @returns Strict VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ */
+static VBOXSTRICTRC hmR0SvmExitReadMsr(PVMCPUCC pVCpu, PSVMVMCB pVmcb)
+{
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdmsr);
+ Log4Func(("idMsr=%#RX32\n", pVCpu->cpum.GstCtx.ecx));
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ /** @todo Optimize this: Only retrieve the MSR bits we need here. CPUMAllMsrs.cpp
+ * can ask for what it needs instead of using CPUMCTX_EXTRN_ALL_MSRS. */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_ALL_MSRS);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedRdmsr(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_ALL_MSRS);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_IEM_RAISED_XCPT
+ || rcStrict == VINF_CPUM_R3_MSR_READ,
+ ("hmR0SvmExitReadMsr: Unexpected status %Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return rcStrict;
+}
+
+
+/**
+ * \#VMEXIT helper for write MSRs, see hmR0SvmExitMsr.
+ *
+ * @returns Strict VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ * @param pSvmTransient Pointer to the SVM-transient structure.
+ */
+static VBOXSTRICTRC hmR0SvmExitWriteMsr(PVMCPUCC pVCpu, PSVMVMCB pVmcb, PSVMTRANSIENT pSvmTransient)
+{
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint32_t const idMsr = pCtx->ecx;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWrmsr);
+ Log4Func(("idMsr=%#RX32\n", idMsr));
+
+ /*
+ * Handle TPR patching MSR writes.
+     * We utilize the LSTAR MSR for patching.
+ */
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if ( pVCpu->CTX_SUFF(pVM)->hm.s.fTPRPatchingActive
+ && idMsr == MSR_K8_LSTAR)
+ {
+ unsigned cbInstr;
+ if (fSupportsNextRipSave)
+ cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ else
+ {
+ PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
+ int rc = EMInterpretDisasCurrent(pVCpu->CTX_SUFF(pVM), pVCpu, pDis, &cbInstr);
+ if ( rc == VINF_SUCCESS
+ && pDis->pCurInstr->uOpcode == OP_WRMSR)
+ Assert(cbInstr > 0);
+ else
+ cbInstr = 0;
+ }
+
+ /* Our patch code uses LSTAR for TPR caching for 32-bit guests. */
+ if ((pCtx->eax & 0xff) != pSvmTransient->u8GuestTpr)
+ {
+ int rc = APICSetTpr(pVCpu, pCtx->eax & 0xff);
+ AssertRCReturn(rc, rc);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ }
+
+ int rc = VINF_SUCCESS;
+ hmR0SvmAdvanceRip(pVCpu, cbInstr);
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rc);
+ return rc;
+ }
+
+ /*
+ * Handle regular MSR writes.
+ */
+ VBOXSTRICTRC rcStrict;
+ if (fSupportsNextRipSave)
+ {
+ /** @todo Optimize this: We don't need to get much of the MSR state here
+ * since we're only updating. CPUMAllMsrs.cpp can ask for what it needs and
+ * clear the applicable extern flags. */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_ALL_MSRS);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedWrmsr(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_ALL_MSRS);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_IEM_RAISED_XCPT
+ || rcStrict == VINF_CPUM_R3_MSR_WRITE,
+ ("hmR0SvmExitWriteMsr: Unexpected status %Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ if (rcStrict == VINF_SUCCESS)
+ {
+ /* If this is an X2APIC WRMSR access, update the APIC TPR state. */
+ if ( idMsr >= MSR_IA32_X2APIC_START
+ && idMsr <= MSR_IA32_X2APIC_END)
+ {
+ /*
+ * We've already saved the APIC related guest-state (TPR) in hmR0SvmPostRunGuest().
+ * When full APIC register virtualization is implemented we'll have to make sure
+ * APIC state is saved from the VMCB before IEM changes it.
+ */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ }
+ else
+ {
+ switch (idMsr)
+ {
+ case MSR_IA32_TSC: pSvmTransient->fUpdateTscOffsetting = true; break;
+ case MSR_K6_EFER: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_EFER_MSR); break;
+ case MSR_K8_FS_BASE: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_FS); break;
+ case MSR_K8_GS_BASE: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_GS); break;
+ case MSR_IA32_SYSENTER_CS: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_CS_MSR); break;
+ case MSR_IA32_SYSENTER_EIP: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_EIP_MSR); break;
+ case MSR_IA32_SYSENTER_ESP: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_ESP_MSR); break;
+ }
+ }
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return rcStrict;
+}
+
+
+/**
+ * \#VMEXIT handler for MSR read and writes (SVM_EXIT_MSR). Conditional
+ * \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitMsr(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ if (pVmcb->ctrl.u64ExitInfo1 == SVM_EXIT1_MSR_READ)
+ return VBOXSTRICTRC_TODO(hmR0SvmExitReadMsr(pVCpu, pVmcb));
+
+ Assert(pVmcb->ctrl.u64ExitInfo1 == SVM_EXIT1_MSR_WRITE);
+ return VBOXSTRICTRC_TODO(hmR0SvmExitWriteMsr(pVCpu, pVmcb, pSvmTransient));
+}
+
+
+/**
+ * \#VMEXIT handler for DRx read (SVM_EXIT_READ_DRx). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitReadDRx(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
+
+ /** @todo Stepping with nested-guest. */
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (!CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ {
+ /* We should -not- get this #VMEXIT if the guest's debug registers were active. */
+ if (pSvmTransient->fWasGuestDebugStateActive)
+ {
+ AssertMsgFailed(("hmR0SvmExitReadDRx: Unexpected exit %#RX32\n", (uint32_t)pSvmTransient->u64ExitCode));
+ pVCpu->hm.s.u32HMError = (uint32_t)pSvmTransient->u64ExitCode;
+ return VERR_SVM_UNEXPECTED_EXIT;
+ }
+
+ /*
+ * Lazy DR0-3 loading.
+ */
+ if (!pSvmTransient->fWasHyperDebugStateActive)
+ {
+ Assert(!DBGFIsStepping(pVCpu)); Assert(!pVCpu->hm.s.fSingleInstruction);
+ Log5(("hmR0SvmExitReadDRx: Lazy loading guest debug registers\n"));
+
+ /* Don't intercept DRx read and writes. */
+ PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+ pVmcb->ctrl.u16InterceptRdDRx = 0;
+ pVmcb->ctrl.u16InterceptWrDRx = 0;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+
+ /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */
+ VMMRZCallRing3Disable(pVCpu);
+ HM_DISABLE_PREEMPT(pVCpu);
+
+ /* Save the host & load the guest debug state, restart execution of the MOV DRx instruction. */
+ CPUMR0LoadGuestDebugState(pVCpu, false /* include DR6 */);
+ Assert(CPUMIsGuestDebugStateActive(pVCpu));
+
+ HM_RESTORE_PREEMPT();
+ VMMRZCallRing3Enable(pVCpu);
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
+ return VINF_SUCCESS;
+ }
+ }
+
+ /*
+     * Interpret the reading/writing of DRx.
+ */
+ /** @todo Decode assist. */
+ VBOXSTRICTRC rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0 /* pvFault */);
+ Log5(("hmR0SvmExitReadDRx: Emulated DRx access: rc=%Rrc\n", VBOXSTRICTRC_VAL(rc)));
+ if (RT_LIKELY(rc == VINF_SUCCESS))
+ {
+        /* Not strictly necessary for read accesses, but it doesn't hurt for now; this will be fixed with decode assists. */
+ /** @todo CPUM should set this flag! */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR_MASK);
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rc);
+ }
+ else
+ Assert(rc == VERR_EM_INTERPRETER);
+ return VBOXSTRICTRC_TODO(rc);
+}
+
+
+/**
+ * \#VMEXIT handler for DRx write (SVM_EXIT_WRITE_DRx). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitWriteDRx(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+    /* For now it's the same since we interpret the instruction anyway. Will change when use of decode assists is implemented. */
+ int rc = hmR0SvmExitReadDRx(pVCpu, pSvmTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
+ STAM_COUNTER_DEC(&pVCpu->hm.s.StatExitDRxRead);
+ return rc;
+}
+
+
+/**
+ * \#VMEXIT handler for XCRx write (SVM_EXIT_XSETBV). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXsetbv(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+
+ /** @todo decode assists... */
+ VBOXSTRICTRC rcStrict = IEMExecOne(pVCpu);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ {
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ pVCpu->hm.s.fLoadSaveGuestXcr0 = (pCtx->cr4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0();
+ Log4Func(("New XCR0=%#RX64 fLoadSaveGuestXcr0=%RTbool (cr4=%#RX64)\n", pCtx->aXcr[0], pVCpu->hm.s.fLoadSaveGuestXcr0,
+ pCtx->cr4));
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for I/O instructions (SVM_EXIT_IOIO). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitIOInstr(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_SREG_MASK);
+
+ /* I/O operation lookup arrays. */
+ static uint32_t const s_aIOSize[8] = { 0, 1, 2, 0, 4, 0, 0, 0 }; /* Size of the I/O accesses in bytes. */
+ static uint32_t const s_aIOOpAnd[8] = { 0, 0xff, 0xffff, 0, 0xffffffff, 0, 0, 0 }; /* AND masks for saving
+ the result (in AL/AX/EAX). */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+
+ Log4Func(("CS:RIP=%04x:%#RX64\n", pCtx->cs.Sel, pCtx->rip));
+
+ /* Refer AMD spec. 15.10.2 "IN and OUT Behaviour" and Figure 15-2. "EXITINFO1 for IOIO Intercept" for the format. */
+ SVMIOIOEXITINFO IoExitInfo;
+ IoExitInfo.u = (uint32_t)pVmcb->ctrl.u64ExitInfo1;
+ uint32_t uIOWidth = (IoExitInfo.u >> 4) & 0x7;
+ uint32_t cbValue = s_aIOSize[uIOWidth];
+ uint32_t uAndVal = s_aIOOpAnd[uIOWidth];
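+    /* Bits of EXITINFO1 used above and below (see the AMD figure referenced): bits 4-6
+       select the operand size (via s_aIOSize/s_aIOOpAnd), bits 7-9 the address size for
+       string I/O (decoded into an IEMMODE further down), while the SVMIOIOEXITINFO
+       bitfields provide u1Type (IN/OUT), u1Str, u1Rep, u3Seg and u16Port. */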
+
+ if (RT_UNLIKELY(!cbValue))
+ {
+ AssertMsgFailed(("hmR0SvmExitIOInstr: Invalid IO operation. uIOWidth=%u\n", uIOWidth));
+ return VERR_EM_INTERPRETER;
+ }
+
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
+ VBOXSTRICTRC rcStrict;
+ PCEMEXITREC pExitRec = NULL;
+ if ( !pVCpu->hm.s.fSingleInstruction
+ && !pVCpu->cpum.GstCtx.eflags.Bits.u1TF)
+ pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
+ !IoExitInfo.n.u1Str
+ ? IoExitInfo.n.u1Type == SVM_IOIO_READ
+ ? EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_READ)
+ : EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_WRITE)
+ : IoExitInfo.n.u1Type == SVM_IOIO_READ
+ ? EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_STR_READ)
+ : EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_STR_WRITE),
+ pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
+ if (!pExitRec)
+ {
+ bool fUpdateRipAlready = false;
+ if (IoExitInfo.n.u1Str)
+ {
+ /* INS/OUTS - I/O String instruction. */
+ /** @todo Huh? why can't we use the segment prefix information given by AMD-V
+ * in EXITINFO1? Investigate once this thing is up and running. */
+ Log4Func(("CS:RIP=%04x:%08RX64 %#06x/%u %c str\n", pCtx->cs.Sel, pCtx->rip, IoExitInfo.n.u16Port, cbValue,
+ IoExitInfo.n.u1Type == SVM_IOIO_WRITE ? 'w' : 'r'));
+ AssertReturn(pCtx->dx == IoExitInfo.n.u16Port, VERR_SVM_IPE_2);
+ static IEMMODE const s_aenmAddrMode[8] =
+ {
+ (IEMMODE)-1, IEMMODE_16BIT, IEMMODE_32BIT, (IEMMODE)-1, IEMMODE_64BIT, (IEMMODE)-1, (IEMMODE)-1, (IEMMODE)-1
+ };
+ IEMMODE enmAddrMode = s_aenmAddrMode[(IoExitInfo.u >> 7) & 0x7];
+ if (enmAddrMode != (IEMMODE)-1)
+ {
+ uint64_t cbInstr = pVmcb->ctrl.u64ExitInfo2 - pCtx->rip;
+ if (cbInstr <= 15 && cbInstr >= 1)
+ {
+ Assert(cbInstr >= 1U + IoExitInfo.n.u1Rep);
+ if (IoExitInfo.n.u1Type == SVM_IOIO_WRITE)
+ {
+ /* Don't know exactly how to detect whether u3Seg is valid, currently
+ only enabling it for Bulldozer and later with NRIP. OS/2 broke on
+ 2384 Opterons when only checking NRIP. */
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if ( fSupportsNextRipSave
+ && pVM->cpum.ro.GuestFeatures.enmMicroarch >= kCpumMicroarch_AMD_15h_First)
+ {
+ AssertMsg(IoExitInfo.n.u3Seg == X86_SREG_DS || cbInstr > 1U + IoExitInfo.n.u1Rep,
+ ("u32Seg=%d cbInstr=%d u1REP=%d", IoExitInfo.n.u3Seg, cbInstr, IoExitInfo.n.u1Rep));
+ rcStrict = IEMExecStringIoWrite(pVCpu, cbValue, enmAddrMode, IoExitInfo.n.u1Rep, (uint8_t)cbInstr,
+ IoExitInfo.n.u3Seg, true /*fIoChecked*/);
+ }
+ else if (cbInstr == 1U + IoExitInfo.n.u1Rep)
+ rcStrict = IEMExecStringIoWrite(pVCpu, cbValue, enmAddrMode, IoExitInfo.n.u1Rep, (uint8_t)cbInstr,
+ X86_SREG_DS, true /*fIoChecked*/);
+ else
+ rcStrict = IEMExecOne(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringWrite);
+ }
+ else
+ {
+ AssertMsg(IoExitInfo.n.u3Seg == X86_SREG_ES /*=0*/, ("%#x\n", IoExitInfo.n.u3Seg));
+ rcStrict = IEMExecStringIoRead(pVCpu, cbValue, enmAddrMode, IoExitInfo.n.u1Rep, (uint8_t)cbInstr,
+ true /*fIoChecked*/);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringRead);
+ }
+ }
+ else
+ {
+ AssertMsgFailed(("rip=%RX64 nrip=%#RX64 cbInstr=%#RX64\n", pCtx->rip, pVmcb->ctrl.u64ExitInfo2, cbInstr));
+ rcStrict = IEMExecOne(pVCpu);
+ }
+ }
+ else
+ {
+ AssertMsgFailed(("IoExitInfo=%RX64\n", IoExitInfo.u));
+ rcStrict = IEMExecOne(pVCpu);
+ }
+ fUpdateRipAlready = true;
+ }
+ else
+ {
+ /* IN/OUT - I/O instruction. */
+ Assert(!IoExitInfo.n.u1Rep);
+
+ uint8_t const cbInstr = pVmcb->ctrl.u64ExitInfo2 - pCtx->rip;
+ if (IoExitInfo.n.u1Type == SVM_IOIO_WRITE)
+ {
+ rcStrict = IOMIOPortWrite(pVM, pVCpu, IoExitInfo.n.u16Port, pCtx->eax & uAndVal, cbValue);
+ if ( rcStrict == VINF_IOM_R3_IOPORT_WRITE
+ && !pCtx->eflags.Bits.u1TF)
+ rcStrict = EMRZSetPendingIoPortWrite(pVCpu, IoExitInfo.n.u16Port, cbInstr, cbValue, pCtx->eax & uAndVal);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite);
+ }
+ else
+ {
+ uint32_t u32Val = 0;
+ rcStrict = IOMIOPortRead(pVM, pVCpu, IoExitInfo.n.u16Port, &u32Val, cbValue);
+ if (IOM_SUCCESS(rcStrict))
+ {
+ /* Save result of I/O IN instr. in AL/AX/EAX. */
+ /** @todo r=bird: 32-bit op size should clear high bits of rax! */
+ pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
+ }
+ else if ( rcStrict == VINF_IOM_R3_IOPORT_READ
+ && !pCtx->eflags.Bits.u1TF)
+ rcStrict = EMRZSetPendingIoPortRead(pVCpu, IoExitInfo.n.u16Port, cbInstr, cbValue);
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead);
+ }
+ }
+
+ if (IOM_SUCCESS(rcStrict))
+ {
+ /* AMD-V saves the RIP of the instruction following the IO instruction in EXITINFO2. */
+ if (!fUpdateRipAlready)
+ pCtx->rip = pVmcb->ctrl.u64ExitInfo2;
+
+ /*
+ * If any I/O breakpoints are armed, we need to check if one triggered
+ * and take appropriate action.
+ * Note that the I/O breakpoint type is undefined if CR4.DE is 0.
+ */
+ /** @todo Optimize away the DBGFBpIsHwIoArmed call by having DBGF tell the
+ * execution engines about whether hyper BPs and such are pending. */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_DR7);
+ uint32_t const uDr7 = pCtx->dr[7];
+ if (RT_UNLIKELY( ( (uDr7 & X86_DR7_ENABLED_MASK)
+ && X86_DR7_ANY_RW_IO(uDr7)
+ && (pCtx->cr4 & X86_CR4_DE))
+ || DBGFBpIsHwIoArmed(pVM)))
+ {
+ /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */
+ VMMRZCallRing3Disable(pVCpu);
+ HM_DISABLE_PREEMPT(pVCpu);
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck);
+ CPUMR0DebugStateMaybeSaveGuest(pVCpu, false /*fDr6*/);
+
+ VBOXSTRICTRC rcStrict2 = DBGFBpCheckIo(pVM, pVCpu, &pVCpu->cpum.GstCtx, IoExitInfo.n.u16Port, cbValue);
+ if (rcStrict2 == VINF_EM_RAW_GUEST_TRAP)
+ {
+ /* Raise #DB. */
+ pVmcb->guest.u64DR6 = pCtx->dr[6];
+ pVmcb->guest.u64DR7 = pCtx->dr[7];
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX;
+ hmR0SvmSetPendingXcptDB(pVCpu);
+ }
+ /* rcStrict is VINF_SUCCESS, VINF_IOM_R3_IOPORT_COMMIT_WRITE, or in [VINF_EM_FIRST..VINF_EM_LAST],
+ however we can ditch VINF_IOM_R3_IOPORT_COMMIT_WRITE as it has VMCPU_FF_IOM as backup. */
+ else if ( rcStrict2 != VINF_SUCCESS
+ && (rcStrict == VINF_SUCCESS || rcStrict2 < rcStrict))
+ rcStrict = rcStrict2;
+ AssertCompile(VINF_EM_LAST < VINF_IOM_R3_IOPORT_COMMIT_WRITE);
+
+ HM_RESTORE_PREEMPT();
+ VMMRZCallRing3Enable(pVCpu);
+ }
+
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ }
+
+#ifdef VBOX_STRICT
+ if ( rcStrict == VINF_IOM_R3_IOPORT_READ
+ || rcStrict == VINF_EM_PENDING_R3_IOPORT_READ)
+ Assert(IoExitInfo.n.u1Type == SVM_IOIO_READ);
+ else if ( rcStrict == VINF_IOM_R3_IOPORT_WRITE
+ || rcStrict == VINF_IOM_R3_IOPORT_COMMIT_WRITE
+ || rcStrict == VINF_EM_PENDING_R3_IOPORT_WRITE)
+ Assert(IoExitInfo.n.u1Type == SVM_IOIO_WRITE);
+ else
+ {
+ /** @todo r=bird: This is missing a bunch of VINF_EM_FIRST..VINF_EM_LAST
+ * statuses, that the VMM device and some others may return. See
+ * IOM_SUCCESS() for guidance. */
+ AssertMsg( RT_FAILURE(rcStrict)
+ || rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_EM_RAW_EMULATE_INSTR
+ || rcStrict == VINF_EM_DBG_BREAKPOINT
+ || rcStrict == VINF_EM_RAW_GUEST_TRAP
+ || rcStrict == VINF_EM_RAW_TO_R3
+ || rcStrict == VINF_TRPM_XCPT_DISPATCHED
+ || rcStrict == VINF_EM_TRIPLE_FAULT, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+ }
+#endif
+ }
+ else
+ {
+ /*
+ * Frequent exit or something needing probing. Get state and call EMHistoryExec.
+ */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ STAM_COUNTER_INC(!IoExitInfo.n.u1Str
+ ? IoExitInfo.n.u1Type == SVM_IOIO_WRITE ? &pVCpu->hm.s.StatExitIOWrite : &pVCpu->hm.s.StatExitIORead
+ : IoExitInfo.n.u1Type == SVM_IOIO_WRITE ? &pVCpu->hm.s.StatExitIOStringWrite : &pVCpu->hm.s.StatExitIOStringRead);
+ Log4(("IOExit/%u: %04x:%08RX64: %s%s%s %#x LB %u -> EMHistoryExec\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, IoExitInfo.n.u1Rep ? "REP " : "",
+ IoExitInfo.n.u1Type == SVM_IOIO_WRITE ? "OUT" : "IN", IoExitInfo.n.u1Str ? "S" : "", IoExitInfo.n.u16Port, uIOWidth));
+
+ rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+
+ Log4(("IOExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+ VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
+ }
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for Nested Page-faults (SVM_EXIT_NPF). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitNestedPF(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ Assert(pVM->hm.s.fNestedPaging);
+
+ /* See AMD spec. 15.25.6 "Nested versus Guest Page Faults, Fault Ordering" for VMCB details for #NPF. */
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ RTGCPHYS GCPhysFaultAddr = pVmcb->ctrl.u64ExitInfo2;
+ uint32_t u32ErrCode = pVmcb->ctrl.u64ExitInfo1; /* Note! High bits in EXITINFO1 may contain additional info and are
+ thus intentionally not copied into u32ErrCode. */
+
+ Log4Func(("#NPF at CS:RIP=%04x:%#RX64 GCPhysFaultAddr=%RGp ErrCode=%#x \n", pCtx->cs.Sel, pCtx->rip, GCPhysFaultAddr,
+ u32ErrCode));
+
+ /*
+ * TPR patching for 32-bit guests, using the reserved bit in the page tables for MMIO regions.
+ */
+ if ( pVM->hm.s.fTprPatchingAllowed
+ && (GCPhysFaultAddr & PAGE_OFFSET_MASK) == XAPIC_OFF_TPR
+ && ( !(u32ErrCode & X86_TRAP_PF_P) /* Not present */
+ || (u32ErrCode & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) == (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) /* MMIO page. */
+ && !CPUMIsGuestInSvmNestedHwVirtMode(pCtx)
+ && !CPUMIsGuestInLongModeEx(pCtx)
+ && !CPUMGetGuestCPL(pVCpu)
+ && pVM->hm.s.cPatches < RT_ELEMENTS(pVM->hm.s.aPatches))
+ {
+ RTGCPHYS GCPhysApicBase = APICGetBaseMsrNoCheck(pVCpu);
+ GCPhysApicBase &= PAGE_BASE_GC_MASK;
+
+ if (GCPhysFaultAddr == GCPhysApicBase + XAPIC_OFF_TPR)
+ {
+ /* Only attempt to patch the instruction once. */
+ PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
+ if (!pPatch)
+ return VINF_EM_HM_PATCH_TPR_INSTR;
+ }
+ }
+
+ /*
+ * Determine the nested paging mode.
+ */
+/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
+ PGMMODE const enmNestedPagingMode = PGMGetHostMode(pVM);
+
+ /*
+ * MMIO optimization using the reserved (RSVD) bit in the guest page tables for MMIO pages.
+ */
+ Assert((u32ErrCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != X86_TRAP_PF_RSVD);
+ if ((u32ErrCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) == (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
+ {
+ /*
+ * If event delivery causes an MMIO #NPF, go back to instruction emulation as otherwise
+ * injecting the original pending event would most likely cause the same MMIO #NPF.
+ */
+ if (pVCpu->hm.s.Event.fPending)
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterpret);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ VBOXSTRICTRC rcStrict;
+ PCEMEXITREC pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
+ EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_MMIO),
+ pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
+ if (!pExitRec)
+ {
+
+ rcStrict = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, enmNestedPagingMode, CPUMCTX2CORE(pCtx), GCPhysFaultAddr,
+ u32ErrCode);
+
+ /*
+ * If we succeed, resume guest execution.
+ *
+ * If we fail in interpreting the instruction because we couldn't get the guest
+ * physical address of the page containing the instruction via the guest's page
+ * tables (we would invalidate the guest page in the host TLB), resume execution
+ * which would cause a guest page fault to let the guest handle this weird case.
+ *
+ * See @bugref{6043}.
+ */
+ if ( rcStrict == VINF_SUCCESS
+ || rcStrict == VERR_PAGE_TABLE_NOT_PRESENT
+ || rcStrict == VERR_PAGE_NOT_PRESENT)
+ {
+ /* Successfully handled MMIO operation. */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ rcStrict = VINF_SUCCESS;
+ }
+ }
+ else
+ {
+ /*
+ * Frequent exit or something needing probing. Get state and call EMHistoryExec.
+ */
+ Assert(pCtx == &pVCpu->cpum.GstCtx);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ Log4(("EptMisscfgExit/%u: %04x:%08RX64: %RGp -> EMHistoryExec\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysFaultAddr));
+
+ rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+
+ Log4(("EptMisscfgExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+ VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
+ }
+ return VBOXSTRICTRC_TODO(rcStrict);
+ }
+
+ /*
+ * Nested page-fault.
+ */
+ TRPMAssertXcptPF(pVCpu, GCPhysFaultAddr, u32ErrCode);
+ int rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, enmNestedPagingMode, u32ErrCode, CPUMCTX2CORE(pCtx), GCPhysFaultAddr);
+ TRPMResetTrap(pVCpu);
+
+ Log4Func(("#NPF: PGMR0Trap0eHandlerNestedPaging returns %Rrc CS:RIP=%04x:%#RX64\n", rc, pCtx->cs.Sel, pCtx->rip));
+
+ /*
+ * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}.
+ */
+ if ( rc == VINF_SUCCESS
+ || rc == VERR_PAGE_TABLE_NOT_PRESENT
+ || rc == VERR_PAGE_NOT_PRESENT)
+ {
+ /* We've successfully synced our shadow page tables. */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
+ rc = VINF_SUCCESS;
+ }
+
+ /*
+ * If delivering an event causes an #NPF (and not MMIO), we shall resolve the fault and
+ * re-inject the original event.
+ */
+ if (pVCpu->hm.s.Event.fPending)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectReflectNPF);
+
+ return rc;
+}
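+
+/*
+ * Note: the handler above distinguishes two #NPF flavours: error codes with both P and
+ * RSVD set take the MMIO optimization path (PGMR0Trap0eHandlerNPMisconfig or, for hot
+ * exits, EMHistoryExec), while everything else is treated as a genuine nested page fault
+ * and handed to PGMR0Trap0eHandlerNestedPaging, re-injecting the original event if one
+ * was pending.
+ */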
+
+
+/**
+ * \#VMEXIT handler for virtual interrupt (SVM_EXIT_VINTR). Conditional
+ * \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitVIntr(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_ASSERT_NOT_IN_NESTED_GUEST(&pVCpu->cpum.GstCtx);
+
+ /* Indicate that we no longer need to #VMEXIT when the guest is ready to receive NMIs, it is now ready. */
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ hmR0SvmClearIntWindowExiting(pVCpu, pVmcb);
+
+ /* Deliver the pending interrupt via hmR0SvmEvaluatePendingEvent() and resume guest execution. */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * \#VMEXIT handler for task switches (SVM_EXIT_TASK_SWITCH). Conditional
+ * \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitTaskSwitch(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+#ifndef HMSVM_ALWAYS_TRAP_TASK_SWITCH
+ Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
+#endif
+
+ /* Check if this task-switch occurred while delivering an event through the guest IDT. */
+ if (pVCpu->hm.s.Event.fPending) /* Can happen with exceptions/NMI. See @bugref{8411}. */
+ {
+ /*
+ * AMD-V provides us with the exception which caused the TS; we collect
+ * the information in the call to hmR0SvmCheckExitDueToEventDelivery().
+ */
+ Log4Func(("TS occurred during event delivery\n"));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+
+ /** @todo Emulate task switch someday, currently just going back to ring-3 for
+ * emulation. */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
+ return VERR_EM_INTERPRETER;
+}
+
+
+/**
+ * \#VMEXIT handler for VMMCALL (SVM_EXIT_VMMCALL). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitVmmCall(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if (pVM->hm.s.fTprPatchingAllowed)
+ {
+ int rc = hmEmulateSvmMovTpr(pVM, pVCpu);
+ if (rc != VERR_NOT_FOUND)
+ {
+ Log4Func(("hmEmulateSvmMovTpr returns %Rrc\n", rc));
+ return rc;
+ }
+ }
+
+ if (EMAreHypercallInstructionsEnabled(pVCpu))
+ {
+ unsigned cbInstr;
+ if (hmR0SvmSupportsNextRipSave(pVCpu))
+ {
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ }
+ else
+ {
+ PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
+ int rc = EMInterpretDisasCurrent(pVCpu->CTX_SUFF(pVM), pVCpu, pDis, &cbInstr);
+ if ( rc == VINF_SUCCESS
+ && pDis->pCurInstr->uOpcode == OP_VMMCALL)
+ Assert(cbInstr > 0);
+ else
+ cbInstr = 0;
+ }
+
+ VBOXSTRICTRC rcStrict = GIMHypercall(pVCpu, &pVCpu->cpum.GstCtx);
+ if (RT_SUCCESS(rcStrict))
+ {
+ /* Only update the RIP if we're continuing guest execution and not in the case
+ of say VINF_GIM_R3_HYPERCALL. */
+ if (rcStrict == VINF_SUCCESS)
+ hmR0SvmAdvanceRip(pVCpu, cbInstr);
+
+ return VBOXSTRICTRC_VAL(rcStrict);
+ }
+ else
+ Log4Func(("GIMHypercall returns %Rrc -> #UD\n", VBOXSTRICTRC_VAL(rcStrict)));
+ }
+
+ hmR0SvmSetPendingXcptUD(pVCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * \#VMEXIT handler for PAUSE (SVM_EXIT_PAUSE). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitPause(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ unsigned cbInstr;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ }
+ else
+ {
+ PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
+ int rc = EMInterpretDisasCurrent(pVCpu->CTX_SUFF(pVM), pVCpu, pDis, &cbInstr);
+ if ( rc == VINF_SUCCESS
+ && pDis->pCurInstr->uOpcode == OP_PAUSE)
+ Assert(cbInstr > 0);
+ else
+ cbInstr = 0;
+ }
+
+ /** @todo The guest has likely hit a contended spinlock. We might want to
+     *        poke or schedule a different guest VCPU. */
+ hmR0SvmAdvanceRip(pVCpu, cbInstr);
+ return VINF_EM_RAW_INTERRUPT;
+}
+
+
+/**
+ * \#VMEXIT handler for FERR intercept (SVM_EXIT_FERR_FREEZE). Conditional
+ * \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitFerrFreeze(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR0);
+ Assert(!(pVCpu->cpum.GstCtx.cr0 & X86_CR0_NE));
+
+ Log4Func(("Raising IRQ 13 in response to #FERR\n"));
+ return PDMIsaSetIrq(pVCpu->CTX_SUFF(pVM), 13 /* u8Irq */, 1 /* u8Level */, 0 /* uTagSrc */);
+}
+
+
+/**
+ * \#VMEXIT handler for IRET (SVM_EXIT_IRET). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitIret(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ /* Clear NMI blocking. */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS);
+
+ /* Indicate that we no longer need to #VMEXIT when the guest is ready to receive NMIs, it is now ready. */
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ hmR0SvmClearCtrlIntercept(pVCpu, pVmcb, SVM_CTRL_INTERCEPT_IRET);
+
+ /* Deliver the pending NMI via hmR0SvmEvaluatePendingEvent() and resume guest execution. */
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * \#VMEXIT handler for page-fault exceptions (SVM_EXIT_XCPT_14).
+ * Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptPF(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ /* See AMD spec. 15.12.15 "#PF (Page Fault)". */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint32_t uErrCode = pVmcb->ctrl.u64ExitInfo1;
+ uint64_t const uFaultAddress = pVmcb->ctrl.u64ExitInfo2;
+
+#if defined(HMSVM_ALWAYS_TRAP_ALL_XCPTS) || defined(HMSVM_ALWAYS_TRAP_PF)
+ if (pVM->hm.s.fNestedPaging)
+ {
+ pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */
+ if ( !pSvmTransient->fVectoringDoublePF
+ || CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ {
+ /* A genuine guest #PF, reflect it to the guest. */
+ hmR0SvmSetPendingXcptPF(pVCpu, uErrCode, uFaultAddress);
+ Log4Func(("#PF: Guest page fault at %04X:%RGv FaultAddr=%RX64 ErrCode=%#x\n", pCtx->cs.Sel, (RTGCPTR)pCtx->rip,
+ uFaultAddress, uErrCode));
+ }
+ else
+ {
+ /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
+ hmR0SvmSetPendingXcptDF(pVCpu);
+ Log4Func(("Pending #DF due to vectoring #PF. NP\n"));
+ }
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
+ return VINF_SUCCESS;
+ }
+#endif
+
+ Assert(!pVM->hm.s.fNestedPaging);
+
+ /*
+ * TPR patching shortcut for APIC TPR reads and writes; only applicable to 32-bit guests.
+ */
+ if ( pVM->hm.s.fTprPatchingAllowed
+ && (uFaultAddress & 0xfff) == XAPIC_OFF_TPR
+ && !(uErrCode & X86_TRAP_PF_P) /* Not present. */
+ && !CPUMIsGuestInSvmNestedHwVirtMode(pCtx)
+ && !CPUMIsGuestInLongModeEx(pCtx)
+ && !CPUMGetGuestCPL(pVCpu)
+ && pVM->hm.s.cPatches < RT_ELEMENTS(pVM->hm.s.aPatches))
+ {
+ RTGCPHYS GCPhysApicBase;
+ GCPhysApicBase = APICGetBaseMsrNoCheck(pVCpu);
+ GCPhysApicBase &= PAGE_BASE_GC_MASK;
+
+ /* Check if the page at the fault-address is the APIC base. */
+ RTGCPHYS GCPhysPage;
+ int rc2 = PGMGstGetPage(pVCpu, (RTGCPTR)uFaultAddress, NULL /* pfFlags */, &GCPhysPage);
+ if ( rc2 == VINF_SUCCESS
+ && GCPhysPage == GCPhysApicBase)
+ {
+ /* Only attempt to patch the instruction once. */
+ PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
+ if (!pPatch)
+ return VINF_EM_HM_PATCH_TPR_INSTR;
+ }
+ }
+
+ Log4Func(("#PF: uFaultAddress=%#RX64 CS:RIP=%#04x:%#RX64 uErrCode %#RX32 cr3=%#RX64\n", uFaultAddress, pCtx->cs.Sel,
+ pCtx->rip, uErrCode, pCtx->cr3));
+
+ /*
+     * If it's a vectoring #PF, emulate re-injecting the original event as
+ * PGMTrap0eHandler() is incapable of differentiating between instruction emulation and
+ * event injection that caused a #PF. See @bugref{6607}.
+ */
+ if (pSvmTransient->fVectoringPF)
+ {
+ Assert(pVCpu->hm.s.Event.fPending);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+
+ TRPMAssertXcptPF(pVCpu, uFaultAddress, uErrCode);
+ int rc = PGMTrap0eHandler(pVCpu, uErrCode, CPUMCTX2CORE(pCtx), (RTGCPTR)uFaultAddress);
+
+ Log4Func(("#PF: rc=%Rrc\n", rc));
+
+ if (rc == VINF_SUCCESS)
+ {
+ /* Successfully synced shadow pages tables or emulated an MMIO instruction. */
+ TRPMResetTrap(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ return rc;
+ }
+
+ if (rc == VINF_EM_RAW_GUEST_TRAP)
+ {
+ pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */
+
+ /*
+ * If a nested-guest delivers a #PF and that causes a #PF which is -not- a shadow #PF,
+     * we should simply forward the #PF to the guest and it is up to the nested-hypervisor to
+ * determine whether it is a nested-shadow #PF or a #DF, see @bugref{7243#c121}.
+ */
+ if ( !pSvmTransient->fVectoringDoublePF
+ || CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ {
+ /* It's a guest (or nested-guest) page fault and needs to be reflected. */
+ uErrCode = TRPMGetErrorCode(pVCpu); /* The error code might have been changed. */
+ TRPMResetTrap(pVCpu);
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ /* If the nested-guest is intercepting #PFs, cause a #PF #VMEXIT. */
+ if ( CPUMIsGuestInSvmNestedHwVirtMode(pCtx)
+ && CPUMIsGuestSvmXcptInterceptSet(pVCpu, pCtx, X86_XCPT_PF))
+ return VBOXSTRICTRC_TODO(IEMExecSvmVmexit(pVCpu, SVM_EXIT_XCPT_PF, uErrCode, uFaultAddress));
+#endif
+
+ hmR0SvmSetPendingXcptPF(pVCpu, uErrCode, uFaultAddress);
+ }
+ else
+ {
+ /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
+ TRPMResetTrap(pVCpu);
+ hmR0SvmSetPendingXcptDF(pVCpu);
+ Log4Func(("#PF: Pending #DF due to vectoring #PF\n"));
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
+ return VINF_SUCCESS;
+ }
+
+ TRPMResetTrap(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM);
+ return rc;
+}
+
+
+/**
+ * \#VMEXIT handler for undefined opcode (SVM_EXIT_XCPT_6).
+ * Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptUD(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_ASSERT_NOT_IN_NESTED_GUEST(&pVCpu->cpum.GstCtx);
+
+ /* Paranoia; Ensure we cannot be called as a result of event delivery. */
+ PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+ Assert(!pVmcb->ctrl.ExitIntInfo.n.u1Valid); NOREF(pVmcb);
+
+ int rc = VERR_SVM_UNEXPECTED_XCPT_EXIT;
+ if (pVCpu->hm.s.fGIMTrapXcptUD)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ uint8_t cbInstr = 0;
+ VBOXSTRICTRC rcStrict = GIMXcptUD(pVCpu, &pVCpu->cpum.GstCtx, NULL /* pDis */, &cbInstr);
+ if (rcStrict == VINF_SUCCESS)
+ {
+ /* #UD #VMEXIT does not have valid NRIP information, manually advance RIP. See @bugref{7270#c170}. */
+ hmR0SvmAdvanceRip(pVCpu, cbInstr);
+ rc = VINF_SUCCESS;
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rc);
+ }
+ else if (rcStrict == VINF_GIM_HYPERCALL_CONTINUING)
+ rc = VINF_SUCCESS;
+ else if (rcStrict == VINF_GIM_R3_HYPERCALL)
+ rc = VINF_GIM_R3_HYPERCALL;
+ else
+ Assert(RT_FAILURE(VBOXSTRICTRC_VAL(rcStrict)));
+ }
+
+ /* If the GIM #UD exception handler didn't succeed for some reason or wasn't needed, raise #UD. */
+ if (RT_FAILURE(rc))
+ {
+ hmR0SvmSetPendingXcptUD(pVCpu);
+ rc = VINF_SUCCESS;
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD);
+ return rc;
+}
+
+
+/**
+ * \#VMEXIT handler for math-fault exceptions (SVM_EXIT_XCPT_16).
+ * Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptMF(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+
+ /* Paranoia; Ensure we cannot be called as a result of event delivery. */
+ Assert(!pVmcb->ctrl.ExitIntInfo.n.u1Valid); NOREF(pVmcb);
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
+
+ if (!(pCtx->cr0 & X86_CR0_NE))
+ {
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+        PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
+ unsigned cbInstr;
+ int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbInstr);
+ if (RT_SUCCESS(rc))
+ {
+ /* Convert a #MF into a FERR -> IRQ 13. See @bugref{6117}. */
+ rc = PDMIsaSetIrq(pVCpu->CTX_SUFF(pVM), 13 /* u8Irq */, 1 /* u8Level */, 0 /* uTagSrc */);
+ if (RT_SUCCESS(rc))
+ hmR0SvmAdvanceRip(pVCpu, cbInstr);
+ }
+ else
+ Log4Func(("EMInterpretDisasCurrent returned %Rrc uOpCode=%#x\n", rc, pDis->pCurInstr->uOpcode));
+ return rc;
+ }
+
+ hmR0SvmSetPendingXcptMF(pVCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * \#VMEXIT handler for debug exceptions (SVM_EXIT_XCPT_1). Conditional
+ * \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptDB(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ if (RT_UNLIKELY(pVCpu->hm.s.Event.fPending))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterpret);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB);
+
+ /*
+ * This can be a fault-type #DB (instruction breakpoint) or a trap-type #DB (data
+ * breakpoint). However, for both cases DR6 and DR7 are updated to what the exception
+ * handler expects. See AMD spec. 15.12.2 "#DB (Debug)".
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ int rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), pVmcb->guest.u64DR6, pVCpu->hm.s.fSingleInstruction);
+ if (rc == VINF_EM_RAW_GUEST_TRAP)
+ {
+ Log5(("hmR0SvmExitXcptDB: DR6=%#RX64 -> guest trap\n", pVmcb->guest.u64DR6));
+ if (CPUMIsHyperDebugStateActive(pVCpu))
+ CPUMSetGuestDR6(pVCpu, CPUMGetGuestDR6(pVCpu) | pVmcb->guest.u64DR6);
+
+ /* Reflect the exception back to the guest. */
+ hmR0SvmSetPendingXcptDB(pVCpu);
+ rc = VINF_SUCCESS;
+ }
+
+ /*
+ * Update DR6.
+ */
+ if (CPUMIsHyperDebugStateActive(pVCpu))
+ {
+ Log5(("hmR0SvmExitXcptDB: DR6=%#RX64 -> %Rrc\n", pVmcb->guest.u64DR6, rc));
+ pVmcb->guest.u64DR6 = X86_DR6_INIT_VAL;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX;
+ }
+ else
+ {
+ AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc));
+ Assert(!pVCpu->hm.s.fSingleInstruction && !DBGFIsStepping(pVCpu));
+ }
+
+ return rc;
+}
+
+
+/**
+ * \#VMEXIT handler for alignment check exceptions (SVM_EXIT_XCPT_17).
+ * Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptAC(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestAC);
+
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_AC;
+ Event.n.u1ErrorCodeValid = 1;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * \#VMEXIT handler for breakpoint exceptions (SVM_EXIT_XCPT_3).
+ * Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptBP(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ int rc = DBGFRZTrap03Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx));
+ if (rc == VINF_EM_RAW_GUEST_TRAP)
+ {
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_BP;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ rc = VINF_SUCCESS;
+ }
+
+ Assert(rc == VINF_SUCCESS || rc == VINF_EM_DBG_BREAKPOINT);
+ return rc;
+}
+
+
+/**
+ * Hacks its way around the lovely mesa driver's backdoor accesses.
+ *
+ * @sa hmR0VmxHandleMesaDrvGp
+ */
+static int hmR0SvmHandleMesaDrvGp(PVMCPUCC pVCpu, PCPUMCTX pCtx, PCSVMVMCB pVmcb)
+{
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_GPRS_MASK);
+ Log(("hmR0SvmHandleMesaDrvGp: at %04x:%08RX64 rcx=%RX64 rbx=%RX64\n",
+ pVmcb->guest.CS.u16Sel, pVmcb->guest.u64RIP, pCtx->rcx, pCtx->rbx));
+ RT_NOREF(pCtx, pVmcb);
+
+ /* For now we'll just skip the instruction. */
+ hmR0SvmAdvanceRip(pVCpu, 1);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Checks if the \#GP'ing instruction is the mesa driver doing its lovely
+ * backdoor logging w/o checking what it is running inside.
+ *
+ * This recognizes an "IN EAX,DX" instruction executed in flat ring-3, with the
+ * backdoor port and magic numbers loaded in registers.
+ *
+ * @returns true if it is, false if it isn't.
+ * @sa hmR0VmxIsMesaDrvGp
+ */
+DECLINLINE(bool) hmR0SvmIsMesaDrvGp(PVMCPUCC pVCpu, PCPUMCTX pCtx, PCSVMVMCB pVmcb)
+{
+ /* Check magic and port. */
+ Assert(!(pCtx->fExtrn & (CPUMCTX_EXTRN_RDX | CPUMCTX_EXTRN_RCX)));
+ /*Log8(("hmR0SvmIsMesaDrvGp: rax=%RX64 rdx=%RX64\n", pCtx->fExtrn & CPUMCTX_EXTRN_RAX ? pVmcb->guest.u64RAX : pCtx->rax, pCtx->rdx));*/
+ if (pCtx->dx != UINT32_C(0x5658))
+ return false;
+ if ((pCtx->fExtrn & CPUMCTX_EXTRN_RAX ? pVmcb->guest.u64RAX : pCtx->rax) != UINT32_C(0x564d5868))
+ return false;
+
+ /* Check that it is #GP(0). */
+ if (pVmcb->ctrl.u64ExitInfo1 != 0)
+ return false;
+
+ /* Flat ring-3 CS. */
+ /*Log8(("hmR0SvmIsMesaDrvGp: u8CPL=%d base=%RX64\n", pVmcb->guest.u8CPL, pCtx->fExtrn & CPUMCTX_EXTRN_CS ? pVmcb->guest.CS.u64Base : pCtx->cs.u64Base));*/
+ if (pVmcb->guest.u8CPL != 3)
+ return false;
+ if ((pCtx->fExtrn & CPUMCTX_EXTRN_CS ? pVmcb->guest.CS.u64Base : pCtx->cs.u64Base) != 0)
+ return false;
+
+ /* 0xed: IN eAX,dx */
+ if (pVmcb->ctrl.cbInstrFetched < 1) /* unlikely, it turns out. */
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_GPRS_MASK
+ | CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR3 | CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_EFER);
+ uint8_t abInstr[1];
+ int rc = PGMPhysSimpleReadGCPtr(pVCpu, abInstr, pCtx->rip, sizeof(abInstr));
+ /*Log8(("hmR0SvmIsMesaDrvGp: PGMPhysSimpleReadGCPtr -> %Rrc %#x\n", rc, abInstr[0])); */
+ if (RT_FAILURE(rc))
+ return false;
+ if (abInstr[0] != 0xed)
+ return false;
+ }
+ else
+ {
+ /*Log8(("hmR0SvmIsMesaDrvGp: %#x\n", pVmcb->ctrl.abInstr));*/
+ if (pVmcb->ctrl.abInstr[0] != 0xed)
+ return false;
+ }
+ return true;
+}
+
+
+/**
+ * \#VMEXIT handler for general protection faults (SVM_EXIT_XCPT_13).
+ * Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptGP(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP);
+
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ Assert(pSvmTransient->u64ExitCode == pVmcb->ctrl.u64ExitCode);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if ( !pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv
+ || !hmR0SvmIsMesaDrvGp(pVCpu, pCtx, pVmcb))
+ {
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_GP;
+ Event.n.u1ErrorCodeValid = 1;
+ Event.n.u32ErrorCode = (uint32_t)pVmcb->ctrl.u64ExitInfo1;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+ }
+ return hmR0SvmHandleMesaDrvGp(pVCpu, pCtx, pVmcb);
+}
+
+
+#if defined(HMSVM_ALWAYS_TRAP_ALL_XCPTS) || defined(VBOX_WITH_NESTED_HWVIRT_SVM)
+/**
+ * \#VMEXIT handler for generic exceptions. Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptGeneric(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const uVector = pVmcb->ctrl.u64ExitCode - SVM_EXIT_XCPT_0;
+ uint32_t const uErrCode = pVmcb->ctrl.u64ExitInfo1;
+ Assert(pSvmTransient->u64ExitCode == pVmcb->ctrl.u64ExitCode);
+ Assert(uVector <= X86_XCPT_LAST);
+ Log4Func(("uVector=%#x uErrCode=%u\n", uVector, uErrCode));
+
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = uVector;
+ switch (uVector)
+ {
+ /* Shouldn't be here for reflecting #PFs (among other things, the fault address isn't passed along). */
+ case X86_XCPT_PF: AssertMsgFailed(("hmR0SvmExitXcptGeneric: Unexpected exception")); return VERR_SVM_IPE_5;
+ case X86_XCPT_DF:
+ case X86_XCPT_TS:
+ case X86_XCPT_NP:
+ case X86_XCPT_SS:
+ case X86_XCPT_GP:
+ case X86_XCPT_AC:
+ {
+ Event.n.u1ErrorCodeValid = 1;
+ Event.n.u32ErrorCode = uErrCode;
+ break;
+ }
+ }
+
+#ifdef VBOX_WITH_STATISTICS
+ switch (uVector)
+ {
+ case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE); break;
+ case X86_XCPT_DB: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB); break;
+ case X86_XCPT_BP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP); break;
+ case X86_XCPT_OF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestOF); break;
+ case X86_XCPT_BR: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBR); break;
+ case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD); break;
+        case X86_XCPT_NM:   STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM);     break;
+ case X86_XCPT_DF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDF); break;
+ case X86_XCPT_TS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestTS); break;
+ case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP); break;
+ case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS); break;
+ case X86_XCPT_GP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP); break;
+ case X86_XCPT_PF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF); break;
+ case X86_XCPT_MF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF); break;
+ case X86_XCPT_AC: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestAC); break;
+ case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXF); break;
+ default:
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXcpUnk);
+ break;
+ }
+#endif
+
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+}
+#endif
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+/**
+ * \#VMEXIT handler for CLGI (SVM_EXIT_CLGI). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitClgi(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ Assert(pVmcb);
+ Assert(!pVmcb->ctrl.IntCtrl.n.u1VGifEnable);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ uint64_t const fImport = CPUMCTX_EXTRN_HWVIRT;
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | fImport);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedClgi(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | fImport);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_SUCCESS)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for STGI (SVM_EXIT_STGI). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitStgi(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ /*
+ * When VGIF is not used we always intercept STGI instructions. When VGIF is used,
+     * we only intercept STGI when there are pending events waiting for GIF to become 1.
+ */
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ if (pVmcb->ctrl.IntCtrl.n.u1VGifEnable)
+ hmR0SvmClearCtrlIntercept(pVCpu, pVmcb, SVM_CTRL_INTERCEPT_STGI);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ uint64_t const fImport = CPUMCTX_EXTRN_HWVIRT;
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | fImport);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedStgi(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | fImport);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_SUCCESS)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for VMLOAD (SVM_EXIT_VMLOAD). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitVmload(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ Assert(pVmcb);
+ Assert(!pVmcb->ctrl.LbrVirt.n.u1VirtVmsaveVmload);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ uint64_t const fImport = CPUMCTX_EXTRN_FS | CPUMCTX_EXTRN_GS | CPUMCTX_EXTRN_KERNEL_GS_BASE
+ | CPUMCTX_EXTRN_TR | CPUMCTX_EXTRN_LDTR | CPUMCTX_EXTRN_SYSCALL_MSRS
+ | CPUMCTX_EXTRN_SYSENTER_MSRS;
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | fImport);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedVmload(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | fImport);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_SUCCESS)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_FS | HM_CHANGED_GUEST_GS
+ | HM_CHANGED_GUEST_TR | HM_CHANGED_GUEST_LDTR
+ | HM_CHANGED_GUEST_KERNEL_GS_BASE | HM_CHANGED_GUEST_SYSCALL_MSRS
+ | HM_CHANGED_GUEST_SYSENTER_MSR_MASK);
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for VMSAVE (SVM_EXIT_VMSAVE). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitVmsave(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ Assert(!pVmcb->ctrl.LbrVirt.n.u1VirtVmsaveVmload);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedVmsave(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for INVLPGA (SVM_EXIT_INVLPGA). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitInvlpga(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedInvlpga(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for VMRUN (SVM_EXIT_VMRUN). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitVmrun(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+    /* We shall import the entire state here, just in case we enter and continue execution of
+       the nested-guest with hardware-assisted SVM in ring-0; we would be switching VMCBs and
+       could otherwise lose part of the CPU state. */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitVmentry, z);
+ if (fSupportsNextRipSave)
+ {
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedVmrun(pVCpu, cbInstr);
+ }
+ else
+ {
+        /* We use IEMExecOneBypassEx() here as it suppresses attempts to continue emulating any
+           instruction(s) when interrupt inhibition is set as part of emulating the VMRUN
+           instruction itself, see @bugref{7243#c126}. */
+ rcStrict = IEMExecOneBypassEx(pVCpu, CPUMCTX2CORE(&pVCpu->cpum.GstCtx), NULL /* pcbWritten */);
+ }
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitVmentry, z);
+
+ if (rcStrict == VINF_SUCCESS)
+ {
+ rcStrict = VINF_SVM_VMRUN;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_SVM_VMRUN_MASK);
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * Nested-guest \#VMEXIT handler for debug exceptions (SVM_EXIT_XCPT_1).
+ * Unconditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmNestedExitXcptDB(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ if (pVCpu->hm.s.Event.fPending)
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterpret);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+
+ hmR0SvmSetPendingXcptDB(pVCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Nested-guest \#VMEXIT handler for breakpoint exceptions (SVM_EXIT_XCPT_3).
+ * Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmNestedExitXcptBP(PVMCPUCC pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_BP;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT_SVM */
+
+/** @} */
+
diff --git a/src/VBox/VMM/VMMR0/HMSVMR0.h b/src/VBox/VMM/VMMR0/HMSVMR0.h
new file mode 100644
index 00000000..3ba777ea
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/HMSVMR0.h
@@ -0,0 +1,82 @@
+/* $Id: HMSVMR0.h $ */
+/** @file
+ * HM SVM (AMD-V) - Internal header file.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VMM_INCLUDED_SRC_VMMR0_HMSVMR0_h
+#define VMM_INCLUDED_SRC_VMMR0_HMSVMR0_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <VBox/cdefs.h>
+#include <VBox/types.h>
+#include <VBox/vmm/hm.h>
+#include <VBox/vmm/hm_svm.h>
+
+RT_C_DECLS_BEGIN
+
+/** @defgroup grp_svm_int Internal
+ * @ingroup grp_svm
+ * @internal
+ * @{
+ */
+
+#ifdef IN_RING0
+
+VMMR0DECL(int) SVMR0GlobalInit(void);
+VMMR0DECL(void) SVMR0GlobalTerm(void);
+VMMR0DECL(int) SVMR0Enter(PVMCPUCC pVCpu);
+VMMR0DECL(void) SVMR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit);
+VMMR0DECL(int) SVMR0CallRing3Callback(PVMCPUCC pVCpu, VMMCALLRING3 enmOperation);
+VMMR0DECL(int) SVMR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvPageCpu, RTHCPHYS HCPhysCpuPage,
+ bool fEnabledBySystem, PCSUPHWVIRTMSRS pHwvirtMsrs);
+VMMR0DECL(int) SVMR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvPageCpu, RTHCPHYS pPageCpuPhys);
+VMMR0DECL(int) SVMR0InitVM(PVMCC pVM);
+VMMR0DECL(int) SVMR0TermVM(PVMCC pVM);
+VMMR0DECL(int) SVMR0SetupVM(PVMCC pVM);
+VMMR0DECL(VBOXSTRICTRC) SVMR0RunGuestCode(PVMCPUCC pVCpu);
+VMMR0DECL(int) SVMR0ExportHostState(PVMCPUCC pVCpu);
+VMMR0DECL(int) SVMR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat);
+VMMR0DECL(int) SVMR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt);
+
+/**
+ * Prepares for and executes VMRUN (64-bit register context).
+ *
+ * @returns VBox status code.
+ * @param pVMCBHostPhys Physical address of host VMCB.
+ * @param pVMCBPhys Physical address of the VMCB.
+ * @param pCtx Pointer to the guest CPU context.
+ * @param pVM The cross context VM structure. (Not used.)
+ * @param pVCpu The cross context virtual CPU structure. (Not used.)
+ */
+DECLASM(int) SVMR0VMRun(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVMCC pVM, PVMCPUCC pVCpu);
+
+/**
+ * Executes INVLPGA.
+ *
+ * @param pPageGC Virtual page to invalidate.
+ * @param u32ASID Tagged TLB id.
+ */
+DECLASM(void) SVMR0InvlpgA(RTGCPTR pPageGC, uint32_t u32ASID);
+
+#endif /* IN_RING0 */
+
+/** @} */
+
+RT_C_DECLS_END
+
+#endif /* !VMM_INCLUDED_SRC_VMMR0_HMSVMR0_h */
+
diff --git a/src/VBox/VMM/VMMR0/HMVMXR0.cpp b/src/VBox/VMM/VMMR0/HMVMXR0.cpp
new file mode 100644
index 00000000..4ceb3e36
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/HMVMXR0.cpp
@@ -0,0 +1,17380 @@
+/* $Id: HMVMXR0.cpp $ */
+/** @file
+ * HM VMX (Intel VT-x) - Host Context Ring-0.
+ */
+
+/*
+ * Copyright (C) 2012-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_HM
+#define VMCPU_INCL_CPUM_GST_CTX
+#include <iprt/x86.h>
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/thread.h>
+#include <iprt/mem.h>
+#include <iprt/mp.h>
+
+#include <VBox/vmm/pdmapi.h>
+#include <VBox/vmm/dbgf.h>
+#include <VBox/vmm/iem.h>
+#include <VBox/vmm/iom.h>
+#include <VBox/vmm/tm.h>
+#include <VBox/vmm/em.h>
+#include <VBox/vmm/gim.h>
+#include <VBox/vmm/apic.h>
+#include "HMInternal.h"
+#include <VBox/vmm/vmcc.h>
+#include <VBox/vmm/hmvmxinline.h>
+#include "HMVMXR0.h"
+#include "dtrace/VBoxVMM.h"
+
+#ifdef DEBUG_ramshankar
+# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
+# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
+# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
+# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
+# define HMVMX_ALWAYS_CLEAN_TRANSIENT
+# define HMVMX_ALWAYS_CHECK_GUEST_STATE
+# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
+# define HMVMX_ALWAYS_TRAP_PF
+# define HMVMX_ALWAYS_FLUSH_TLB
+# define HMVMX_ALWAYS_SWAP_EFER
+#endif
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** Use the function table. */
+#define HMVMX_USE_FUNCTION_TABLE
+
+/** Determine which tagged-TLB flush handler to use. */
+#define HMVMX_FLUSH_TAGGED_TLB_EPT_VPID 0
+#define HMVMX_FLUSH_TAGGED_TLB_EPT 1
+#define HMVMX_FLUSH_TAGGED_TLB_VPID 2
+#define HMVMX_FLUSH_TAGGED_TLB_NONE 3
+
+/**
+ * Flags to skip redundant reads of some common VMCS fields that are not part of
+ * the guest-CPU or VCPU state but are needed while handling VM-exits.
+ */
+#define HMVMX_READ_IDT_VECTORING_INFO RT_BIT_32(0)
+#define HMVMX_READ_IDT_VECTORING_ERROR_CODE RT_BIT_32(1)
+#define HMVMX_READ_EXIT_QUALIFICATION RT_BIT_32(2)
+#define HMVMX_READ_EXIT_INSTR_LEN RT_BIT_32(3)
+#define HMVMX_READ_EXIT_INTERRUPTION_INFO RT_BIT_32(4)
+#define HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE RT_BIT_32(5)
+#define HMVMX_READ_EXIT_INSTR_INFO RT_BIT_32(6)
+#define HMVMX_READ_GUEST_LINEAR_ADDR RT_BIT_32(7)
+#define HMVMX_READ_GUEST_PHYSICAL_ADDR RT_BIT_32(8)
+#define HMVMX_READ_GUEST_PENDING_DBG_XCPTS RT_BIT_32(9)
+
+/** All the VMCS fields required for processing of exception/NMI VM-exits. */
+#define HMVMX_READ_XCPT_INFO ( HMVMX_READ_EXIT_INTERRUPTION_INFO \
+ | HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE \
+ | HMVMX_READ_EXIT_INSTR_LEN \
+ | HMVMX_READ_IDT_VECTORING_INFO \
+ | HMVMX_READ_IDT_VECTORING_ERROR_CODE)
+
+/** Assert that all the given fields have been read from the VMCS. */
+#ifdef VBOX_STRICT
+# define HMVMX_ASSERT_READ(a_pVmxTransient, a_fReadFields) \
+ do { \
+ uint32_t const fVmcsFieldRead = ASMAtomicUoReadU32(&pVmxTransient->fVmcsFieldsRead); \
+ Assert((fVmcsFieldRead & (a_fReadFields)) == (a_fReadFields)); \
+ } while (0)
+#else
+# define HMVMX_ASSERT_READ(a_pVmxTransient, a_fReadFields) do { } while (0)
+#endif
+
+/**
+ * Subset of the guest-CPU state that is kept by VMX R0 code while executing the
+ * guest using hardware-assisted VMX.
+ *
+ * This excludes state like GPRs (other than RSP) which are always
+ * swapped and restored across the world-switch, and also registers like the
+ * EFER MSR which cannot be modified by the guest without causing a VM-exit.
+ */
+#define HMVMX_CPUMCTX_EXTRN_ALL ( CPUMCTX_EXTRN_RIP \
+ | CPUMCTX_EXTRN_RFLAGS \
+ | CPUMCTX_EXTRN_RSP \
+ | CPUMCTX_EXTRN_SREG_MASK \
+ | CPUMCTX_EXTRN_TABLE_MASK \
+ | CPUMCTX_EXTRN_KERNEL_GS_BASE \
+ | CPUMCTX_EXTRN_SYSCALL_MSRS \
+ | CPUMCTX_EXTRN_SYSENTER_MSRS \
+ | CPUMCTX_EXTRN_TSC_AUX \
+ | CPUMCTX_EXTRN_OTHER_MSRS \
+ | CPUMCTX_EXTRN_CR0 \
+ | CPUMCTX_EXTRN_CR3 \
+ | CPUMCTX_EXTRN_CR4 \
+ | CPUMCTX_EXTRN_DR7 \
+ | CPUMCTX_EXTRN_HWVIRT \
+ | CPUMCTX_EXTRN_HM_VMX_MASK)
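+
+/*
+ * A minimal sketch of how the mask above is intended to be used: lightweight
+ * exit handlers import only the bits of guest state they touch, while paths
+ * that may go all the way to ring-3 import the full set. The handler context
+ * here is hypothetical; hmR0VmxImportGuestState() is forward-declared later in
+ * this file with the signature used below.
+ */
+#if 0 /* illustrative sketch, not part of the build */
+    /* Import just RIP and RFLAGS for a cheap exit... */
+    int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
+    AssertRCReturn(rc, rc);
+    /* ...or everything HM keeps lazily when the exit might need handling in ring-3. */
+    rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+    AssertRCReturn(rc, rc);
+#endif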
+
+/**
+ * Exception bitmap mask for real-mode guests (real-on-v86).
+ *
+ * We need to intercept all exceptions manually except:
+ * - \#AC and \#DB are always intercepted to prevent the CPU from deadlocking
+ * due to bugs in Intel CPUs.
+ * - \#PF need not be intercepted even in real-mode if we have nested paging
+ * support.
+ */
+#define HMVMX_REAL_MODE_XCPT_MASK ( RT_BIT(X86_XCPT_DE) /* always: | RT_BIT(X86_XCPT_DB) */ | RT_BIT(X86_XCPT_NMI) \
+ | RT_BIT(X86_XCPT_BP) | RT_BIT(X86_XCPT_OF) | RT_BIT(X86_XCPT_BR) \
+ | RT_BIT(X86_XCPT_UD) | RT_BIT(X86_XCPT_NM) | RT_BIT(X86_XCPT_DF) \
+ | RT_BIT(X86_XCPT_CO_SEG_OVERRUN) | RT_BIT(X86_XCPT_TS) | RT_BIT(X86_XCPT_NP) \
+ | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_GP) /* RT_BIT(X86_XCPT_PF) */ \
+ | RT_BIT(X86_XCPT_MF) /* always: | RT_BIT(X86_XCPT_AC) */ | RT_BIT(X86_XCPT_MC) \
+ | RT_BIT(X86_XCPT_XF))
+
+/** Maximum VM-instruction error number. */
+#define HMVMX_INSTR_ERROR_MAX 28
+
+/** Profiling macro. */
+#ifdef HM_PROFILE_EXIT_DISPATCH
+# define HMVMX_START_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitDispatch, ed)
+# define HMVMX_STOP_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitDispatch, ed)
+#else
+# define HMVMX_START_EXIT_DISPATCH_PROF() do { } while (0)
+# define HMVMX_STOP_EXIT_DISPATCH_PROF() do { } while (0)
+#endif
+
+/** Assert that preemption is disabled or covered by thread-context hooks. */
+#define HMVMX_ASSERT_PREEMPT_SAFE(a_pVCpu) Assert( VMMR0ThreadCtxHookIsEnabled((a_pVCpu)) \
+ || !RTThreadPreemptIsEnabled(NIL_RTTHREAD))
+
+/** Assert that we haven't migrated CPUs when thread-context hooks are not
+ * used. */
+#define HMVMX_ASSERT_CPU_SAFE(a_pVCpu) AssertMsg( VMMR0ThreadCtxHookIsEnabled((a_pVCpu)) \
+ || (a_pVCpu)->hm.s.idEnteredCpu == RTMpCpuId(), \
+ ("Illegal migration! Entered on CPU %u Current %u\n", \
+ (a_pVCpu)->hm.s.idEnteredCpu, RTMpCpuId()))
+
+/** Asserts that the given CPUMCTX_EXTRN_XXX bits are present in the guest-CPU
+ * context. */
+#define HMVMX_CPUMCTX_ASSERT(a_pVCpu, a_fExtrnMbz) AssertMsg(!((a_pVCpu)->cpum.GstCtx.fExtrn & (a_fExtrnMbz)), \
+ ("fExtrn=%#RX64 fExtrnMbz=%#RX64\n", \
+ (a_pVCpu)->cpum.GstCtx.fExtrn, (a_fExtrnMbz)))
+
+/** Log the VM-exit reason with an easily visible marker to identify it in a
+ * potential sea of logging data. */
+#define HMVMX_LOG_EXIT(a_pVCpu, a_uExitReason) \
+ do { \
+ Log4(("VM-exit: vcpu[%RU32] %85s -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-\n", (a_pVCpu)->idCpu, \
+ HMGetVmxExitName(a_uExitReason))); \
+ } while (0) \
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * VMX per-VCPU transient state.
+ *
+ * A state structure for holding miscellaneous information across
+ * VMX non-root operation and restored after the transition.
+ *
+ * Note: The members are ordered and aligned such that the most
+ * frequently used ones (in the guest execution loop) fall within
+ * the first cache line.
+ */
+typedef struct VMXTRANSIENT
+{
+ /** Mask of currently read VMCS fields; HMVMX_READ_XXX. */
+ uint32_t fVmcsFieldsRead;
+ /** The guest's TPR value used for TPR shadowing. */
+ uint8_t u8GuestTpr;
+ uint8_t abAlignment0[3];
+
+ /** Whether the VM-exit was caused by a page-fault during delivery of an
+ * external interrupt or NMI. */
+ bool fVectoringPF;
+ /** Whether the VM-exit was caused by a page-fault during delivery of a
+ * contributory exception or a page-fault. */
+ bool fVectoringDoublePF;
+ /** Whether the VM-entry failed or not. */
+ bool fVMEntryFailed;
+ /** Whether the TSC_AUX MSR needs to be removed from the auto-load/store MSR
+ * area after VM-exit. */
+ bool fRemoveTscAuxMsr;
+ /** Whether TSC-offsetting and VMX-preemption timer was updated before VM-entry. */
+ bool fUpdatedTscOffsettingAndPreemptTimer;
+ /** Whether we are currently executing a nested-guest. */
+ bool fIsNestedGuest;
+ /** Whether the guest debug state was active at the time of VM-exit. */
+ bool fWasGuestDebugStateActive;
+ /** Whether the hyper debug state was active at the time of VM-exit. */
+ bool fWasHyperDebugStateActive;
+
+ /** The basic VM-exit reason. */
+ uint32_t uExitReason;
+ /** The VM-exit interruption error code. */
+ uint32_t uExitIntErrorCode;
+
+ /** The host's rflags/eflags. */
+ RTCCUINTREG fEFlags;
+
+    /** The VM-exit qualification. */
+ uint64_t uExitQual;
+
+ /** The VMCS info. object. */
+ PVMXVMCSINFO pVmcsInfo;
+
+ /** The VM-exit interruption-information field. */
+ uint32_t uExitIntInfo;
+ /** The VM-exit instruction-length field. */
+ uint32_t cbExitInstr;
+
+ /** The VM-exit instruction-information field. */
+ VMXEXITINSTRINFO ExitInstrInfo;
+ /** IDT-vectoring information field. */
+ uint32_t uIdtVectoringInfo;
+
+ /** IDT-vectoring error code. */
+ uint32_t uIdtVectoringErrorCode;
+ uint32_t u32Alignment0;
+
+ /** The Guest-linear address. */
+ uint64_t uGuestLinearAddr;
+
+ /** The Guest-physical address. */
+ uint64_t uGuestPhysicalAddr;
+
+ /** The Guest pending-debug exceptions. */
+ uint64_t uGuestPendingDbgXcpts;
+
+ /** The VM-entry interruption-information field. */
+ uint32_t uEntryIntInfo;
+ /** The VM-entry exception error code field. */
+ uint32_t uEntryXcptErrorCode;
+
+ /** The VM-entry instruction length field. */
+ uint32_t cbEntryInstr;
+} VMXTRANSIENT;
+AssertCompileMemberSize(VMXTRANSIENT, ExitInstrInfo, sizeof(uint32_t));
+AssertCompileMemberAlignment(VMXTRANSIENT, fVmcsFieldsRead, 8);
+AssertCompileMemberAlignment(VMXTRANSIENT, fVectoringPF, 8);
+AssertCompileMemberAlignment(VMXTRANSIENT, uExitReason, 8);
+AssertCompileMemberAlignment(VMXTRANSIENT, fEFlags, 8);
+AssertCompileMemberAlignment(VMXTRANSIENT, uExitQual, 8);
+AssertCompileMemberAlignment(VMXTRANSIENT, pVmcsInfo, 8);
+AssertCompileMemberAlignment(VMXTRANSIENT, uExitIntInfo, 8);
+AssertCompileMemberAlignment(VMXTRANSIENT, ExitInstrInfo, 8);
+AssertCompileMemberAlignment(VMXTRANSIENT, uIdtVectoringErrorCode, 8);
+AssertCompileMemberAlignment(VMXTRANSIENT, uGuestLinearAddr, 8);
+AssertCompileMemberAlignment(VMXTRANSIENT, uGuestPhysicalAddr, 8);
+AssertCompileMemberAlignment(VMXTRANSIENT, uEntryIntInfo, 8);
+AssertCompileMemberAlignment(VMXTRANSIENT, cbEntryInstr, 8);
+/** Pointer to VMX transient state. */
+typedef VMXTRANSIENT *PVMXTRANSIENT;
+/** Pointer to a const VMX transient state. */
+typedef const VMXTRANSIENT *PCVMXTRANSIENT;
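+
+/*
+ * A minimal sketch of the read-once pattern that fVmcsFieldsRead and the
+ * HMVMX_READ_XXX flags above enable (the helper name is hypothetical and the
+ * block is illustrative only; the real read helpers appear further down in
+ * this file): a VMCS field is fetched at most once per VM-exit and then cached
+ * in the transient structure.
+ */
+#if 0 /* illustrative sketch, not part of the build */
+DECLINLINE(void) hmR0VmxReadExitInstrLenOnce(PVMXTRANSIENT pVmxTransient)
+{
+    if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INSTR_LEN))
+    {
+        /* Fetch the instruction-length field and record that it is now cached. */
+        int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &pVmxTransient->cbExitInstr);
+        AssertRC(rc);
+        pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INSTR_LEN;
+    }
+}
+#endif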
+
+/**
+ * VMX page allocation information.
+ */
+typedef struct
+{
+    uint32_t        fValid;       /**< Whether to allocate this page (e.g., based on a CPU feature). */
+    uint32_t        uPadding0;    /**< Padding to ensure arrays of these structs are aligned to a multiple of 8. */
+ PRTHCPHYS pHCPhys; /**< Where to store the host-physical address of the allocation. */
+ PRTR0PTR ppVirt; /**< Where to store the host-virtual address of the allocation. */
+} VMXPAGEALLOCINFO;
+/** Pointer to VMX page-allocation info. */
+typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
+/** Pointer to a const VMX page-allocation info. */
+typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
+AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
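+
+/*
+ * A minimal usage sketch for VMXPAGEALLOCINFO, assuming a single allocator
+ * helper (not shown here) that walks such a table and fills in the physical
+ * and virtual addresses of every entry whose fValid flag is set. All local
+ * names below are hypothetical and for illustration only.
+ */
+#if 0 /* illustrative sketch, not part of the build */
+    RTHCPHYS HCPhysMsrBitmap = NIL_RTHCPHYS;
+    RTR0PTR  pvMsrBitmap     = NIL_RTR0PTR;
+    RTHCPHYS HCPhysVirtApic  = NIL_RTHCPHYS;
+    RTR0PTR  pvVirtApic      = NIL_RTR0PTR;
+    VMXPAGEALLOCINFO aAllocInfo[] =
+    {   /* fValid,         uPadding0, pHCPhys,           ppVirt       */
+        {  fUseMsrBitmaps, 0,         &HCPhysMsrBitmap,  &pvMsrBitmap },
+        {  fUseTprShadow,  0,         &HCPhysVirtApic,   &pvVirtApic  },
+    };
+#endif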
+
+/**
+ * Memory operand read or write access.
+ */
+typedef enum VMXMEMACCESS
+{
+ VMXMEMACCESS_READ = 0,
+ VMXMEMACCESS_WRITE = 1
+} VMXMEMACCESS;
+
+/**
+ * VMX VM-exit handler.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ */
+#ifndef HMVMX_USE_FUNCTION_TABLE
+typedef VBOXSTRICTRC FNVMXEXITHANDLER(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient);
+#else
+typedef DECLCALLBACK(VBOXSTRICTRC) FNVMXEXITHANDLER(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient);
+/** Pointer to VM-exit handler. */
+typedef FNVMXEXITHANDLER *PFNVMXEXITHANDLER;
+#endif
+
+/**
+ * VMX VM-exit handler, non-strict status code.
+ *
+ * This is generally the same as FNVMXEXITHANDLER; the NSRC bit is just FYI.
+ *
+ * @returns VBox status code, no informational status code returned.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks This is not used on anything returning VERR_EM_INTERPRETER as the
+ * use of that status code will be replaced with VINF_EM_SOMETHING
+ * later when switching over to IEM.
+ */
+#ifndef HMVMX_USE_FUNCTION_TABLE
+typedef int FNVMXEXITHANDLERNSRC(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient);
+#else
+typedef FNVMXEXITHANDLER FNVMXEXITHANDLERNSRC;
+#endif
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+#ifndef HMVMX_USE_FUNCTION_TABLE
+DECLINLINE(VBOXSTRICTRC) hmR0VmxHandleExit(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient);
+# define HMVMX_EXIT_DECL DECLINLINE(VBOXSTRICTRC)
+# define HMVMX_EXIT_NSRC_DECL DECLINLINE(int)
+#else
+# define HMVMX_EXIT_DECL static DECLCALLBACK(VBOXSTRICTRC)
+# define HMVMX_EXIT_NSRC_DECL HMVMX_EXIT_DECL
+#endif
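+
+/*
+ * A minimal skeleton of a VM-exit handler declared with the macros above (the
+ * handler name and body are hypothetical; the real handlers are implemented
+ * further down in this file and are wired up via the dispatch table below).
+ */
+#if 0 /* illustrative sketch, not part of the build */
+HMVMX_EXIT_DECL hmR0VmxExitExample(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+    HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);                   /* Ring-0 with preemption disabled or hooked. */
+    HMVMX_LOG_EXIT(pVCpu, pVmxTransient->uExitReason);  /* Mark the exit in the log. */
+    return VINF_SUCCESS;                                /* Continue guest execution. */
+}
+#endif
+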
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+DECLINLINE(VBOXSTRICTRC) hmR0VmxHandleExitNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient);
+#endif
+
+static int hmR0VmxImportGuestState(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint64_t fWhat);
+
+/** @name VM-exit handler prototypes.
+ * @{
+ */
+static FNVMXEXITHANDLER hmR0VmxExitXcptOrNmi;
+static FNVMXEXITHANDLER hmR0VmxExitExtInt;
+static FNVMXEXITHANDLER hmR0VmxExitTripleFault;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitIntWindow;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitNmiWindow;
+static FNVMXEXITHANDLER hmR0VmxExitTaskSwitch;
+static FNVMXEXITHANDLER hmR0VmxExitCpuid;
+static FNVMXEXITHANDLER hmR0VmxExitGetsec;
+static FNVMXEXITHANDLER hmR0VmxExitHlt;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitInvd;
+static FNVMXEXITHANDLER hmR0VmxExitInvlpg;
+static FNVMXEXITHANDLER hmR0VmxExitRdpmc;
+static FNVMXEXITHANDLER hmR0VmxExitVmcall;
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+static FNVMXEXITHANDLER hmR0VmxExitVmclear;
+static FNVMXEXITHANDLER hmR0VmxExitVmlaunch;
+static FNVMXEXITHANDLER hmR0VmxExitVmptrld;
+static FNVMXEXITHANDLER hmR0VmxExitVmptrst;
+static FNVMXEXITHANDLER hmR0VmxExitVmread;
+static FNVMXEXITHANDLER hmR0VmxExitVmresume;
+static FNVMXEXITHANDLER hmR0VmxExitVmwrite;
+static FNVMXEXITHANDLER hmR0VmxExitVmxoff;
+static FNVMXEXITHANDLER hmR0VmxExitVmxon;
+static FNVMXEXITHANDLER hmR0VmxExitInvvpid;
+#endif
+static FNVMXEXITHANDLER hmR0VmxExitRdtsc;
+static FNVMXEXITHANDLER hmR0VmxExitMovCRx;
+static FNVMXEXITHANDLER hmR0VmxExitMovDRx;
+static FNVMXEXITHANDLER hmR0VmxExitIoInstr;
+static FNVMXEXITHANDLER hmR0VmxExitRdmsr;
+static FNVMXEXITHANDLER hmR0VmxExitWrmsr;
+static FNVMXEXITHANDLER hmR0VmxExitMwait;
+static FNVMXEXITHANDLER hmR0VmxExitMtf;
+static FNVMXEXITHANDLER hmR0VmxExitMonitor;
+static FNVMXEXITHANDLER hmR0VmxExitPause;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitTprBelowThreshold;
+static FNVMXEXITHANDLER hmR0VmxExitApicAccess;
+static FNVMXEXITHANDLER hmR0VmxExitEptViolation;
+static FNVMXEXITHANDLER hmR0VmxExitEptMisconfig;
+static FNVMXEXITHANDLER hmR0VmxExitRdtscp;
+static FNVMXEXITHANDLER hmR0VmxExitPreemptTimer;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitWbinvd;
+static FNVMXEXITHANDLER hmR0VmxExitXsetbv;
+static FNVMXEXITHANDLER hmR0VmxExitInvpcid;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitSetPendingXcptUD;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitErrInvalidGuestState;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitErrUnexpected;
+/** @} */
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/** @name Nested-guest VM-exit handler prototypes.
+ * @{
+ */
+static FNVMXEXITHANDLER hmR0VmxExitXcptOrNmiNested;
+static FNVMXEXITHANDLER hmR0VmxExitTripleFaultNested;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitIntWindowNested;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitNmiWindowNested;
+static FNVMXEXITHANDLER hmR0VmxExitTaskSwitchNested;
+static FNVMXEXITHANDLER hmR0VmxExitHltNested;
+static FNVMXEXITHANDLER hmR0VmxExitInvlpgNested;
+static FNVMXEXITHANDLER hmR0VmxExitRdpmcNested;
+static FNVMXEXITHANDLER hmR0VmxExitVmreadVmwriteNested;
+static FNVMXEXITHANDLER hmR0VmxExitRdtscNested;
+static FNVMXEXITHANDLER hmR0VmxExitMovCRxNested;
+static FNVMXEXITHANDLER hmR0VmxExitMovDRxNested;
+static FNVMXEXITHANDLER hmR0VmxExitIoInstrNested;
+static FNVMXEXITHANDLER hmR0VmxExitRdmsrNested;
+static FNVMXEXITHANDLER hmR0VmxExitWrmsrNested;
+static FNVMXEXITHANDLER hmR0VmxExitMwaitNested;
+static FNVMXEXITHANDLER hmR0VmxExitMtfNested;
+static FNVMXEXITHANDLER hmR0VmxExitMonitorNested;
+static FNVMXEXITHANDLER hmR0VmxExitPauseNested;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitTprBelowThresholdNested;
+static FNVMXEXITHANDLER hmR0VmxExitApicAccessNested;
+static FNVMXEXITHANDLER hmR0VmxExitApicWriteNested;
+static FNVMXEXITHANDLER hmR0VmxExitVirtEoiNested;
+static FNVMXEXITHANDLER hmR0VmxExitRdtscpNested;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitWbinvdNested;
+static FNVMXEXITHANDLER hmR0VmxExitInvpcidNested;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitErrInvalidGuestStateNested;
+static FNVMXEXITHANDLER hmR0VmxExitInstrNested;
+static FNVMXEXITHANDLER hmR0VmxExitInstrWithInfoNested;
+/** @} */
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Array of all VMCS fields.
+ * Any fields added to the VT-x spec. should be added here.
+ *
+ * Currently only used to derive shadow VMCS fields for hardware-assisted execution
+ * of nested-guests.
+ */
+static const uint32_t g_aVmcsFields[] =
+{
+ /* 16-bit control fields. */
+ VMX_VMCS16_VPID,
+ VMX_VMCS16_POSTED_INT_NOTIFY_VECTOR,
+ VMX_VMCS16_EPTP_INDEX,
+
+ /* 16-bit guest-state fields. */
+ VMX_VMCS16_GUEST_ES_SEL,
+ VMX_VMCS16_GUEST_CS_SEL,
+ VMX_VMCS16_GUEST_SS_SEL,
+ VMX_VMCS16_GUEST_DS_SEL,
+ VMX_VMCS16_GUEST_FS_SEL,
+ VMX_VMCS16_GUEST_GS_SEL,
+ VMX_VMCS16_GUEST_LDTR_SEL,
+ VMX_VMCS16_GUEST_TR_SEL,
+ VMX_VMCS16_GUEST_INTR_STATUS,
+ VMX_VMCS16_GUEST_PML_INDEX,
+
+    /* 16-bit host-state fields. */
+ VMX_VMCS16_HOST_ES_SEL,
+ VMX_VMCS16_HOST_CS_SEL,
+ VMX_VMCS16_HOST_SS_SEL,
+ VMX_VMCS16_HOST_DS_SEL,
+ VMX_VMCS16_HOST_FS_SEL,
+ VMX_VMCS16_HOST_GS_SEL,
+ VMX_VMCS16_HOST_TR_SEL,
+
+ /* 64-bit control fields. */
+ VMX_VMCS64_CTRL_IO_BITMAP_A_FULL,
+ VMX_VMCS64_CTRL_IO_BITMAP_A_HIGH,
+ VMX_VMCS64_CTRL_IO_BITMAP_B_FULL,
+ VMX_VMCS64_CTRL_IO_BITMAP_B_HIGH,
+ VMX_VMCS64_CTRL_MSR_BITMAP_FULL,
+ VMX_VMCS64_CTRL_MSR_BITMAP_HIGH,
+ VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL,
+ VMX_VMCS64_CTRL_EXIT_MSR_STORE_HIGH,
+ VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL,
+ VMX_VMCS64_CTRL_EXIT_MSR_LOAD_HIGH,
+ VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL,
+ VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_HIGH,
+ VMX_VMCS64_CTRL_EXEC_VMCS_PTR_FULL,
+ VMX_VMCS64_CTRL_EXEC_VMCS_PTR_HIGH,
+ VMX_VMCS64_CTRL_EXEC_PML_ADDR_FULL,
+ VMX_VMCS64_CTRL_EXEC_PML_ADDR_HIGH,
+ VMX_VMCS64_CTRL_TSC_OFFSET_FULL,
+ VMX_VMCS64_CTRL_TSC_OFFSET_HIGH,
+ VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL,
+ VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_HIGH,
+ VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL,
+ VMX_VMCS64_CTRL_APIC_ACCESSADDR_HIGH,
+ VMX_VMCS64_CTRL_POSTED_INTR_DESC_FULL,
+ VMX_VMCS64_CTRL_POSTED_INTR_DESC_HIGH,
+ VMX_VMCS64_CTRL_VMFUNC_CTRLS_FULL,
+ VMX_VMCS64_CTRL_VMFUNC_CTRLS_HIGH,
+ VMX_VMCS64_CTRL_EPTP_FULL,
+ VMX_VMCS64_CTRL_EPTP_HIGH,
+ VMX_VMCS64_CTRL_EOI_BITMAP_0_FULL,
+ VMX_VMCS64_CTRL_EOI_BITMAP_0_HIGH,
+ VMX_VMCS64_CTRL_EOI_BITMAP_1_FULL,
+ VMX_VMCS64_CTRL_EOI_BITMAP_1_HIGH,
+ VMX_VMCS64_CTRL_EOI_BITMAP_2_FULL,
+ VMX_VMCS64_CTRL_EOI_BITMAP_2_HIGH,
+ VMX_VMCS64_CTRL_EOI_BITMAP_3_FULL,
+ VMX_VMCS64_CTRL_EOI_BITMAP_3_HIGH,
+ VMX_VMCS64_CTRL_EPTP_LIST_FULL,
+ VMX_VMCS64_CTRL_EPTP_LIST_HIGH,
+ VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL,
+ VMX_VMCS64_CTRL_VMREAD_BITMAP_HIGH,
+ VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL,
+ VMX_VMCS64_CTRL_VMWRITE_BITMAP_HIGH,
+ VMX_VMCS64_CTRL_VIRTXCPT_INFO_ADDR_FULL,
+ VMX_VMCS64_CTRL_VIRTXCPT_INFO_ADDR_HIGH,
+ VMX_VMCS64_CTRL_XSS_EXITING_BITMAP_FULL,
+ VMX_VMCS64_CTRL_XSS_EXITING_BITMAP_HIGH,
+ VMX_VMCS64_CTRL_ENCLS_EXITING_BITMAP_FULL,
+ VMX_VMCS64_CTRL_ENCLS_EXITING_BITMAP_HIGH,
+ VMX_VMCS64_CTRL_TSC_MULTIPLIER_FULL,
+ VMX_VMCS64_CTRL_TSC_MULTIPLIER_HIGH,
+
+ /* 64-bit read-only data fields. */
+ VMX_VMCS64_RO_GUEST_PHYS_ADDR_FULL,
+ VMX_VMCS64_RO_GUEST_PHYS_ADDR_HIGH,
+
+ /* 64-bit guest-state fields. */
+ VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL,
+ VMX_VMCS64_GUEST_VMCS_LINK_PTR_HIGH,
+ VMX_VMCS64_GUEST_DEBUGCTL_FULL,
+ VMX_VMCS64_GUEST_DEBUGCTL_HIGH,
+ VMX_VMCS64_GUEST_PAT_FULL,
+ VMX_VMCS64_GUEST_PAT_HIGH,
+ VMX_VMCS64_GUEST_EFER_FULL,
+ VMX_VMCS64_GUEST_EFER_HIGH,
+ VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL,
+ VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_HIGH,
+ VMX_VMCS64_GUEST_PDPTE0_FULL,
+ VMX_VMCS64_GUEST_PDPTE0_HIGH,
+ VMX_VMCS64_GUEST_PDPTE1_FULL,
+ VMX_VMCS64_GUEST_PDPTE1_HIGH,
+ VMX_VMCS64_GUEST_PDPTE2_FULL,
+ VMX_VMCS64_GUEST_PDPTE2_HIGH,
+ VMX_VMCS64_GUEST_PDPTE3_FULL,
+ VMX_VMCS64_GUEST_PDPTE3_HIGH,
+ VMX_VMCS64_GUEST_BNDCFGS_FULL,
+ VMX_VMCS64_GUEST_BNDCFGS_HIGH,
+
+ /* 64-bit host-state fields. */
+ VMX_VMCS64_HOST_PAT_FULL,
+ VMX_VMCS64_HOST_PAT_HIGH,
+ VMX_VMCS64_HOST_EFER_FULL,
+ VMX_VMCS64_HOST_EFER_HIGH,
+ VMX_VMCS64_HOST_PERF_GLOBAL_CTRL_FULL,
+ VMX_VMCS64_HOST_PERF_GLOBAL_CTRL_HIGH,
+
+ /* 32-bit control fields. */
+ VMX_VMCS32_CTRL_PIN_EXEC,
+ VMX_VMCS32_CTRL_PROC_EXEC,
+ VMX_VMCS32_CTRL_EXCEPTION_BITMAP,
+ VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK,
+ VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH,
+ VMX_VMCS32_CTRL_CR3_TARGET_COUNT,
+ VMX_VMCS32_CTRL_EXIT,
+ VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT,
+ VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT,
+ VMX_VMCS32_CTRL_ENTRY,
+ VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT,
+ VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO,
+ VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE,
+ VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH,
+ VMX_VMCS32_CTRL_TPR_THRESHOLD,
+ VMX_VMCS32_CTRL_PROC_EXEC2,
+ VMX_VMCS32_CTRL_PLE_GAP,
+ VMX_VMCS32_CTRL_PLE_WINDOW,
+
+    /* 32-bit read-only fields. */
+ VMX_VMCS32_RO_VM_INSTR_ERROR,
+ VMX_VMCS32_RO_EXIT_REASON,
+ VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO,
+ VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE,
+ VMX_VMCS32_RO_IDT_VECTORING_INFO,
+ VMX_VMCS32_RO_IDT_VECTORING_ERROR_CODE,
+ VMX_VMCS32_RO_EXIT_INSTR_LENGTH,
+ VMX_VMCS32_RO_EXIT_INSTR_INFO,
+
+ /* 32-bit guest-state fields. */
+ VMX_VMCS32_GUEST_ES_LIMIT,
+ VMX_VMCS32_GUEST_CS_LIMIT,
+ VMX_VMCS32_GUEST_SS_LIMIT,
+ VMX_VMCS32_GUEST_DS_LIMIT,
+ VMX_VMCS32_GUEST_FS_LIMIT,
+ VMX_VMCS32_GUEST_GS_LIMIT,
+ VMX_VMCS32_GUEST_LDTR_LIMIT,
+ VMX_VMCS32_GUEST_TR_LIMIT,
+ VMX_VMCS32_GUEST_GDTR_LIMIT,
+ VMX_VMCS32_GUEST_IDTR_LIMIT,
+ VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS,
+ VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS,
+ VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS,
+ VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS,
+ VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS,
+ VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS,
+ VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS,
+ VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS,
+ VMX_VMCS32_GUEST_INT_STATE,
+ VMX_VMCS32_GUEST_ACTIVITY_STATE,
+ VMX_VMCS32_GUEST_SMBASE,
+ VMX_VMCS32_GUEST_SYSENTER_CS,
+ VMX_VMCS32_PREEMPT_TIMER_VALUE,
+
+ /* 32-bit host-state fields. */
+ VMX_VMCS32_HOST_SYSENTER_CS,
+
+ /* Natural-width control fields. */
+ VMX_VMCS_CTRL_CR0_MASK,
+ VMX_VMCS_CTRL_CR4_MASK,
+ VMX_VMCS_CTRL_CR0_READ_SHADOW,
+ VMX_VMCS_CTRL_CR4_READ_SHADOW,
+ VMX_VMCS_CTRL_CR3_TARGET_VAL0,
+ VMX_VMCS_CTRL_CR3_TARGET_VAL1,
+ VMX_VMCS_CTRL_CR3_TARGET_VAL2,
+ VMX_VMCS_CTRL_CR3_TARGET_VAL3,
+
+ /* Natural-width read-only data fields. */
+ VMX_VMCS_RO_EXIT_QUALIFICATION,
+ VMX_VMCS_RO_IO_RCX,
+ VMX_VMCS_RO_IO_RSI,
+ VMX_VMCS_RO_IO_RDI,
+ VMX_VMCS_RO_IO_RIP,
+ VMX_VMCS_RO_GUEST_LINEAR_ADDR,
+
+    /* Natural-width guest-state fields. */
+ VMX_VMCS_GUEST_CR0,
+ VMX_VMCS_GUEST_CR3,
+ VMX_VMCS_GUEST_CR4,
+ VMX_VMCS_GUEST_ES_BASE,
+ VMX_VMCS_GUEST_CS_BASE,
+ VMX_VMCS_GUEST_SS_BASE,
+ VMX_VMCS_GUEST_DS_BASE,
+ VMX_VMCS_GUEST_FS_BASE,
+ VMX_VMCS_GUEST_GS_BASE,
+ VMX_VMCS_GUEST_LDTR_BASE,
+ VMX_VMCS_GUEST_TR_BASE,
+ VMX_VMCS_GUEST_GDTR_BASE,
+ VMX_VMCS_GUEST_IDTR_BASE,
+ VMX_VMCS_GUEST_DR7,
+ VMX_VMCS_GUEST_RSP,
+ VMX_VMCS_GUEST_RIP,
+ VMX_VMCS_GUEST_RFLAGS,
+ VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS,
+ VMX_VMCS_GUEST_SYSENTER_ESP,
+ VMX_VMCS_GUEST_SYSENTER_EIP,
+
+    /* Natural-width host-state fields. */
+ VMX_VMCS_HOST_CR0,
+ VMX_VMCS_HOST_CR3,
+ VMX_VMCS_HOST_CR4,
+ VMX_VMCS_HOST_FS_BASE,
+ VMX_VMCS_HOST_GS_BASE,
+ VMX_VMCS_HOST_TR_BASE,
+ VMX_VMCS_HOST_GDTR_BASE,
+ VMX_VMCS_HOST_IDTR_BASE,
+ VMX_VMCS_HOST_SYSENTER_ESP,
+ VMX_VMCS_HOST_SYSENTER_EIP,
+ VMX_VMCS_HOST_RSP,
+ VMX_VMCS_HOST_RIP
+};
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+static const uint32_t g_aVmcsSegBase[] =
+{
+ VMX_VMCS_GUEST_ES_BASE,
+ VMX_VMCS_GUEST_CS_BASE,
+ VMX_VMCS_GUEST_SS_BASE,
+ VMX_VMCS_GUEST_DS_BASE,
+ VMX_VMCS_GUEST_FS_BASE,
+ VMX_VMCS_GUEST_GS_BASE
+};
+static const uint32_t g_aVmcsSegSel[] =
+{
+ VMX_VMCS16_GUEST_ES_SEL,
+ VMX_VMCS16_GUEST_CS_SEL,
+ VMX_VMCS16_GUEST_SS_SEL,
+ VMX_VMCS16_GUEST_DS_SEL,
+ VMX_VMCS16_GUEST_FS_SEL,
+ VMX_VMCS16_GUEST_GS_SEL
+};
+static const uint32_t g_aVmcsSegLimit[] =
+{
+ VMX_VMCS32_GUEST_ES_LIMIT,
+ VMX_VMCS32_GUEST_CS_LIMIT,
+ VMX_VMCS32_GUEST_SS_LIMIT,
+ VMX_VMCS32_GUEST_DS_LIMIT,
+ VMX_VMCS32_GUEST_FS_LIMIT,
+ VMX_VMCS32_GUEST_GS_LIMIT
+};
+static const uint32_t g_aVmcsSegAttr[] =
+{
+ VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS,
+ VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS,
+ VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS,
+ VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS,
+ VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS,
+ VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS
+};
+AssertCompile(RT_ELEMENTS(g_aVmcsSegSel) == X86_SREG_COUNT);
+AssertCompile(RT_ELEMENTS(g_aVmcsSegLimit) == X86_SREG_COUNT);
+AssertCompile(RT_ELEMENTS(g_aVmcsSegBase) == X86_SREG_COUNT);
+AssertCompile(RT_ELEMENTS(g_aVmcsSegAttr) == X86_SREG_COUNT);
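+
+/*
+ * A short sketch of how the parallel tables above are meant to be indexed: a
+ * single X86_SREG_XXX value selects the VMCS field encodings for a segment
+ * register's selector, base, limit and access rights. The surrounding context
+ * is hypothetical; this mirrors how the segment export/import code in this
+ * file uses the tables.
+ */
+#if 0 /* illustrative sketch, not part of the build */
+    uint8_t const  iSegReg  = X86_SREG_CS;
+    uint32_t const idxSel   = g_aVmcsSegSel[iSegReg];
+    uint32_t const idxBase  = g_aVmcsSegBase[iSegReg];
+    uint32_t const idxLimit = g_aVmcsSegLimit[iSegReg];
+    uint32_t const idxAttr  = g_aVmcsSegAttr[iSegReg];
+#endif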
+
+#ifdef HMVMX_USE_FUNCTION_TABLE
+/**
+ * VMX_EXIT dispatch table.
+ */
+static const PFNVMXEXITHANDLER g_apfnVMExitHandlers[VMX_EXIT_MAX + 1] =
+{
+ /* 0 VMX_EXIT_XCPT_OR_NMI */ hmR0VmxExitXcptOrNmi,
+ /* 1 VMX_EXIT_EXT_INT */ hmR0VmxExitExtInt,
+ /* 2 VMX_EXIT_TRIPLE_FAULT */ hmR0VmxExitTripleFault,
+ /* 3 VMX_EXIT_INIT_SIGNAL */ hmR0VmxExitErrUnexpected,
+ /* 4 VMX_EXIT_SIPI */ hmR0VmxExitErrUnexpected,
+ /* 5 VMX_EXIT_IO_SMI */ hmR0VmxExitErrUnexpected,
+ /* 6 VMX_EXIT_SMI */ hmR0VmxExitErrUnexpected,
+ /* 7 VMX_EXIT_INT_WINDOW */ hmR0VmxExitIntWindow,
+ /* 8 VMX_EXIT_NMI_WINDOW */ hmR0VmxExitNmiWindow,
+ /* 9 VMX_EXIT_TASK_SWITCH */ hmR0VmxExitTaskSwitch,
+ /* 10 VMX_EXIT_CPUID */ hmR0VmxExitCpuid,
+ /* 11 VMX_EXIT_GETSEC */ hmR0VmxExitGetsec,
+ /* 12 VMX_EXIT_HLT */ hmR0VmxExitHlt,
+ /* 13 VMX_EXIT_INVD */ hmR0VmxExitInvd,
+ /* 14 VMX_EXIT_INVLPG */ hmR0VmxExitInvlpg,
+ /* 15 VMX_EXIT_RDPMC */ hmR0VmxExitRdpmc,
+ /* 16 VMX_EXIT_RDTSC */ hmR0VmxExitRdtsc,
+ /* 17 VMX_EXIT_RSM */ hmR0VmxExitErrUnexpected,
+ /* 18 VMX_EXIT_VMCALL */ hmR0VmxExitVmcall,
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /* 19 VMX_EXIT_VMCLEAR */ hmR0VmxExitVmclear,
+ /* 20 VMX_EXIT_VMLAUNCH */ hmR0VmxExitVmlaunch,
+ /* 21 VMX_EXIT_VMPTRLD */ hmR0VmxExitVmptrld,
+ /* 22 VMX_EXIT_VMPTRST */ hmR0VmxExitVmptrst,
+ /* 23 VMX_EXIT_VMREAD */ hmR0VmxExitVmread,
+ /* 24 VMX_EXIT_VMRESUME */ hmR0VmxExitVmresume,
+ /* 25 VMX_EXIT_VMWRITE */ hmR0VmxExitVmwrite,
+ /* 26 VMX_EXIT_VMXOFF */ hmR0VmxExitVmxoff,
+ /* 27 VMX_EXIT_VMXON */ hmR0VmxExitVmxon,
+#else
+ /* 19 VMX_EXIT_VMCLEAR */ hmR0VmxExitSetPendingXcptUD,
+ /* 20 VMX_EXIT_VMLAUNCH */ hmR0VmxExitSetPendingXcptUD,
+ /* 21 VMX_EXIT_VMPTRLD */ hmR0VmxExitSetPendingXcptUD,
+ /* 22 VMX_EXIT_VMPTRST */ hmR0VmxExitSetPendingXcptUD,
+ /* 23 VMX_EXIT_VMREAD */ hmR0VmxExitSetPendingXcptUD,
+ /* 24 VMX_EXIT_VMRESUME */ hmR0VmxExitSetPendingXcptUD,
+ /* 25 VMX_EXIT_VMWRITE */ hmR0VmxExitSetPendingXcptUD,
+ /* 26 VMX_EXIT_VMXOFF */ hmR0VmxExitSetPendingXcptUD,
+ /* 27 VMX_EXIT_VMXON */ hmR0VmxExitSetPendingXcptUD,
+#endif
+ /* 28 VMX_EXIT_MOV_CRX */ hmR0VmxExitMovCRx,
+ /* 29 VMX_EXIT_MOV_DRX */ hmR0VmxExitMovDRx,
+ /* 30 VMX_EXIT_IO_INSTR */ hmR0VmxExitIoInstr,
+ /* 31 VMX_EXIT_RDMSR */ hmR0VmxExitRdmsr,
+ /* 32 VMX_EXIT_WRMSR */ hmR0VmxExitWrmsr,
+ /* 33 VMX_EXIT_ERR_INVALID_GUEST_STATE */ hmR0VmxExitErrInvalidGuestState,
+ /* 34 VMX_EXIT_ERR_MSR_LOAD */ hmR0VmxExitErrUnexpected,
+ /* 35 UNDEFINED */ hmR0VmxExitErrUnexpected,
+ /* 36 VMX_EXIT_MWAIT */ hmR0VmxExitMwait,
+ /* 37 VMX_EXIT_MTF */ hmR0VmxExitMtf,
+ /* 38 UNDEFINED */ hmR0VmxExitErrUnexpected,
+ /* 39 VMX_EXIT_MONITOR */ hmR0VmxExitMonitor,
+ /* 40 VMX_EXIT_PAUSE */ hmR0VmxExitPause,
+ /* 41 VMX_EXIT_ERR_MACHINE_CHECK */ hmR0VmxExitErrUnexpected,
+ /* 42 UNDEFINED */ hmR0VmxExitErrUnexpected,
+ /* 43 VMX_EXIT_TPR_BELOW_THRESHOLD */ hmR0VmxExitTprBelowThreshold,
+ /* 44 VMX_EXIT_APIC_ACCESS */ hmR0VmxExitApicAccess,
+ /* 45 VMX_EXIT_VIRTUALIZED_EOI */ hmR0VmxExitErrUnexpected,
+ /* 46 VMX_EXIT_GDTR_IDTR_ACCESS */ hmR0VmxExitErrUnexpected,
+ /* 47 VMX_EXIT_LDTR_TR_ACCESS */ hmR0VmxExitErrUnexpected,
+ /* 48 VMX_EXIT_EPT_VIOLATION */ hmR0VmxExitEptViolation,
+ /* 49 VMX_EXIT_EPT_MISCONFIG */ hmR0VmxExitEptMisconfig,
+ /* 50 VMX_EXIT_INVEPT */ hmR0VmxExitSetPendingXcptUD,
+ /* 51 VMX_EXIT_RDTSCP */ hmR0VmxExitRdtscp,
+ /* 52 VMX_EXIT_PREEMPT_TIMER */ hmR0VmxExitPreemptTimer,
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /* 53 VMX_EXIT_INVVPID */ hmR0VmxExitInvvpid,
+#else
+ /* 53 VMX_EXIT_INVVPID */ hmR0VmxExitSetPendingXcptUD,
+#endif
+ /* 54 VMX_EXIT_WBINVD */ hmR0VmxExitWbinvd,
+ /* 55 VMX_EXIT_XSETBV */ hmR0VmxExitXsetbv,
+ /* 56 VMX_EXIT_APIC_WRITE */ hmR0VmxExitErrUnexpected,
+ /* 57 VMX_EXIT_RDRAND */ hmR0VmxExitErrUnexpected,
+ /* 58 VMX_EXIT_INVPCID */ hmR0VmxExitInvpcid,
+ /* 59 VMX_EXIT_VMFUNC */ hmR0VmxExitErrUnexpected,
+ /* 60 VMX_EXIT_ENCLS */ hmR0VmxExitErrUnexpected,
+ /* 61 VMX_EXIT_RDSEED */ hmR0VmxExitErrUnexpected,
+ /* 62 VMX_EXIT_PML_FULL */ hmR0VmxExitErrUnexpected,
+ /* 63 VMX_EXIT_XSAVES */ hmR0VmxExitErrUnexpected,
+ /* 64 VMX_EXIT_XRSTORS */ hmR0VmxExitErrUnexpected,
+ /* 65 UNDEFINED */ hmR0VmxExitErrUnexpected,
+ /* 66 VMX_EXIT_SPP_EVENT */ hmR0VmxExitErrUnexpected,
+ /* 67 VMX_EXIT_UMWAIT */ hmR0VmxExitErrUnexpected,
+ /* 68 VMX_EXIT_TPAUSE */ hmR0VmxExitErrUnexpected,
+};
+#endif /* HMVMX_USE_FUNCTION_TABLE */
+
+#if defined(VBOX_STRICT) && defined(LOG_ENABLED)
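+/** VMX instruction error descriptions, indexed by the VM-instruction error number
+ *  (see Intel spec. "VM-Instruction Error Numbers"). */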
+static const char * const g_apszVmxInstrErrors[HMVMX_INSTR_ERROR_MAX + 1] =
+{
+ /* 0 */ "(Not Used)",
+ /* 1 */ "VMCALL executed in VMX root operation.",
+ /* 2 */ "VMCLEAR with invalid physical address.",
+ /* 3 */ "VMCLEAR with VMXON pointer.",
+ /* 4 */ "VMLAUNCH with non-clear VMCS.",
+ /* 5 */ "VMRESUME with non-launched VMCS.",
+ /* 6 */ "VMRESUME after VMXOFF",
+ /* 7 */ "VM-entry with invalid control fields.",
+ /* 8 */ "VM-entry with invalid host state fields.",
+ /* 9 */ "VMPTRLD with invalid physical address.",
+ /* 10 */ "VMPTRLD with VMXON pointer.",
+ /* 11 */ "VMPTRLD with incorrect revision identifier.",
+ /* 12 */ "VMREAD/VMWRITE from/to unsupported VMCS component.",
+ /* 13 */ "VMWRITE to read-only VMCS component.",
+ /* 14 */ "(Not Used)",
+ /* 15 */ "VMXON executed in VMX root operation.",
+ /* 16 */ "VM-entry with invalid executive-VMCS pointer.",
+    /* 17 */ "VM-entry with non-launched executive VMCS.",
+ /* 18 */ "VM-entry with executive-VMCS pointer not VMXON pointer.",
+ /* 19 */ "VMCALL with non-clear VMCS.",
+ /* 20 */ "VMCALL with invalid VM-exit control fields.",
+ /* 21 */ "(Not Used)",
+ /* 22 */ "VMCALL with incorrect MSEG revision identifier.",
+ /* 23 */ "VMXOFF under dual monitor treatment of SMIs and SMM.",
+ /* 24 */ "VMCALL with invalid SMM-monitor features.",
+ /* 25 */ "VM-entry with invalid VM-execution control fields in executive VMCS.",
+ /* 26 */ "VM-entry with events blocked by MOV SS.",
+ /* 27 */ "(Not Used)",
+ /* 28 */ "Invalid operand to INVEPT/INVVPID."
+};
+#endif /* VBOX_STRICT && LOG_ENABLED */
+
+
+/**
+ * Checks if the given MSR is part of the lastbranch-from-IP MSR stack.
+ * @returns @c true if it's part of LBR stack, @c false otherwise.
+ *
+ * @param pVM The cross context VM structure.
+ * @param idMsr The MSR.
+ * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
+ * Optional, can be NULL.
+ *
+ * @remarks Must only be called when LBR is enabled.
+ */
+DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVM pVM, uint32_t idMsr, uint32_t *pidxMsr)
+{
+ Assert(pVM->hm.s.vmx.fLbr);
+ Assert(pVM->hm.s.vmx.idLbrFromIpMsrFirst);
+ uint32_t const cLbrStack = pVM->hm.s.vmx.idLbrFromIpMsrLast - pVM->hm.s.vmx.idLbrFromIpMsrFirst + 1;
+ uint32_t const idxMsr = idMsr - pVM->hm.s.vmx.idLbrFromIpMsrFirst;
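+ /* Note: the unsigned subtraction wraps around for MSRs below the first LBR MSR, so those also fail the range check below. */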
+ if (idxMsr < cLbrStack)
+ {
+ if (pidxMsr)
+ *pidxMsr = idxMsr;
+ return true;
+ }
+ return false;
+}
+
+
+/**
+ * Checks if the given MSR is part of the lastbranch-to-IP MSR stack.
+ * @returns @c true if it's part of LBR stack, @c false otherwise.
+ *
+ * @param pVM The cross context VM structure.
+ * @param idMsr The MSR.
+ * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
+ * Optional, can be NULL.
+ *
+ * @remarks Must only be called when LBR is enabled and when lastbranch-to-IP MSRs
+ * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
+ */
+DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVM pVM, uint32_t idMsr, uint32_t *pidxMsr)
+{
+ Assert(pVM->hm.s.vmx.fLbr);
+ if (pVM->hm.s.vmx.idLbrToIpMsrFirst)
+ {
+ uint32_t const cLbrStack = pVM->hm.s.vmx.idLbrToIpMsrLast - pVM->hm.s.vmx.idLbrToIpMsrFirst + 1;
+ uint32_t const idxMsr = idMsr - pVM->hm.s.vmx.idLbrToIpMsrFirst;
+ if (idxMsr < cLbrStack)
+ {
+ if (pidxMsr)
+ *pidxMsr = idxMsr;
+ return true;
+ }
+ }
+ return false;
+}
+
+
+/**
+ * Gets the CR0 guest/host mask.
+ *
+ * These bits typically do not change through the lifetime of a VM. Any bit set in
+ * this mask is owned by the host/hypervisor and would cause a VM-exit when modified
+ * by the guest.
+ *
+ * @returns The CR0 guest/host mask.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static uint64_t hmR0VmxGetFixedCr0Mask(PCVMCPUCC pVCpu)
+{
+ /*
+ * Modifications by the guest to CR0 bits that VT-x ignores saving/restoring (CD, ET, NW)
+ * and to CR0 bits that we require for shadow paging (PG) must cause VM-exits.
+ *
+ * Furthermore, modifications to any bits that are reserved/unspecified currently
+ * by the Intel spec. must also cause a VM-exit. This prevents unpredictable behavior
+ * when future CPUs specify and use currently reserved/unspecified bits.
+ */
+ /** @todo Avoid intercepting CR0.PE with unrestricted guest execution. Fix PGM
+ * enmGuestMode to be in-sync with the current mode. See @bugref{6398}
+ * and @bugref{6944}. */
+ PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ return ( X86_CR0_PE
+ | X86_CR0_NE
+ | (pVM->hm.s.fNestedPaging ? 0 : X86_CR0_WP)
+ | X86_CR0_PG
+ | VMX_EXIT_HOST_CR0_IGNORE_MASK);
+}
+
+
+/**
+ * Gets the CR4 guest/host mask.
+ *
+ * These bits typically do not change through the lifetime of a VM. Any bit set in
+ * this mask is owned by the host/hypervisor and would cause a VM-exit when modified
+ * by the guest.
+ *
+ * @returns The CR4 guest/host mask.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static uint64_t hmR0VmxGetFixedCr4Mask(PCVMCPUCC pVCpu)
+{
+ /*
+ * We construct a mask of all CR4 bits that the guest can modify without causing
+ * a VM-exit. Then invert this mask to obtain all CR4 bits that should cause
+ * a VM-exit when the guest attempts to modify them when executing using
+ * hardware-assisted VMX.
+ *
+ * When a feature is not exposed to the guest (and may be present on the host),
+ * we want to intercept guest modifications to the bit so we can emulate proper
+ * behavior (e.g., #GP).
+ *
+ * Furthermore, only modifications to those bits that don't require immediate
+ * emulation are allowed. For example, PCIDE is excluded because its behavior
+ * depends on CR3, which might not always be the guest value while executing
+ * using hardware-assisted VMX.
+ */
+ PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ bool const fFsGsBase = pVM->cpum.ro.GuestFeatures.fFsGsBase;
+ bool const fXSaveRstor = pVM->cpum.ro.GuestFeatures.fXSaveRstor;
+ bool const fFxSaveRstor = pVM->cpum.ro.GuestFeatures.fFxSaveRstor;
+
+ /*
+ * Paranoia.
+ * Ensure features exposed to the guest are present on the host.
+ */
+ Assert(!fFsGsBase || pVM->cpum.ro.HostFeatures.fFsGsBase);
+ Assert(!fXSaveRstor || pVM->cpum.ro.HostFeatures.fXSaveRstor);
+ Assert(!fFxSaveRstor || pVM->cpum.ro.HostFeatures.fFxSaveRstor);
+
+ uint64_t const fGstMask = ( X86_CR4_PVI
+ | X86_CR4_TSD
+ | X86_CR4_DE
+ | X86_CR4_MCE
+ | X86_CR4_PCE
+ | X86_CR4_OSXMMEEXCPT
+ | (fFsGsBase ? X86_CR4_FSGSBASE : 0)
+ | (fXSaveRstor ? X86_CR4_OSXSAVE : 0)
+ | (fFxSaveRstor ? X86_CR4_OSFXSR : 0));
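+ /* Inverting the guest-modifiable mask also marks all currently reserved CR4 bits as host-owned. */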
+ return ~fGstMask;
+}
+
+
+/**
+ * Returns whether the VM-exit MSR-store area differs from the VM-exit MSR-load
+ * area.
+ *
+ * @returns @c true if it's different, @c false otherwise.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
+{
+ return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
+ && pVmcsInfo->pvGuestMsrStore);
+}
+
+
+/**
+ * Sets the given Processor-based VM-execution controls.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ * @param uProcCtls The Processor-based VM-execution controls to set.
+ */
+static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
+{
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
+ {
+ pVmcsInfo->u32ProcCtls |= uProcCtls;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
+ AssertRC(rc);
+ }
+}
+
+
+/**
+ * Removes the given Processor-based VM-execution controls.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param uProcCtls The Processor-based VM-execution controls to remove.
+ *
+ * @remarks When executing a nested-guest, this will not remove any of the specified
+ * controls if the nested hypervisor has set any one of them.
+ */
+static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
+{
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if (pVmcsInfo->u32ProcCtls & uProcCtls)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ bool const fRemoveCtls = !pVmxTransient->fIsNestedGuest
+ ? true
+ : !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls);
+#else
+ NOREF(pVCpu);
+ bool const fRemoveCtls = true;
+#endif
+ if (fRemoveCtls)
+ {
+ pVmcsInfo->u32ProcCtls &= ~uProcCtls;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
+ AssertRC(rc);
+ }
+ }
+}
+
+
+/**
+ * Sets the TSC offset for the current VMCS.
+ *
+ * @param uTscOffset The TSC offset to set.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
+{
+ if (pVmcsInfo->u64TscOffset != uTscOffset)
+ {
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
+ AssertRC(rc);
+ pVmcsInfo->u64TscOffset = uTscOffset;
+ }
+}
+
+
+/**
+ * Adds one or more exceptions to the exception bitmap and commits it to the current
+ * VMCS.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ * @param uXcptMask The exception(s) to add.
+ */
+static void hmR0VmxAddXcptInterceptMask(PCVMXTRANSIENT pVmxTransient, uint32_t uXcptMask)
+{
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ uint32_t uXcptBitmap = pVmcsInfo->u32XcptBitmap;
+ if ((uXcptBitmap & uXcptMask) != uXcptMask)
+ {
+ uXcptBitmap |= uXcptMask;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
+ AssertRC(rc);
+ pVmcsInfo->u32XcptBitmap = uXcptBitmap;
+ }
+}
+
+
+/**
+ * Adds an exception to the exception bitmap and commits it to the current VMCS.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ * @param uXcpt The exception to add.
+ */
+static void hmR0VmxAddXcptIntercept(PCVMXTRANSIENT pVmxTransient, uint8_t uXcpt)
+{
+ Assert(uXcpt <= X86_XCPT_LAST);
+ hmR0VmxAddXcptInterceptMask(pVmxTransient, RT_BIT_32(uXcpt));
+}
+
+
+/**
+ * Removes one or more exceptions from the exception bitmap and commits it to the
+ * current VMCS.
+ *
+ * This takes care of not removing the exception intercept if a nested-guest
+ * requires the exception to be intercepted.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param uXcptMask The exception(s) to remove.
+ */
+static int hmR0VmxRemoveXcptInterceptMask(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t uXcptMask)
+{
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ uint32_t u32XcptBitmap = pVmcsInfo->u32XcptBitmap;
+ if (u32XcptBitmap & uXcptMask)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if (!pVmxTransient->fIsNestedGuest)
+ { /* likely */ }
+ else
+ {
+ PCVMXVVMCS pVmcsNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.CTX_SUFF(pVmcs);
+ uXcptMask &= ~pVmcsNstGst->u32XcptBitmap;
+ }
+#endif
+#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
+ uXcptMask &= ~( RT_BIT(X86_XCPT_BP)
+ | RT_BIT(X86_XCPT_DE)
+ | RT_BIT(X86_XCPT_NM)
+ | RT_BIT(X86_XCPT_TS)
+ | RT_BIT(X86_XCPT_UD)
+ | RT_BIT(X86_XCPT_NP)
+ | RT_BIT(X86_XCPT_SS)
+ | RT_BIT(X86_XCPT_GP)
+ | RT_BIT(X86_XCPT_PF)
+ | RT_BIT(X86_XCPT_MF));
+#elif defined(HMVMX_ALWAYS_TRAP_PF)
+ uXcptMask &= ~RT_BIT(X86_XCPT_PF);
+#endif
+ if (uXcptMask)
+ {
+ /* Validate we are not removing any essential exception intercepts. */
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging || !(uXcptMask & RT_BIT(X86_XCPT_PF)));
+ NOREF(pVCpu);
+ Assert(!(uXcptMask & RT_BIT(X86_XCPT_DB)));
+ Assert(!(uXcptMask & RT_BIT(X86_XCPT_AC)));
+
+ /* Remove it from the exception bitmap. */
+ u32XcptBitmap &= ~uXcptMask;
+
+ /* Commit and update the cache if necessary. */
+ if (pVmcsInfo->u32XcptBitmap != u32XcptBitmap)
+ {
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
+ AssertRC(rc);
+ pVmcsInfo->u32XcptBitmap = u32XcptBitmap;
+ }
+ }
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Removes an exception from the exception bitmap and commits it to the current
+ * VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param uXcpt The exception to remove.
+ */
+static int hmR0VmxRemoveXcptIntercept(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint8_t uXcpt)
+{
+ return hmR0VmxRemoveXcptInterceptMask(pVCpu, pVmxTransient, RT_BIT(uXcpt));
+}
+
+
+/**
+ * Loads the VMCS specified by the VMCS info. object.
+ *
+ * @returns VBox status code.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks Can be called with interrupts disabled.
+ */
+static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
+{
+ Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ int rc = VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
+ if (RT_SUCCESS(rc))
+ pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_CURRENT;
+ return rc;
+}
+
+
+/**
+ * Clears the VMCS specified by the VMCS info. object.
+ *
+ * @returns VBox status code.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks Can be called with interrupts disabled.
+ */
+static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
+{
+ Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
+ if (RT_SUCCESS(rc))
+ pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
+ return rc;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Loads the shadow VMCS specified by the VMCS info. object.
+ *
+ * @returns VBox status code.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks Can be called with interrupts disabled.
+ */
+static int hmR0VmxLoadShadowVmcs(PVMXVMCSINFO pVmcsInfo)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(pVmcsInfo->HCPhysShadowVmcs != 0 && pVmcsInfo->HCPhysShadowVmcs != NIL_RTHCPHYS);
+
+ int rc = VMXLoadVmcs(pVmcsInfo->HCPhysShadowVmcs);
+ if (RT_SUCCESS(rc))
+ pVmcsInfo->fShadowVmcsState |= VMX_V_VMCS_LAUNCH_STATE_CURRENT;
+ return rc;
+}
+
+
+/**
+ * Clears the shadow VMCS specified by the VMCS info. object.
+ *
+ * @returns VBox status code.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks Can be called with interrupts disabled.
+ */
+static int hmR0VmxClearShadowVmcs(PVMXVMCSINFO pVmcsInfo)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(pVmcsInfo->HCPhysShadowVmcs != 0 && pVmcsInfo->HCPhysShadowVmcs != NIL_RTHCPHYS);
+
+ int rc = VMXClearVmcs(pVmcsInfo->HCPhysShadowVmcs);
+ if (RT_SUCCESS(rc))
+ pVmcsInfo->fShadowVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
+ return rc;
+}
+
+
+/**
+ * Switches from and to the specified VMCSes.
+ *
+ * @returns VBox status code.
+ * @param pVmcsInfoFrom The VMCS info. object we are switching from.
+ * @param pVmcsInfoTo The VMCS info. object we are switching to.
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static int hmR0VmxSwitchVmcs(PVMXVMCSINFO pVmcsInfoFrom, PVMXVMCSINFO pVmcsInfoTo)
+{
+ /*
+ * Clear the VMCS we are switching out if it has not already been cleared.
+ * This will sync any CPU internal data back to the VMCS.
+ */
+ if (pVmcsInfoFrom->fVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
+ {
+ int rc = hmR0VmxClearVmcs(pVmcsInfoFrom);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * The shadow VMCS, if any, would not be active at this point since we
+ * would have cleared it while importing the virtual hardware-virtualization
+ * state as part of the VMLAUNCH/VMRESUME VM-exit. Hence, there's no need to
+ * clear the shadow VMCS here, just assert for safety.
+ */
+ Assert(!pVmcsInfoFrom->pvShadowVmcs || pVmcsInfoFrom->fShadowVmcsState == VMX_V_VMCS_LAUNCH_STATE_CLEAR);
+ }
+ else
+ return rc;
+ }
+
+ /*
+ * Clear the VMCS we are switching to if it has not already been cleared.
+ * This will initialize the VMCS launch state to "clear" required for loading it.
+ *
+ * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
+ */
+ if (pVmcsInfoTo->fVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
+ {
+ int rc = hmR0VmxClearVmcs(pVmcsInfoTo);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ return rc;
+ }
+
+ /*
+ * Finally, load the VMCS we are switching to.
+ */
+ return hmR0VmxLoadVmcs(pVmcsInfoTo);
+}
+
+
+/**
+ * Switches between the guest VMCS and the nested-guest VMCS as specified by the
+ * caller.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fSwitchToNstGstVmcs Whether to switch to the nested-guest VMCS (pass
+ * true) or guest VMCS (pass false).
+ */
+static int hmR0VmxSwitchToGstOrNstGstVmcs(PVMCPUCC pVCpu, bool fSwitchToNstGstVmcs)
+{
+ /* Ensure we have synced everything from the guest-CPU context to the VMCS before switching. */
+ HMVMX_CPUMCTX_ASSERT(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL);
+
+ PVMXVMCSINFO pVmcsInfoFrom;
+ PVMXVMCSINFO pVmcsInfoTo;
+ if (fSwitchToNstGstVmcs)
+ {
+ pVmcsInfoFrom = &pVCpu->hm.s.vmx.VmcsInfo;
+ pVmcsInfoTo = &pVCpu->hm.s.vmx.VmcsInfoNstGst;
+ }
+ else
+ {
+ pVmcsInfoFrom = &pVCpu->hm.s.vmx.VmcsInfoNstGst;
+ pVmcsInfoTo = &pVCpu->hm.s.vmx.VmcsInfo;
+ }
+
+ /*
+ * Disable interrupts to prevent being preempted while we switch the current VMCS as the
+ * preemption hook code path acquires the current VMCS.
+ */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ int rc = hmR0VmxSwitchVmcs(pVmcsInfoFrom, pVmcsInfoTo);
+ if (RT_SUCCESS(rc))
+ {
+ pVCpu->hm.s.vmx.fSwitchedToNstGstVmcs = fSwitchToNstGstVmcs;
+
+ /*
+ * If we are switching to a VMCS that was executed on a different host CPU or was
+ * never executed before, flag that we need to export the host state before executing
+ * guest/nested-guest code using hardware-assisted VMX.
+ *
+ * This could probably be done in a preemptible context since the preemption hook
+ * will flag the necessary change in host context. However, since preemption is
+ * already disabled and to avoid making assumptions about host specific code in
+ * RTMpCpuId when called with preemption enabled, we'll do this while preemption is
+ * disabled.
+ */
+ if (pVmcsInfoTo->idHostCpuState == RTMpCpuId())
+ { /* likely */ }
+ else
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE);
+
+ ASMSetFlags(fEFlags);
+
+ /*
+ * We use a different VM-exit MSR-store areas for the guest and nested-guest. Hence,
+ * flag that we need to update the host MSR values there. Even if we decide in the
+ * future to share the VM-exit MSR-store area page between the guest and nested-guest,
+ * if its content differs, we would have to update the host MSRs anyway.
+ */
+ pVCpu->hm.s.vmx.fUpdatedHostAutoMsrs = false;
+ }
+ else
+ ASMSetFlags(fEFlags);
+ return rc;
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+
+/**
+ * Updates the VM's last error record.
+ *
+ * If there was a VMX instruction error, reads the error data from the VMCS and
+ * updates the VCPU's last error record as well.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
+ * VERR_VMX_INVALID_VMCS_FIELD.
+ * @param rc The error code.
+ */
+static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
+{
+ if ( rc == VERR_VMX_INVALID_VMCS_FIELD
+ || rc == VERR_VMX_UNABLE_TO_START_VM)
+ {
+ AssertPtrReturnVoid(pVCpu);
+ VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
+ }
+ pVCpu->CTX_SUFF(pVM)->hm.s.rcInit = rc;
+}
+
+
+#ifdef VBOX_STRICT
+/**
+ * Reads the VM-entry interruption-information field from the VMCS into the VMX
+ * transient structure.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ */
+DECLINLINE(void) hmR0VmxReadEntryIntInfoVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &pVmxTransient->uEntryIntInfo);
+ AssertRC(rc);
+}
+
+
+/**
+ * Reads the VM-entry exception error code field from the VMCS into
+ * the VMX transient structure.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ */
+DECLINLINE(void) hmR0VmxReadEntryXcptErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &pVmxTransient->uEntryXcptErrorCode);
+ AssertRC(rc);
+}
+
+
+/**
+ * Reads the VM-entry instruction length field from the VMCS into
+ * the VMX transient structure.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ */
+DECLINLINE(void) hmR0VmxReadEntryInstrLenVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &pVmxTransient->cbEntryInstr);
+ AssertRC(rc);
+}
+#endif /* VBOX_STRICT */
+
+
+/**
+ * Reads the VM-exit interruption-information field from the VMCS into the VMX
+ * transient structure.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ */
+DECLINLINE(void) hmR0VmxReadExitIntInfoVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INTERRUPTION_INFO))
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &pVmxTransient->uExitIntInfo);
+ AssertRC(rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INTERRUPTION_INFO;
+ }
+}
+
+
+/**
+ * Reads the VM-exit interruption error code from the VMCS into the VMX
+ * transient structure.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ */
+DECLINLINE(void) hmR0VmxReadExitIntErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE))
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE, &pVmxTransient->uExitIntErrorCode);
+ AssertRC(rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE;
+ }
+}
+
+
+/**
+ * Reads the VM-exit instruction length field from the VMCS into the VMX
+ * transient structure.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ */
+DECLINLINE(void) hmR0VmxReadExitInstrLenVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INSTR_LEN))
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &pVmxTransient->cbExitInstr);
+ AssertRC(rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INSTR_LEN;
+ }
+}
+
+
+/**
+ * Reads the VM-exit instruction-information field from the VMCS into
+ * the VMX transient structure.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ */
+DECLINLINE(void) hmR0VmxReadExitInstrInfoVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INSTR_INFO))
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_INFO, &pVmxTransient->ExitInstrInfo.u);
+ AssertRC(rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INSTR_INFO;
+ }
+}
+
+
+/**
+ * Reads the Exit Qualification from the VMCS into the VMX transient structure.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ */
+DECLINLINE(void) hmR0VmxReadExitQualVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_QUALIFICATION))
+ {
+ int rc = VMXReadVmcsNw(VMX_VMCS_RO_EXIT_QUALIFICATION, &pVmxTransient->uExitQual);
+ AssertRC(rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_QUALIFICATION;
+ }
+}
+
+
+/**
+ * Reads the Guest-linear address from the VMCS into the VMX transient structure.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ */
+DECLINLINE(void) hmR0VmxReadGuestLinearAddrVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_GUEST_LINEAR_ADDR))
+ {
+ int rc = VMXReadVmcsNw(VMX_VMCS_RO_GUEST_LINEAR_ADDR, &pVmxTransient->uGuestLinearAddr);
+ AssertRC(rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_GUEST_LINEAR_ADDR;
+ }
+}
+
+
+/**
+ * Reads the Guest-physical address from the VMCS into the VMX transient structure.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ */
+DECLINLINE(void) hmR0VmxReadGuestPhysicalAddrVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_GUEST_PHYSICAL_ADDR))
+ {
+ int rc = VMXReadVmcs64(VMX_VMCS64_RO_GUEST_PHYS_ADDR_FULL, &pVmxTransient->uGuestPhysicalAddr);
+ AssertRC(rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_GUEST_PHYSICAL_ADDR;
+ }
+}
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Reads the Guest pending-debug exceptions from the VMCS into the VMX transient
+ * structure.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ */
+DECLINLINE(void) hmR0VmxReadGuestPendingDbgXctps(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_GUEST_PENDING_DBG_XCPTS))
+ {
+ int rc = VMXReadVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, &pVmxTransient->uGuestPendingDbgXcpts);
+ AssertRC(rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_GUEST_PENDING_DBG_XCPTS;
+ }
+}
+#endif
+
+/**
+ * Reads the IDT-vectoring information field from the VMCS into the VMX
+ * transient structure.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+DECLINLINE(void) hmR0VmxReadIdtVectoringInfoVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_IDT_VECTORING_INFO))
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_VECTORING_INFO, &pVmxTransient->uIdtVectoringInfo);
+ AssertRC(rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_IDT_VECTORING_INFO;
+ }
+}
+
+
+/**
+ * Reads the IDT-vectoring error code from the VMCS into the VMX
+ * transient structure.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ */
+DECLINLINE(void) hmR0VmxReadIdtVectoringErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_IDT_VECTORING_ERROR_CODE))
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_VECTORING_ERROR_CODE, &pVmxTransient->uIdtVectoringErrorCode);
+ AssertRC(rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_IDT_VECTORING_ERROR_CODE;
+ }
+}
+
+#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
+/**
+ * Reads all relevant read-only VMCS fields into the VMX transient structure.
+ *
+ * @param pVmxTransient The VMX-transient structure.
+ */
+static void hmR0VmxReadAllRoFieldsVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ int rc = VMXReadVmcsNw(VMX_VMCS_RO_EXIT_QUALIFICATION, &pVmxTransient->uExitQual);
+ rc |= VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &pVmxTransient->cbExitInstr);
+ rc |= VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_INFO, &pVmxTransient->ExitInstrInfo.u);
+ rc |= VMXReadVmcs32(VMX_VMCS32_RO_IDT_VECTORING_INFO, &pVmxTransient->uIdtVectoringInfo);
+ rc |= VMXReadVmcs32(VMX_VMCS32_RO_IDT_VECTORING_ERROR_CODE, &pVmxTransient->uIdtVectoringErrorCode);
+ rc |= VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &pVmxTransient->uExitIntInfo);
+ rc |= VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE, &pVmxTransient->uExitIntErrorCode);
+ rc |= VMXReadVmcsNw(VMX_VMCS_RO_GUEST_LINEAR_ADDR, &pVmxTransient->uGuestLinearAddr);
+ rc |= VMXReadVmcs64(VMX_VMCS64_RO_GUEST_PHYS_ADDR_FULL, &pVmxTransient->uGuestPhysicalAddr);
+ AssertRC(rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_QUALIFICATION
+ | HMVMX_READ_EXIT_INSTR_LEN
+ | HMVMX_READ_EXIT_INSTR_INFO
+ | HMVMX_READ_IDT_VECTORING_INFO
+ | HMVMX_READ_IDT_VECTORING_ERROR_CODE
+ | HMVMX_READ_EXIT_INTERRUPTION_INFO
+ | HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE
+ | HMVMX_READ_GUEST_LINEAR_ADDR
+ | HMVMX_READ_GUEST_PHYSICAL_ADDR;
+}
+#endif
+
+/**
+ * Enters VMX root mode operation on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVM The cross context VM structure. Can be
+ * NULL, after a resume.
+ * @param HCPhysCpuPage Physical address of the VMXON region.
+ * @param pvCpuPage Pointer to the VMXON region.
+ */
+static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
+{
+ Assert(pHostCpu);
+ Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
+ Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
+ Assert(pvCpuPage);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ if (pVM)
+ {
+ /* Write the VMCS revision identifier to the VMXON region. */
+ *(uint32_t *)pvCpuPage = RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_ID);
+ }
+
+ /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ /* Enable the VMX bit in CR4 if necessary. */
+ RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
+
+ /* Record whether VMXE was already enabled prior to us enabling it above. */
+ pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
+
+ /* Enter VMX root mode. */
+ int rc = VMXEnable(HCPhysCpuPage);
+ if (RT_FAILURE(rc))
+ {
+ /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
+ if (!pHostCpu->fVmxeAlreadyEnabled)
+ SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
+
+ if (pVM)
+ pVM->hm.s.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
+ }
+
+ /* Restore interrupts. */
+ ASMSetFlags(fEFlags);
+ return rc;
+}
+
+
+/**
+ * Exits VMX root mode operation on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pHostCpu The HM physical-CPU structure.
+ */
+static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ /* If we're for some reason not in VMX root mode, then don't leave it. */
+ RTCCUINTREG const uHostCr4 = ASMGetCR4();
+
+ int rc;
+ if (uHostCr4 & X86_CR4_VMXE)
+ {
+ /* Exit VMX root mode and clear the VMX bit in CR4. */
+ VMXDisable();
+
+ /* Clear CR4.VMXE only if it was clear prior to us setting it. */
+ if (!pHostCpu->fVmxeAlreadyEnabled)
+ SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
+
+ rc = VINF_SUCCESS;
+ }
+ else
+ rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
+
+ /* Restore interrupts. */
+ ASMSetFlags(fEFlags);
+ return rc;
+}
+
+
+/**
+ * Allocates pages as specified by an array of VMX page-allocation info objects.
+ *
+ * The pages' contents are zeroed after allocation.
+ *
+ * @returns VBox status code.
+ * @param phMemObj Where to return the handle to the allocation.
+ * @param paAllocInfo The pointer to the first element of the VMX
+ * page-allocation info object array.
+ * @param cEntries The number of elements in the @a paAllocInfo array.
+ */
+static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
+{
+ *phMemObj = NIL_RTR0MEMOBJ;
+
+ /* Figure out how many pages to allocate. */
+ uint32_t cPages = 0;
+ for (uint32_t iPage = 0; iPage < cEntries; iPage++)
+ cPages += !!paAllocInfo[iPage].fValid;
+
+ /* Allocate the pages. */
+ if (cPages)
+ {
+ size_t const cbPages = cPages << PAGE_SHIFT;
+ int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
+ void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
+ RT_BZERO(pvFirstPage, cbPages);
+
+ uint32_t iPage = 0;
+ for (uint32_t i = 0; i < cEntries; i++)
+ if (paAllocInfo[i].fValid)
+ {
+ RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
+ void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
+ Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
+ AssertPtr(pvPage);
+
+ /* Note: index the info array by 'i' (the entry) and not 'iPage', since entries with fValid clear are skipped. */
+ Assert(paAllocInfo[i].pHCPhys);
+ Assert(paAllocInfo[i].ppVirt);
+ *paAllocInfo[i].pHCPhys = HCPhysPage;
+ *paAllocInfo[i].ppVirt = pvPage;
+
+ /* Move to next page. */
+ ++iPage;
+ }
+
+ /* Make sure all valid (requested) pages have been assigned. */
+ Assert(iPage == cPages);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Frees pages allocated using hmR0VmxPagesAllocZ.
+ *
+ * @param hMemObj The ring-0 memory object associated with the allocation.
+ */
+DECL_FORCE_INLINE(void) hmR0VmxPagesFree(RTR0MEMOBJ hMemObj)
+{
+ /* We can cleanup wholesale since it's all one allocation. */
+ RTR0MemObjFree(hMemObj, true /* fFreeMappings */);
+}
+
+
+/**
+ * Initializes a VMCS info. object.
+ *
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo)
+{
+ memset(pVmcsInfo, 0, sizeof(*pVmcsInfo));
+
+ Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
+ pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
+ pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
+ pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
+ pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
+ pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
+ pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
+ pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
+ pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
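+ /* Note: the VMCS link pointer must read all-ones when VMCS shadowing is not used; NIL_RTHCPHYS conveniently is all-ones. */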
+ pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
+ pVmcsInfo->idHostCpuState = NIL_RTCPUID;
+ pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
+}
+
+
+/**
+ * Frees the VT-x structures for a VMCS info. object.
+ *
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo)
+{
+ if (pVmcsInfo->hMemObj != NIL_RTR0MEMOBJ)
+ {
+ hmR0VmxPagesFree(pVmcsInfo->hMemObj);
+ hmR0VmxVmcsInfoInit(pVmcsInfo);
+ }
+}
+
+
+/**
+ * Allocates the VT-x structures for a VMCS info. object.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
+ *
+ * @remarks The caller is expected to take care of any and all allocation failures.
+ * This function will not perform any cleanup for failures half-way
+ * through.
+ */
+static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+
+ bool const fMsrBitmaps = RT_BOOL(pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
+ bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hm.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
+ Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
+ VMXPAGEALLOCINFO aAllocInfo[] =
+ {
+ { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
+ { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
+ { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
+ { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
+ { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
+ };
+
+ int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * We use the same page for the VM-entry MSR-load and VM-exit MSR-store areas because
+ * they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
+ */
+ AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
+ Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
+ pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
+ pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
+
+ /*
+ * Get the virtual-APIC page rather than allocating it again.
+ */
+ if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
+ {
+ if (!fIsNstGstVmcs)
+ {
+ if (PDMHasApic(pVM))
+ {
+ rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
+ if (RT_FAILURE(rc))
+ return rc;
+ Assert(pVmcsInfo->pbVirtApic);
+ Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
+ }
+ }
+ else
+ {
+ pVmcsInfo->pbVirtApic = (uint8_t *)CPUMGetGuestVmxVirtApicPage(&pVCpu->cpum.GstCtx, &pVmcsInfo->HCPhysVirtApic);
+ Assert(pVmcsInfo->pbVirtApic);
+ Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
+ }
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Frees all VT-x structures for the VM.
+ *
+ * @param pVM The cross context VM structure.
+ */
+static void hmR0VmxStructsFree(PVMCC pVM)
+{
+ hmR0VmxPagesFree(pVM->hm.s.vmx.hMemObj);
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if (pVM->hm.s.vmx.fUseVmcsShadowing)
+ {
+ RTMemFree(pVM->hm.s.vmx.paShadowVmcsFields);
+ RTMemFree(pVM->hm.s.vmx.paShadowVmcsRoFields);
+ }
+#endif
+
+ for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
+ {
+ PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
+ hmR0VmxVmcsInfoFree(&pVCpu->hm.s.vmx.VmcsInfo);
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if (pVM->cpum.ro.GuestFeatures.fVmx)
+ hmR0VmxVmcsInfoFree(&pVCpu->hm.s.vmx.VmcsInfoNstGst);
+#endif
+ }
+}
+
+
+/**
+ * Allocates all VT-x structures for the VM.
+ *
+ * @returns IPRT status code.
+ * @param pVM The cross context VM structure.
+ *
+ * @remarks This function will clean up on memory allocation failures.
+ */
+static int hmR0VmxStructsAlloc(PVMCC pVM)
+{
+ /*
+ * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
+ * The VMCS size cannot be more than 4096 bytes.
+ *
+ * See Intel spec. Appendix A.1 "Basic VMX Information".
+ */
+ uint32_t const cbVmcs = RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
+ if (cbVmcs <= X86_PAGE_4K_SIZE)
+ { /* likely */ }
+ else
+ {
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /*
+ * Allocate per-VM VT-x structures.
+ */
+ bool const fVirtApicAccess = RT_BOOL(pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
+ bool const fUseVmcsShadowing = pVM->hm.s.vmx.fUseVmcsShadowing;
+ VMXPAGEALLOCINFO aAllocInfo[] =
+ {
+ { fVirtApicAccess, 0 /* Unused */, &pVM->hm.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess },
+ { fUseVmcsShadowing, 0 /* Unused */, &pVM->hm.s.vmx.HCPhysVmreadBitmap, &pVM->hm.s.vmx.pvVmreadBitmap },
+ { fUseVmcsShadowing, 0 /* Unused */, &pVM->hm.s.vmx.HCPhysVmwriteBitmap, &pVM->hm.s.vmx.pvVmwriteBitmap },
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ { true, 0 /* Unused */, &pVM->hm.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hm.s.vmx.pbScratch },
+#endif
+ };
+
+ int rc = hmR0VmxPagesAllocZ(&pVM->hm.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /* Allocate the shadow VMCS-fields array. */
+ if (fUseVmcsShadowing)
+ {
+ Assert(!pVM->hm.s.vmx.cShadowVmcsFields);
+ Assert(!pVM->hm.s.vmx.cShadowVmcsRoFields);
+ pVM->hm.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
+ pVM->hm.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
+ if (!pVM->hm.s.vmx.paShadowVmcsFields || !pVM->hm.s.vmx.paShadowVmcsRoFields)
+ rc = VERR_NO_MEMORY;
+ }
+#endif
+
+ /*
+ * Allocate per-VCPU VT-x structures.
+ */
+ for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
+ {
+ /* Allocate the guest VMCS structures. */
+ PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
+ rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hm.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
+ if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
+ rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hm.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
+#endif
+ }
+ if (RT_SUCCESS(rc))
+ return VINF_SUCCESS;
+ }
+ hmR0VmxStructsFree(pVM);
+ return rc;
+}
+
+
+/**
+ * Pre-initializes non-zero fields in VMX structures that will be allocated.
+ *
+ * @param pVM The cross context VM structure.
+ */
+static void hmR0VmxStructsInit(PVMCC pVM)
+{
+ /* Paranoia. */
+ Assert(pVM->hm.s.vmx.pbApicAccess == NULL);
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ Assert(pVM->hm.s.vmx.pbScratch == NULL);
+#endif
+
+ /*
+ * Initialize members up-front so we can cleanup en masse on allocation failures.
+ */
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ pVM->hm.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
+#endif
+ pVM->hm.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
+ pVM->hm.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
+ pVM->hm.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
+ for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
+ {
+ PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
+ hmR0VmxVmcsInfoInit(&pVCpu->hm.s.vmx.VmcsInfo);
+ hmR0VmxVmcsInfoInit(&pVCpu->hm.s.vmx.VmcsInfoNstGst);
+ }
+}
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not.
+ *
+ * @returns @c true if the MSR is intercepted, @c false otherwise.
+ * @param pvMsrBitmap The MSR bitmap.
+ * @param offMsr The MSR byte offset.
+ * @param iBit The bit offset from the byte offset.
+ */
+DECLINLINE(bool) hmR0VmxIsMsrBitSet(const void *pvMsrBitmap, uint16_t offMsr, int32_t iBit)
+{
+ uint8_t const * const pbMsrBitmap = (uint8_t const * const)pvMsrBitmap;
+ Assert(pbMsrBitmap);
+ Assert(offMsr + (iBit >> 3) < X86_PAGE_4K_SIZE);
+ return ASMBitTest(pbMsrBitmap + offMsr, iBit);
+}
+#endif
+
+/**
+ * Sets the permission bits for the specified MSR in the given MSR bitmap.
+ *
+ * If the passed VMCS is a nested-guest VMCS, this function ensures that the
+ * read/write intercept is cleared from the MSR bitmap used for hardware-assisted
+ * VMX execution of the nested-guest, only if nested-guest is also not intercepting
+ * the read/write access of this MSR.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
+ * @param idMsr The MSR value.
+ * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
+ * include both a read -and- a write permission!
+ *
+ * @sa CPUMGetVmxMsrPermission.
+ * @remarks Can be called with interrupts disabled.
+ */
+static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
+{
+ uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
+ Assert(pbMsrBitmap);
+ Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
+
+ /*
+ * MSR-bitmap Layout:
+ * Byte index MSR range Interpreted as
+ * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
+ * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
+ * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
+ * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
+ *
+ * A bit corresponding to an MSR within the above range causes a VM-exit
+ * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls out of
+ * the MSR range, it always causes a VM-exit.
+ *
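+ * For example, MSR_K8_LSTAR (0xc0000082) falls in the high range: offMsr = 0x400 and
+ * iBit = 0x82, so its read-intercept bit is byte 0x400 + (0x82 >> 3) = 0x410, bit 2, and
+ * its write-intercept bit is byte 0xc10, bit 2.
+ *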
+ * See Intel spec. 24.6.9 "MSR-Bitmap Address".
+ */
+ uint16_t const offBitmapRead = 0;
+ uint16_t const offBitmapWrite = 0x800;
+ uint16_t offMsr;
+ int32_t iBit;
+ if (idMsr <= UINT32_C(0x00001fff))
+ {
+ offMsr = 0;
+ iBit = idMsr;
+ }
+ else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
+ {
+ offMsr = 0x400;
+ iBit = idMsr - UINT32_C(0xc0000000);
+ }
+ else
+ AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
+
+ /*
+ * Set the MSR read permission.
+ */
+ uint16_t const offMsrRead = offBitmapRead + offMsr;
+ Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
+ if (fMsrpm & VMXMSRPM_ALLOW_RD)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ bool const fClear = !fIsNstGstVmcs ? true
+ : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.CTX_SUFF(pvMsrBitmap), offMsrRead, iBit);
+#else
+ RT_NOREF2(pVCpu, fIsNstGstVmcs);
+ bool const fClear = true;
+#endif
+ if (fClear)
+ ASMBitClear(pbMsrBitmap + offMsrRead, iBit);
+ }
+ else
+ ASMBitSet(pbMsrBitmap + offMsrRead, iBit);
+
+ /*
+ * Set the MSR write permission.
+ */
+ uint16_t const offMsrWrite = offBitmapWrite + offMsr;
+ Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
+ if (fMsrpm & VMXMSRPM_ALLOW_WR)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ bool const fClear = !fIsNstGstVmcs ? true
+ : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.CTX_SUFF(pvMsrBitmap), offMsrWrite, iBit);
+#else
+ RT_NOREF2(pVCpu, fIsNstGstVmcs);
+ bool const fClear = true;
+#endif
+ if (fClear)
+ ASMBitClear(pbMsrBitmap + offMsrWrite, iBit);
+ }
+ else
+ ASMBitSet(pbMsrBitmap + offMsrWrite, iBit);
+}
+
+
+/**
+ * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
+ * area.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param cMsrs The number of MSRs.
+ */
+static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
+{
+ /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
+ uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.u64Misc);
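+ /* Note: IA32_VMX_MISC reports the recommended maximum as 512 * (N + 1); see Intel spec. Appendix A.6 "Miscellaneous Data". */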
+ if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
+ {
+ /* Commit the MSR counts to the VMCS and update the cache. */
+ if (pVmcsInfo->cEntryMsrLoad != cMsrs)
+ {
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
+ rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
+ rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
+ pVmcsInfo->cEntryMsrLoad = cMsrs;
+ pVmcsInfo->cExitMsrStore = cMsrs;
+ pVmcsInfo->cExitMsrLoad = cMsrs;
+ }
+ return VINF_SUCCESS;
+ }
+
+ LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
+ pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+}
+
+
+/**
+ * Adds a new (or updates the value of an existing) guest/host MSR
+ * pair to be swapped during the world-switch as part of the
+ * auto-load/store MSR area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param idMsr The MSR.
+ * @param uGuestMsrValue Value of the guest MSR.
+ * @param fSetReadWrite Whether to set the guest read/write access of this
+ * MSR (thus not causing a VM-exit).
+ * @param fUpdateHostMsr Whether to update the value of the host MSR if
+ * necessary.
+ */
+static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
+ bool fSetReadWrite, bool fUpdateHostMsr)
+{
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
+ PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
+ uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
+ uint32_t i;
+
+ /* Paranoia. */
+ Assert(pGuestMsrLoad);
+
+ LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
+
+ /* Check if the MSR already exists in the VM-entry MSR-load area. */
+ for (i = 0; i < cMsrs; i++)
+ {
+ if (pGuestMsrLoad[i].u32Msr == idMsr)
+ break;
+ }
+
+ bool fAdded = false;
+ if (i == cMsrs)
+ {
+ /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
+ ++cMsrs;
+ int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
+ AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
+
+ /* Set the guest to read/write this MSR without causing VM-exits. */
+ if ( fSetReadWrite
+ && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
+
+ Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
+ fAdded = true;
+ }
+
+ /* Update the MSR value for the newly added or already existing MSR. */
+ pGuestMsrLoad[i].u32Msr = idMsr;
+ pGuestMsrLoad[i].u64Value = uGuestMsrValue;
+
+ /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
+ if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
+ {
+ PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
+ pGuestMsrStore[i].u32Msr = idMsr;
+ pGuestMsrStore[i].u64Value = uGuestMsrValue;
+ }
+
+ /* Update the corresponding slot in the host MSR area. */
+ PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
+ Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
+ Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
+ pHostMsr[i].u32Msr = idMsr;
+
+ /*
+ * Only if the caller requests to update the host MSR value AND we've newly added the
+ * MSR to the host MSR area do we actually update the value. Otherwise, it will be
+ * updated by hmR0VmxUpdateAutoLoadHostMsrs().
+ *
+ * We do this for performance reasons since reading MSRs may be quite expensive.
+ */
+ if (fAdded)
+ {
+ if (fUpdateHostMsr)
+ {
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ pHostMsr[i].u64Value = ASMRdMsr(idMsr);
+ }
+ else
+ {
+ /* Someone else can do the work. */
+ pVCpu->hm.s.vmx.fUpdatedHostAutoMsrs = false;
+ }
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Removes a guest/host MSR pair to be swapped during the world-switch from the
+ * auto-load/store MSR area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param idMsr The MSR.
+ */
+static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
+{
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
+ PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
+ uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
+
+ LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
+
+ for (uint32_t i = 0; i < cMsrs; i++)
+ {
+ /* Find the MSR. */
+ if (pGuestMsrLoad[i].u32Msr == idMsr)
+ {
+ /*
+ * If it's the last MSR, we only need to reduce the MSR count.
+ * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
+ */
+ if (i < cMsrs - 1)
+ {
+ /* Remove it from the VM-entry MSR-load area. */
+ pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
+ pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
+
+ /* Remove it from the VM-exit MSR-store area if it's in a different page. */
+ if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
+ {
+ PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
+ Assert(pGuestMsrStore[i].u32Msr == idMsr);
+ pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
+ pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
+ }
+
+ /* Remove it from the VM-exit MSR-load area. */
+ PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
+ Assert(pHostMsr[i].u32Msr == idMsr);
+ pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
+ pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
+ }
+
+ /* Reduce the count to reflect the removed MSR and bail. */
+ --cMsrs;
+ break;
+ }
+ }
+
+ /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
+ if (cMsrs != pVmcsInfo->cEntryMsrLoad)
+ {
+ int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
+ AssertRCReturn(rc, rc);
+
+ /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
+
+ Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
+ return VINF_SUCCESS;
+ }
+
+ return VERR_NOT_FOUND;
+}
+
+
+/**
+ * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
+ *
+ * @returns @c true if found, @c false otherwise.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param idMsr The MSR to find.
+ */
+static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
+{
+ PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
+ uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
+ Assert(pMsrs);
+ Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
+ for (uint32_t i = 0; i < cMsrs; i++)
+ {
+ if (pMsrs[i].u32Msr == idMsr)
+ return true;
+ }
+ return false;
+}
+
+
+/**
+ * Updates the value of all host MSRs in the VM-exit MSR-load area.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
+ uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
+ Assert(pHostMsrLoad);
+ Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
+ LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
+ for (uint32_t i = 0; i < cMsrs; i++)
+ {
+ /*
+ * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
+ * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
+ */
+ if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
+ pHostMsrLoad[i].u64Value = pVCpu->CTX_SUFF(pVM)->hm.s.vmx.u64HostMsrEfer;
+ else
+ pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
+ }
+}
+
+
+/**
+ * Saves a set of host MSRs to allow read/write passthru access to the guest and
+ * to later perform lazy restoration of the host MSRs while leaving VT-x.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /*
+ * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
+ */
+ if (!(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
+ {
+ Assert(!(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests)
+ {
+ pVCpu->hm.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
+ pVCpu->hm.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
+ pVCpu->hm.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
+ pVCpu->hm.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
+ }
+ pVCpu->hm.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
+ }
+}
+
+
+/**
+ * Checks whether the MSR belongs to the set of guest MSRs that we restore
+ * lazily while leaving VT-x.
+ *
+ * @returns true if it does, false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param idMsr The MSR to check.
+ */
+static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
+{
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests)
+ {
+ switch (idMsr)
+ {
+ case MSR_K8_LSTAR:
+ case MSR_K6_STAR:
+ case MSR_K8_SF_MASK:
+ case MSR_K8_KERNEL_GS_BASE:
+ return true;
+ }
+ }
+ return false;
+}
+
+
+/**
+ * Loads a set of guest MSRs to allow read/write passthru access to the guest.
+ *
+ * The name of this function is slightly confusing. This function does NOT
+ * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
+ * common prefix for functions dealing with "lazy restoration" of the shared
+ * MSRs.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ Assert(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests)
+ {
+ /*
+ * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
+ * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
+ * we can skip a few MSR writes.
+ *
+         * Otherwise, either 1. they're not loaded but the guest values differ from what's on the
+         * CPU, or 2. they're loaded but the guest MSR values in the guest-CPU context might differ
+         * from what's currently loaded in the CPU. In either case, we need to write the new guest
+         * MSR values to the CPU, see @bugref{8728}.
+ */
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if ( !(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
+ && pCtx->msrKERNELGSBASE == pVCpu->hm.s.vmx.u64HostMsrKernelGsBase
+ && pCtx->msrLSTAR == pVCpu->hm.s.vmx.u64HostMsrLStar
+ && pCtx->msrSTAR == pVCpu->hm.s.vmx.u64HostMsrStar
+ && pCtx->msrSFMASK == pVCpu->hm.s.vmx.u64HostMsrSfMask)
+ {
+#ifdef VBOX_STRICT
+ Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
+ Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
+ Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
+ Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
+#endif
+ }
+ else
+ {
+ ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
+ ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
+ ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
+ ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK);
+ }
+ }
+ pVCpu->hm.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
+}
+
+
+/**
+ * Performs lazy restoration of the set of host MSRs if they were previously
+ * loaded with guest MSR values.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ * @remarks The guest MSRs should have been saved back into the guest-CPU
+ * context by hmR0VmxImportGuestState()!!!
+ */
+static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ if (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
+ {
+ Assert(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests)
+ {
+ ASMWrMsr(MSR_K8_LSTAR, pVCpu->hm.s.vmx.u64HostMsrLStar);
+ ASMWrMsr(MSR_K6_STAR, pVCpu->hm.s.vmx.u64HostMsrStar);
+ ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hm.s.vmx.u64HostMsrSfMask);
+ ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hm.s.vmx.u64HostMsrKernelGsBase);
+ }
+ }
+ pVCpu->hm.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
+}
+
+
+/**
+ * Verifies that our cached values of the VMCS fields are all consistent with
+ * what's actually present in the VMCS.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if all our caches match their respective VMCS fields.
+ * @retval VERR_VMX_VMCS_FIELD_CACHE_INVALID if a cache field doesn't match the
+ * VMCS content. HMCPU error-field is
+ * updated, see VMX_VCI_XXX.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
+ */
+static int hmR0VmxCheckCachedVmcsCtls(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
+{
+ const char * const pcszVmcs = fIsNstGstVmcs ? "Nested-guest VMCS" : "VMCS";
+
+ uint32_t u32Val;
+ int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val);
+ AssertRC(rc);
+ AssertMsgReturnStmt(pVmcsInfo->u32EntryCtls == u32Val,
+ ("%s controls mismatch: Cache=%#RX32 VMCS=%#RX32\n", pcszVmcs, pVmcsInfo->u32EntryCtls, u32Val),
+ pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_ENTRY,
+ VERR_VMX_VMCS_FIELD_CACHE_INVALID);
+
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT, &u32Val);
+ AssertRC(rc);
+ AssertMsgReturnStmt(pVmcsInfo->u32ExitCtls == u32Val,
+ ("%s controls mismatch: Cache=%#RX32 VMCS=%#RX32\n", pcszVmcs, pVmcsInfo->u32ExitCtls, u32Val),
+ pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_EXIT,
+ VERR_VMX_VMCS_FIELD_CACHE_INVALID);
+
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, &u32Val);
+ AssertRC(rc);
+ AssertMsgReturnStmt(pVmcsInfo->u32PinCtls == u32Val,
+ ("%s controls mismatch: Cache=%#RX32 VMCS=%#RX32\n", pcszVmcs, pVmcsInfo->u32PinCtls, u32Val),
+ pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_PIN_EXEC,
+ VERR_VMX_VMCS_FIELD_CACHE_INVALID);
+
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, &u32Val);
+ AssertRC(rc);
+ AssertMsgReturnStmt(pVmcsInfo->u32ProcCtls == u32Val,
+ ("%s controls mismatch: Cache=%#RX32 VMCS=%#RX32\n", pcszVmcs, pVmcsInfo->u32ProcCtls, u32Val),
+ pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_PROC_EXEC,
+ VERR_VMX_VMCS_FIELD_CACHE_INVALID);
+
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
+ {
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, &u32Val);
+ AssertRC(rc);
+ AssertMsgReturnStmt(pVmcsInfo->u32ProcCtls2 == u32Val,
+ ("%s controls mismatch: Cache=%#RX32 VMCS=%#RX32\n", pcszVmcs, pVmcsInfo->u32ProcCtls2, u32Val),
+ pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_PROC_EXEC2,
+ VERR_VMX_VMCS_FIELD_CACHE_INVALID);
+ }
+
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, &u32Val);
+ AssertRC(rc);
+ AssertMsgReturnStmt(pVmcsInfo->u32XcptBitmap == u32Val,
+ ("%s exception bitmap mismatch: Cache=%#RX32 VMCS=%#RX32\n", pcszVmcs, pVmcsInfo->u32XcptBitmap, u32Val),
+ pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_XCPT_BITMAP,
+ VERR_VMX_VMCS_FIELD_CACHE_INVALID);
+
+ uint64_t u64Val;
+ rc = VMXReadVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, &u64Val);
+ AssertRC(rc);
+ AssertMsgReturnStmt(pVmcsInfo->u64TscOffset == u64Val,
+ ("%s TSC offset mismatch: Cache=%#RX64 VMCS=%#RX64\n", pcszVmcs, pVmcsInfo->u64TscOffset, u64Val),
+ pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_TSC_OFFSET,
+ VERR_VMX_VMCS_FIELD_CACHE_INVALID);
+
+ NOREF(pcszVmcs);
+ return VINF_SUCCESS;
+}
+
+
+#ifdef VBOX_STRICT
+/**
+ * Verifies that our cached host EFER MSR value has not changed since we cached it.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static void hmR0VmxCheckHostEferMsr(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
+ {
+ uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
+ uint64_t const uHostEferMsrCache = pVCpu->CTX_SUFF(pVM)->hm.s.vmx.u64HostMsrEfer;
+ uint64_t uVmcsEferMsrVmcs;
+ int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
+ AssertRC(rc);
+
+ AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
+ ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
+ AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
+ ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
+ }
+}
+
+
+/**
+ * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
+ * VMCS are correct.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
+ */
+static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* Read the various MSR-area counts from the VMCS. */
+ uint32_t cEntryLoadMsrs;
+ uint32_t cExitStoreMsrs;
+ uint32_t cExitLoadMsrs;
+ int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
+
+ /* Verify all the MSR counts are the same. */
+ Assert(cEntryLoadMsrs == cExitStoreMsrs);
+ Assert(cExitStoreMsrs == cExitLoadMsrs);
+ uint32_t const cMsrs = cExitLoadMsrs;
+
+ /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
+ Assert(cMsrs < VMX_MISC_MAX_MSRS(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.u64Misc));
+
+ /* Verify the MSR counts are within the allocated page size. */
+ Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
+
+ /* Verify the relevant contents of the MSR areas match. */
+ PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
+ PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
+ PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
+ bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
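+    /* Note: the guest load/store pairing check below is only meaningful when the VM-exit
+       MSR-store area is separate from the VM-entry MSR-load area. */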
+ for (uint32_t i = 0; i < cMsrs; i++)
+ {
+ /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
+ if (fSeparateExitMsrStorePage)
+ {
+ AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
+ ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
+ pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
+ }
+
+ AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
+ ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
+ pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
+
+ uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
+ AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
+ ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
+ pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
+
+        /* Verify that the cached host EFER MSR matches what's loaded on the CPU. */
+ bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
+ if (fIsEferMsr)
+ {
+ AssertMsgReturnVoid(u64HostMsr == pVCpu->CTX_SUFF(pVM)->hm.s.vmx.u64HostMsrEfer,
+ ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
+ pVCpu->CTX_SUFF(pVM)->hm.s.vmx.u64HostMsrEfer, u64HostMsr, cMsrs));
+ }
+
+ /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ {
+ uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
+ if (fIsEferMsr)
+ {
+ AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
+ AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
+ }
+ else
+ {
+ /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
+ PCVM pVM = pVCpu->CTX_SUFF(pVM);
+ if ( pVM->hm.s.vmx.fLbr
+ && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
+ || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
+ || pGuestMsrLoad->u32Msr == pVM->hm.s.vmx.idLbrTosMsr))
+ {
+ AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
+ ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
+ pGuestMsrLoad->u32Msr, cMsrs));
+ }
+ else if (!fIsNstGstVmcs)
+ {
+ AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
+ ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
+ }
+ else
+ {
+ /*
+ * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
+ * execute a nested-guest with MSR passthrough.
+ *
+ * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
+ * allow passthrough too.
+ */
+ void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.CTX_SUFF(pvMsrBitmap);
+ Assert(pvMsrBitmapNstGst);
+ uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
+ AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
+ ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
+ pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
+ }
+ }
+ }
+
+ /* Move to the next MSR. */
+ pHostMsrLoad++;
+ pGuestMsrLoad++;
+ pGuestMsrStore++;
+ }
+}
+#endif /* VBOX_STRICT */
+
+
+/**
+ * Flushes the TLB using EPT.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling
+ * EMT. Can be NULL depending on @a enmTlbFlush.
+ * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
+ * enmTlbFlush.
+ * @param enmTlbFlush Type of flush.
+ *
+ * @remarks Caller is responsible for making sure this function is called only
+ * when NestedPaging is supported and providing @a enmTlbFlush that is
+ * supported by the CPU.
+ * @remarks Can be called with interrupts disabled.
+ */
+static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
+{
+ uint64_t au64Descriptor[2];
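+    /* INVEPT descriptor: the first qword holds the EPTP for single-context flushes (ignored for all-contexts), the second is reserved. */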
+ if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
+ au64Descriptor[0] = 0;
+ else
+ {
+ Assert(pVCpu);
+ Assert(pVmcsInfo);
+ au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
+ }
+ au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
+
+ int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
+ AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
+
+ if ( RT_SUCCESS(rc)
+ && pVCpu)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
+}
+
+
+/**
+ * Flushes the TLB using VPID.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling
+ * EMT. Can be NULL depending on @a enmTlbFlush.
+ * @param enmTlbFlush Type of flush.
+ * @param GCPtr Virtual address of the page to flush (can be 0 depending
+ * on @a enmTlbFlush).
+ *
+ * @remarks Can be called with interrupts disabled.
+ */
+static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
+{
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fVpid);
+
+ uint64_t au64Descriptor[2];
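+    /* INVVPID descriptor: bits 15:0 of the first qword hold the VPID (rest reserved), the second qword holds the linear address. */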
+ if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
+ {
+ au64Descriptor[0] = 0;
+ au64Descriptor[1] = 0;
+ }
+ else
+ {
+ AssertPtr(pVCpu);
+ AssertMsg(pVCpu->hm.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
+ AssertMsg(pVCpu->hm.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
+ au64Descriptor[0] = pVCpu->hm.s.uCurrentAsid;
+ au64Descriptor[1] = GCPtr;
+ }
+
+ int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
+ AssertMsg(rc == VINF_SUCCESS,
+ ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hm.s.uCurrentAsid : 0, GCPtr, rc));
+
+ if ( RT_SUCCESS(rc)
+ && pVCpu)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
+ NOREF(rc);
+}
+
+
+/**
+ * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
+ * otherwise there is nothing really to invalidate.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param GCVirt Guest virtual address of the page to invalidate.
+ */
+VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
+{
+ AssertPtr(pVCpu);
+ LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
+
+ if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
+ {
+ /*
+         * We must invalidate the guest TLB entry in either case; we cannot ignore it even for
+         * the EPT case. See @bugref{6043} and @bugref{6177}.
+         *
+         * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
+         * as this function may be called in a loop with individual addresses.
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if (pVM->hm.s.vmx.fVpid)
+ {
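+            /* Flush only this address when the CPU supports individual-address INVVPID;
+               otherwise fall back to a full TLB flush at VM-entry. */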
+ bool fVpidFlush = RT_BOOL(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR);
+ if (fVpidFlush)
+ {
+ hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
+ }
+ else
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
+ }
+ else if (pVM->hm.s.fNestedPaging)
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
+ * case where neither EPT nor VPID is supported by the CPU.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
+{
+ AssertPtr(pVCpu);
+ AssertPtr(pHostCpu);
+
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
+
+ Assert(pHostCpu->idCpu != NIL_RTCPUID);
+ pVCpu->hm.s.idLastCpu = pHostCpu->idCpu;
+ pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+ pVCpu->hm.s.fForceTLBFlush = false;
+ return;
+}
+
+
+/**
+ * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks All references to "ASID" in this function pertain to "VPID" in Intel's
+ *          nomenclature. This avoids confusion in the compare statements below,
+ *          since the host-CPU copies are named "ASID".
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
+{
+#ifdef VBOX_WITH_STATISTICS
+ bool fTlbFlushed = false;
+# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
+# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
+ if (!fTlbFlushed) \
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
+ } while (0)
+#else
+# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
+# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
+#endif
+
+ AssertPtr(pVCpu);
+ AssertPtr(pHostCpu);
+ Assert(pHostCpu->idCpu != NIL_RTCPUID);
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ AssertMsg(pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid,
+ ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
+ "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hm.s.fNestedPaging, pVM->hm.s.vmx.fVpid));
+
+ /*
+ * Force a TLB flush for the first world-switch if the current CPU differs from the one we
+ * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
+ * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
+ * cannot reuse the current ASID anymore.
+ */
+ if ( pVCpu->hm.s.idLastCpu != pHostCpu->idCpu
+ || pVCpu->hm.s.cTlbFlushes != pHostCpu->cTlbFlushes)
+ {
+ ++pHostCpu->uCurrentAsid;
+ if (pHostCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
+ {
+ pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
+ pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
+ pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
+ }
+
+ pVCpu->hm.s.uCurrentAsid = pHostCpu->uCurrentAsid;
+ pVCpu->hm.s.idLastCpu = pHostCpu->idCpu;
+ pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+
+ /*
+ * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
+ * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
+ */
+ hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hm.s.vmx.enmTlbFlushEpt);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
+ HMVMX_SET_TAGGED_TLB_FLUSHED();
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
+ }
+ else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
+ {
+ /*
+         * Changes to the EPT paging structure by the VMM require flushing-by-EPT as the CPU
+         * creates guest-physical (i.e. only EPT-tagged) mappings while traversing the EPT
+ * tables when EPT is in use. Flushing-by-VPID will only flush linear (only
+ * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
+ * mappings, see @bugref{6568}.
+ *
+ * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
+ */
+ hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hm.s.vmx.enmTlbFlushEpt);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
+ HMVMX_SET_TAGGED_TLB_FLUSHED();
+ }
+ else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
+ {
+ /*
+ * The nested-guest specifies its own guest-physical address to use as the APIC-access
+ * address which requires flushing the TLB of EPT cached structures.
+ *
+ * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
+ */
+ hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hm.s.vmx.enmTlbFlushEpt);
+ pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
+ HMVMX_SET_TAGGED_TLB_FLUSHED();
+ }
+
+ pVCpu->hm.s.fForceTLBFlush = false;
+ HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
+
+ Assert(pVCpu->hm.s.idLastCpu == pHostCpu->idCpu);
+ Assert(pVCpu->hm.s.cTlbFlushes == pHostCpu->cTlbFlushes);
+ AssertMsg(pVCpu->hm.s.cTlbFlushes == pHostCpu->cTlbFlushes,
+ ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pHostCpu->cTlbFlushes));
+ AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
+ ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
+ pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hm.s.idLastCpu, pVCpu->hm.s.cTlbFlushes));
+ AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
+ ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
+
+ /* Update VMCS with the VPID. */
+ int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hm.s.uCurrentAsid);
+ AssertRC(rc);
+
+#undef HMVMX_SET_TAGGED_TLB_FLUSHED
+}
+
+
+/**
+ * Flushes the tagged-TLB entries for EPT CPUs as necessary.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
+{
+ AssertPtr(pVCpu);
+ AssertPtr(pHostCpu);
+ Assert(pHostCpu->idCpu != NIL_RTCPUID);
+ AssertMsg(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
+ AssertMsg(!pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
+
+ /*
+ * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
+ * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
+ */
+ if ( pVCpu->hm.s.idLastCpu != pHostCpu->idCpu
+ || pVCpu->hm.s.cTlbFlushes != pHostCpu->cTlbFlushes)
+ {
+ pVCpu->hm.s.fForceTLBFlush = true;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
+ }
+
+ /* Check for explicit TLB flushes. */
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
+ {
+ pVCpu->hm.s.fForceTLBFlush = true;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
+ }
+
+ /* Check for TLB flushes while switching to/from a nested-guest. */
+ if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
+ {
+ pVCpu->hm.s.fForceTLBFlush = true;
+ pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
+ }
+
+ pVCpu->hm.s.idLastCpu = pHostCpu->idCpu;
+ pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+
+ if (pVCpu->hm.s.fForceTLBFlush)
+ {
+ hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hm.s.vmx.enmTlbFlushEpt);
+ pVCpu->hm.s.fForceTLBFlush = false;
+ }
+}
+
+
+/**
+ * Flushes the tagged-TLB entries for VPID CPUs as necessary.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
+{
+ AssertPtr(pVCpu);
+ AssertPtr(pHostCpu);
+ Assert(pHostCpu->idCpu != NIL_RTCPUID);
+ AssertMsg(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
+ AssertMsg(!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
+
+ /*
+ * Force a TLB flush for the first world switch if the current CPU differs from the one we
+ * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
+ * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
+ * cannot reuse the current ASID anymore.
+ */
+ if ( pVCpu->hm.s.idLastCpu != pHostCpu->idCpu
+ || pVCpu->hm.s.cTlbFlushes != pHostCpu->cTlbFlushes)
+ {
+ pVCpu->hm.s.fForceTLBFlush = true;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
+ }
+
+ /* Check for explicit TLB flushes. */
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
+ {
+ /*
+ * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
+ * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
+ * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
+ * include fExplicitFlush's too) - an obscure corner case.
+ */
+ pVCpu->hm.s.fForceTLBFlush = true;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
+ }
+
+ /* Check for TLB flushes while switching to/from a nested-guest. */
+ if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
+ {
+ pVCpu->hm.s.fForceTLBFlush = true;
+ pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
+ }
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ pVCpu->hm.s.idLastCpu = pHostCpu->idCpu;
+ if (pVCpu->hm.s.fForceTLBFlush)
+ {
+ ++pHostCpu->uCurrentAsid;
+ if (pHostCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
+ {
+ pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
+ pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
+ pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
+ }
+
+ pVCpu->hm.s.fForceTLBFlush = false;
+ pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+ pVCpu->hm.s.uCurrentAsid = pHostCpu->uCurrentAsid;
+ if (pHostCpu->fFlushAsidBeforeUse)
+ {
+ if (pVM->hm.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
+ hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
+ else if (pVM->hm.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
+ {
+ hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
+ pHostCpu->fFlushAsidBeforeUse = false;
+ }
+ else
+ {
+ /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
+ AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
+ }
+ }
+ }
+
+ AssertMsg(pVCpu->hm.s.cTlbFlushes == pHostCpu->cTlbFlushes,
+ ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pHostCpu->cTlbFlushes));
+ AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
+ ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
+ pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hm.s.idLastCpu, pVCpu->hm.s.cTlbFlushes));
+ AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
+ ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
+
+ int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hm.s.uCurrentAsid);
+ AssertRC(rc);
+}
+
+
+/**
+ * Flushes the guest TLB entry based on CPU capabilities.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+#ifdef HMVMX_ALWAYS_FLUSH_TLB
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
+#endif
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ switch (pVM->hm.s.vmx.enmTlbFlushType)
+ {
+ case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
+ case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
+ case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
+ case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
+ default:
+ AssertMsgFailed(("Invalid flush-tag function identifier\n"));
+ break;
+ }
+ /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
+}
+
+
+/**
+ * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
+ * TLB entries from the host TLB before VM-entry.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
+{
+ /*
+ * Determine optimal flush type for nested paging.
+     * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up
+     * unrestricted guest execution (see hmR3InitFinalizeR0()).
+ */
+ if (pVM->hm.s.fNestedPaging)
+ {
+ if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
+ {
+ if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
+ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
+ else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
+ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
+ else
+ {
+                /* Shouldn't happen. EPT is supported but no suitable flush types are supported. */
+ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Make sure the write-back cacheable memory type for EPT is supported. */
+ if (RT_UNLIKELY(!(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_EMT_WB)))
+ {
+ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* EPT requires a page-walk length of 4. */
+ if (RT_UNLIKELY(!(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
+ {
+ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+ }
+ else
+ {
+ /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
+ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+ }
+
+ /*
+ * Determine optimal flush type for VPID.
+ */
+ if (pVM->hm.s.vmx.fVpid)
+ {
+ if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
+ {
+ if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
+ pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
+ else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
+ pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
+ else
+ {
+                /* Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU. Ignore the VPID capability. */
+ if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
+ LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
+ if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
+ LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
+ pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
+ pVM->hm.s.vmx.fVpid = false;
+ }
+ }
+ else
+ {
+ /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
+            Log4Func(("VPID supported without INVVPID support. Ignoring VPID.\n"));
+ pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
+ pVM->hm.s.vmx.fVpid = false;
+ }
+ }
+
+ /*
+ * Setup the handler for flushing tagged-TLBs.
+ */
+ if (pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid)
+ pVM->hm.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
+ else if (pVM->hm.s.fNestedPaging)
+ pVM->hm.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
+ else if (pVM->hm.s.vmx.fVpid)
+ pVM->hm.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
+ else
+ pVM->hm.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up the LBR MSR ranges based on the host CPU.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
+{
+ Assert(pVM->hm.s.vmx.fLbr);
+ uint32_t idLbrFromIpMsrFirst;
+ uint32_t idLbrFromIpMsrLast;
+ uint32_t idLbrToIpMsrFirst;
+ uint32_t idLbrToIpMsrLast;
+ uint32_t idLbrTosMsr;
+
+ /*
+ * Determine the LBR MSRs supported for this host CPU family and model.
+ *
+ * See Intel spec. 17.4.8 "LBR Stack".
+ * See Intel "Model-Specific Registers" spec.
+ */
+ uint32_t const uFamilyModel = (pVM->cpum.ro.HostFeatures.uFamily << 8)
+ | pVM->cpum.ro.HostFeatures.uModel;
+ switch (uFamilyModel)
+ {
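+        /* Each case value encodes (family << 8) | model, e.g. 0x065c is family 6, model 0x5c. */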
+ case 0x0f01: case 0x0f02:
+ idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
+ idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
+ idLbrToIpMsrFirst = 0x0;
+ idLbrToIpMsrLast = 0x0;
+ idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
+ break;
+
+ case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
+ case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
+ case 0x066a: case 0x066c: case 0x067d: case 0x067e:
+ idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
+ idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
+ idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
+ idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
+ idLbrTosMsr = MSR_LASTBRANCH_TOS;
+ break;
+
+ case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
+ case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
+ case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
+ case 0x062e: case 0x0625: case 0x062c: case 0x062f:
+ idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
+ idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
+ idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
+ idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
+ idLbrTosMsr = MSR_LASTBRANCH_TOS;
+ break;
+
+ case 0x0617: case 0x061d: case 0x060f:
+ idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
+ idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
+ idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
+ idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
+ idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
+ break;
+
+ /* Atom and related microarchitectures we don't care about:
+ case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
+ case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
+ case 0x0636: */
+ /* All other CPUs: */
+ default:
+ {
+ LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+ }
+
+ /*
+ * Validate.
+ */
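+    /* The LBR stack depth is the number of consecutive FROM-IP MSRs in the range determined above. */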
+ uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
+ PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
+ AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
+ == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
+ if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
+ {
+ LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+ NOREF(pVCpu0);
+
+ /*
+     * Record the LBR MSR range info. in the VM struct. for use later.
+ */
+ pVM->hm.s.vmx.idLbrTosMsr = idLbrTosMsr;
+ pVM->hm.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
+ pVM->hm.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
+
+ pVM->hm.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
+ pVM->hm.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
+ return VINF_SUCCESS;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Sets up the shadow VMCS fields arrays.
+ *
+ * This function builds arrays of VMCS fields to sync the shadow VMCS later while
+ * executing the guest.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
+{
+ /*
+ * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
+ * when the host does not support it.
+ */
+ bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
+ if ( !fGstVmwriteAll
+ || (pVM->hm.s.vmx.Msrs.u64Misc & VMX_MISC_VMWRITE_ALL))
+ { /* likely. */ }
+ else
+ {
+ LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
+ VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
+ uint32_t cRwFields = 0;
+ uint32_t cRoFields = 0;
+ for (uint32_t i = 0; i < cVmcsFields; i++)
+ {
+ VMXVMCSFIELD VmcsField;
+ VmcsField.u = g_aVmcsFields[i];
+
+ /*
+ * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
+ * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
+ * in the shadow VMCS fields array as they would be redundant.
+ *
+ * If the VMCS field depends on a CPU feature that is not exposed to the guest,
+ * we must not include it in the shadow VMCS fields array. Guests attempting to
+ * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
+ * the required behavior.
+ */
+ if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
+ && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
+ {
+ /*
+ * Read-only fields are placed in a separate array so that while syncing shadow
+ * VMCS fields later (which is more performance critical) we can avoid branches.
+ *
+ * However, if the guest can write to all fields (including read-only fields),
+             * we treat it as a read/write field. Otherwise, writing to these fields would
+ * cause a VMWRITE instruction error while syncing the shadow VMCS.
+ */
+ if ( fGstVmwriteAll
+ || !VMXIsVmcsFieldReadOnly(VmcsField.u))
+ pVM->hm.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
+ else
+ pVM->hm.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
+ }
+ }
+
+ /* Update the counts. */
+ pVM->hm.s.vmx.cShadowVmcsFields = cRwFields;
+ pVM->hm.s.vmx.cShadowVmcsRoFields = cRoFields;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up the VMREAD and VMWRITE bitmaps.
+ *
+ * @param pVM The cross context VM structure.
+ */
+static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
+{
+ /*
+ * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
+ */
+ uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
+ uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hm.s.vmx.pvVmreadBitmap;
+ uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hm.s.vmx.pvVmwriteBitmap;
+ ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
+ ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
+
+ /*
+ * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
+ * VMREAD and VMWRITE bitmaps.
+ */
+ {
+ uint32_t const *paShadowVmcsFields = pVM->hm.s.vmx.paShadowVmcsFields;
+ uint32_t const cShadowVmcsFields = pVM->hm.s.vmx.cShadowVmcsFields;
+ for (uint32_t i = 0; i < cShadowVmcsFields; i++)
+ {
+ uint32_t const uVmcsField = paShadowVmcsFields[i];
+ Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
+ Assert(uVmcsField >> 3 < cbBitmap);
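+            /* Each VMCS field encoding maps to one bit in the bitmap: byte offset = encoding >> 3, bit index = encoding & 7. */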
+ ASMBitClear(pbVmreadBitmap + (uVmcsField >> 3), uVmcsField & 7);
+ ASMBitClear(pbVmwriteBitmap + (uVmcsField >> 3), uVmcsField & 7);
+ }
+ }
+
+ /*
+ * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
+ * if the host supports VMWRITE to all supported VMCS fields.
+ */
+ if (pVM->hm.s.vmx.Msrs.u64Misc & VMX_MISC_VMWRITE_ALL)
+ {
+ uint32_t const *paShadowVmcsRoFields = pVM->hm.s.vmx.paShadowVmcsRoFields;
+ uint32_t const cShadowVmcsRoFields = pVM->hm.s.vmx.cShadowVmcsRoFields;
+ for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
+ {
+ uint32_t const uVmcsField = paShadowVmcsRoFields[i];
+ Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
+ Assert(uVmcsField >> 3 < cbBitmap);
+ ASMBitClear(pbVmreadBitmap + (uVmcsField >> 3), uVmcsField & 7);
+ }
+ }
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+
+/**
+ * Sets up the virtual-APIC page address for the VMCS.
+ *
+ * @param pVmcsInfo The VMCS info. object.
+ */
+DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
+{
+ RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
+ Assert(HCPhysVirtApic != NIL_RTHCPHYS);
+ Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
+ AssertRC(rc);
+}
+
+
+/**
+ * Sets up the MSR-bitmap address for the VMCS.
+ *
+ * @param pVmcsInfo The VMCS info. object.
+ */
+DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
+{
+ RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
+ Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
+ Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
+ AssertRC(rc);
+}
+
+
+/**
+ * Sets up the APIC-access page address for the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
+{
+ RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hm.s.vmx.HCPhysApicAccess;
+ Assert(HCPhysApicAccess != NIL_RTHCPHYS);
+ Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
+ AssertRC(rc);
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Sets up the VMREAD bitmap address for the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
+{
+ RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hm.s.vmx.HCPhysVmreadBitmap;
+ Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
+ Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
+ AssertRC(rc);
+}
+
+
+/**
+ * Sets up the VMWRITE bitmap address for the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
+{
+ RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hm.s.vmx.HCPhysVmwriteBitmap;
+ Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
+ Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
+ AssertRC(rc);
+}
+#endif
+
+
+/**
+ * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
+ * in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
+{
+ RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
+ Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
+ Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
+
+ RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
+ Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
+ Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
+
+ RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
+ Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
+ Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
+
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
+ rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
+ rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
+
+ /*
+ * By default, ensure guest attempts to access any MSR cause VM-exits.
+ * This shall later be relaxed for specific MSRs as necessary.
+ *
+ * Note: For nested-guests, the entire bitmap will be merged prior to
+ * executing the nested-guest using hardware-assisted VMX and hence there
+ * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
+ */
+ Assert(pVmcsInfo->pvMsrBitmap);
+ ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
+
+ /*
+ * The guest can access the following MSRs (read, write) without causing
+ * VM-exits; they are loaded/stored automatically using fields in the VMCS.
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
+
+ /*
+     * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and have no state
+     * associated with them. We never need to intercept access (writes need to be
+     * executed without causing a VM-exit, reads will #GP fault anyway).
+     *
+     * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
+     * read/write it. We swap the guest/host MSR value using the
+     * auto-load/store MSR area.
+ */
+ if (pVM->cpum.ro.GuestFeatures.fIbpb)
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
+ if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
+ if (pVM->cpum.ro.GuestFeatures.fIbrs)
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
+
+ /*
+ * Allow full read/write access for the following MSRs (mandatory for VT-x)
+ * required for 64-bit guests.
+ */
+ if (pVM->hm.s.fAllow64BitGuests)
+ {
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
+ hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
+ }
+
+ /*
+ * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
+ */
+#ifdef VBOX_STRICT
+ Assert(pVmcsInfo->pvMsrBitmap);
+ uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
+ Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
+#endif
+}
+
+
+/**
+ * Sets up pin-based VM-execution controls in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ uint32_t fVal = pVM->hm.s.vmx.Msrs.PinCtls.n.allowed0; /* Bits set here must always be set. */
+ uint32_t const fZap = pVM->hm.s.vmx.Msrs.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
+
+ fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
+ | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
+
+ if (pVM->hm.s.vmx.Msrs.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
+ fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
+
+ /* Enable the VMX-preemption timer. */
+ if (pVM->hm.s.vmx.fUsePreemptTimer)
+ {
+ Assert(pVM->hm.s.vmx.Msrs.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
+ fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
+ }
+
+#if 0
+ /* Enable posted-interrupt processing. */
+ if (pVM->hm.s.fPostedIntrs)
+ {
+ Assert(pVM->hm.s.vmx.Msrs.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
+ Assert(pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
+ fVal |= VMX_PIN_CTLS_POSTED_INT;
+ }
+#endif
+
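+    /* Ensure every control bit we want set is also allowed to be 1 by the CPU (the allowed-1 mask); otherwise bail out. */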
+ if ((fVal & fZap) != fVal)
+ {
+ LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
+ pVM->hm.s.vmx.Msrs.PinCtls.n.allowed0, fVal, fZap));
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Commit it to the VMCS and update our cache. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
+ AssertRC(rc);
+ pVmcsInfo->u32PinCtls = fVal;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up secondary processor-based VM-execution controls in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ uint32_t fVal = pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
+ uint32_t const fZap = pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
+
+ /* WBINVD causes a VM-exit. */
+ if (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
+ fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
+
+ /* Enable EPT (aka nested-paging). */
+ if (pVM->hm.s.fNestedPaging)
+ fVal |= VMX_PROC_CTLS2_EPT;
+
+    /* Enable the INVPCID instruction if we expose it to the guest and it is supported
+       by the hardware. Without this, a guest executing INVPCID would cause a #UD. */
+ if ( pVM->cpum.ro.GuestFeatures.fInvpcid
+ && (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
+ fVal |= VMX_PROC_CTLS2_INVPCID;
+
+ /* Enable VPID. */
+ if (pVM->hm.s.vmx.fVpid)
+ fVal |= VMX_PROC_CTLS2_VPID;
+
+ /* Enable unrestricted guest execution. */
+ if (pVM->hm.s.vmx.fUnrestrictedGuest)
+ fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
+
+#if 0
+ if (pVM->hm.s.fVirtApicRegs)
+ {
+ /* Enable APIC-register virtualization. */
+ Assert(pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
+ fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
+
+ /* Enable virtual-interrupt delivery. */
+ Assert(pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
+ fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
+ }
+#endif
+
+ /* Virtualize-APIC accesses if supported by the CPU. The virtual-APIC page is
+ where the TPR shadow resides. */
+ /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
+ * done dynamically. */
+ if (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
+ {
+ fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
+ hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
+ }
+
+    /* Enable the RDTSCP instruction if we expose it to the guest and it is supported
+       by the hardware. Without this, a guest executing RDTSCP would cause a #UD. */
+ if ( pVM->cpum.ro.GuestFeatures.fRdTscP
+ && (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
+ fVal |= VMX_PROC_CTLS2_RDTSCP;
+
+ /* Enable Pause-Loop exiting. */
+ if ( (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
+ && pVM->hm.s.vmx.cPleGapTicks
+ && pVM->hm.s.vmx.cPleWindowTicks)
+ {
+ fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
+
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
+ rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
+ }
+
+ if ((fVal & fZap) != fVal)
+ {
+ LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
+ pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed0, fVal, fZap));
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Commit it to the VMCS and update our cache. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
+ AssertRC(rc);
+ pVmcsInfo->u32ProcCtls2 = fVal;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up processor-based VM-execution controls in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ uint32_t fVal = pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
+ uint32_t const fZap = pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
+
+ fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
+ | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
+ | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
+ | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
+ | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
+ | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
+ | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
+
+    /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later; check that it is not -always- required to be set or cleared. */
+ if ( !(pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
+ || (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
+ {
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
+ if (!pVM->hm.s.fNestedPaging)
+ {
+ Assert(!pVM->hm.s.vmx.fUnrestrictedGuest);
+ fVal |= VMX_PROC_CTLS_INVLPG_EXIT
+ | VMX_PROC_CTLS_CR3_LOAD_EXIT
+ | VMX_PROC_CTLS_CR3_STORE_EXIT;
+ }
+
+ /* Use TPR shadowing if supported by the CPU. */
+ if ( PDMHasApic(pVM)
+ && (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
+ {
+ fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
+ /* CR8 writes cause a VM-exit based on TPR threshold. */
+ Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
+ Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
+ hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
+ }
+ else
+ {
+ /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
+ invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
+ if (pVM->hm.s.fAllow64BitGuests)
+ {
+ fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
+ | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
+ }
+ }
+
+ /* Use MSR-bitmaps if supported by the CPU. */
+ if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ {
+ fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
+ hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
+ }
+
+ /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
+ if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
+ fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
+
+ if ((fVal & fZap) != fVal)
+ {
+ LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
+ pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed0, fVal, fZap));
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Commit it to the VMCS and update our cache. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
+ AssertRC(rc);
+ pVmcsInfo->u32ProcCtls = fVal;
+
+ /* Set up MSR permissions that don't change through the lifetime of the VM. */
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
+
+ /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
+ return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
+
+ /* Sanity check, should not really happen. */
+ if (RT_LIKELY(!pVM->hm.s.vmx.fUnrestrictedGuest))
+ { /* likely */ }
+ else
+ {
+ pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up miscellaneous (everything other than Pin, Processor and secondary
+ * Processor-based VM-execution) control fields in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUseVmcsShadowing)
+ {
+ hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
+ hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
+ }
+#endif
+
+ Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
+ int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
+ AssertRC(rc);
+
+ rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ uint64_t const u64Cr0Mask = hmR0VmxGetFixedCr0Mask(pVCpu);
+ uint64_t const u64Cr4Mask = hmR0VmxGetFixedCr4Mask(pVCpu);
+
+ rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
+
+ pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
+ pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
+
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fLbr)
+ {
+ rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
+ AssertRC(rc);
+ }
+ return VINF_SUCCESS;
+ }
+ else
+ LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Sets up the initial exception bitmap in the VMCS based on static conditions.
+ *
+ * We shall setup those exception intercepts that don't change during the
+ * lifetime of the VM here. The rest are done dynamically while loading the
+ * guest state.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ /*
+ * The following exceptions are always intercepted:
+ *
+ * #AC - To prevent the guest from hanging the CPU.
+ * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and
+ * recursive #DBs can cause a CPU hang.
+ * #PF - To sync our shadow page tables when nested-paging is not used.
+ */
+ bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging;
+ uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
+ | RT_BIT(X86_XCPT_DB)
+ | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
+
+ /* Commit it to the VMCS. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
+ AssertRC(rc);
+
+ /* Update our cache of the exception bitmap. */
+ pVmcsInfo->u32XcptBitmap = uXcptBitmap;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static int hmR0VmxSetupVmcsCtlsNested(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
+ int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
+ AssertRC(rc);
+
+ rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
+
+ /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
+ Assert(!pVmcsInfo->u64Cr0Mask);
+ Assert(!pVmcsInfo->u64Cr4Mask);
+ return VINF_SUCCESS;
+ }
+ else
+ LogRelFunc(("Failed to set up the VMCS link pointer in the nested-guest VMCS. rc=%Rrc\n", rc));
+ return rc;
+}
+#endif
+
+
+/**
+ * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
+ * VMX.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
+ */
+static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
+{
+ Assert(pVmcsInfo->pvVmcs);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_ID);
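+    /* The first 32 bits of a VMCS region hold the VMCS revision identifier from IA32_VMX_BASIC; the
+       shadow-VMCS indicator (bit 31) stays clear here and is only set for the shadow VMCS further below. */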
+ const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
+
+ LogFlowFunc(("\n"));
+
+ /*
+ * Initialize the VMCS using VMCLEAR before loading the VMCS.
+ * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
+ */
+ int rc = hmR0VmxClearVmcs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ rc = hmR0VmxLoadVmcs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
+ * The host is always 64-bit since we no longer support 32-bit hosts.
+ * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
+ */
+ pVmcsInfo->pfnStartVM = VMXR0StartVM64;
+ if (!fIsNstGstVmcs)
+ {
+ rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * If a shadow VMCS is allocated for the VMCS info. object, initialize the
+ * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
+ * making it fit for use when VMCS shadowing is later enabled.
+ */
+ if (pVmcsInfo->pvShadowVmcs)
+ {
+ VMXVMCSREVID VmcsRevId;
+ VmcsRevId.u = RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_ID);
+ VmcsRevId.n.fIsShadowVmcs = 1;
+ *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
+ rc = hmR0VmxClearShadowVmcs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
+ }
+#endif
+ }
+ else
+ LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
+ }
+ else
+ LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
+ }
+ else
+ LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
+ }
+ else
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ rc = hmR0VmxSetupVmcsCtlsNested(pVCpu, pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
+#else
+ AssertFailed();
+#endif
+ }
+ }
+ else
+ LogRelFunc(("Failed to load the %s. rc=%Rrc\n", rc, pszVmcs));
+ }
+ else
+ LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", rc, pszVmcs));
+
+ /* Sync any CPU internal VMCS data back into our VMCS in memory. */
+ if (RT_SUCCESS(rc))
+ {
+ rc = hmR0VmxClearVmcs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", rc, pszVmcs));
+ }
+
+ /*
+ * Update the last-error record both for failures and success, so we
+ * can propagate the status code back to ring-3 for diagnostics.
+ */
+ hmR0VmxUpdateErrorRecord(pVCpu, rc);
+ NOREF(pszVmcs);
+ return rc;
+}
+
+
+/**
+ * Does global VT-x initialization (called during module initialization).
+ *
+ * @returns VBox status code.
+ */
+VMMR0DECL(int) VMXR0GlobalInit(void)
+{
+#ifdef HMVMX_USE_FUNCTION_TABLE
+ AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_apfnVMExitHandlers));
+# ifdef VBOX_STRICT
+ for (unsigned i = 0; i < RT_ELEMENTS(g_apfnVMExitHandlers); i++)
+ Assert(g_apfnVMExitHandlers[i]);
+# endif
+#endif
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does global VT-x termination (called during module termination).
+ */
+VMMR0DECL(void) VMXR0GlobalTerm()
+{
+ /* Nothing to do currently. */
+}
+
+
+/**
+ * Sets up and activates VT-x on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVM The cross context VM structure. Can be
+ * NULL after a host resume operation.
+ * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
+ * fEnabledByHost is @c true).
+ * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
+ * @a fEnabledByHost is @c true).
+ * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
+ * enable VT-x on the host.
+ * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
+ */
+VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
+ PCSUPHWVIRTMSRS pHwvirtMsrs)
+{
+ AssertPtr(pHostCpu);
+ AssertPtr(pHwvirtMsrs);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* Enable VT-x if it's not already enabled by the host. */
+ if (!fEnabledByHost)
+ {
+ int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+
+ /*
+     * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been
+ * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
+ * invalidated when flushing by VPID.
+ */
+ if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
+ {
+ hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
+ pHostCpu->fFlushAsidBeforeUse = false;
+ }
+ else
+ pHostCpu->fFlushAsidBeforeUse = true;
+
+ /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
+ ++pHostCpu->cTlbFlushes;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Deactivates VT-x on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pvCpuPage Pointer to the VMXON region.
+ * @param HCPhysCpuPage Physical address of the VMXON region.
+ *
+ * @remarks This function should never be called when SUPR0EnableVTx() or
+ * similar was used to enable VT-x on the host.
+ */
+VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
+{
+ RT_NOREF2(pvCpuPage, HCPhysCpuPage);
+
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ return hmR0VmxLeaveRootMode(pHostCpu);
+}
+
+
+/**
+ * Does per-VM VT-x initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
+{
+ AssertPtr(pVM);
+ LogFlowFunc(("pVM=%p\n", pVM));
+
+ hmR0VmxStructsInit(pVM);
+ int rc = hmR0VmxStructsAlloc(pVM);
+ if (RT_FAILURE(rc))
+ {
+ LogRelFunc(("Failed to allocated VMX structures. rc=%Rrc\n", rc));
+ return rc;
+ }
+
+ /* Setup the crash dump page. */
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ strcpy((char *)pVM->hm.s.vmx.pbScratch, "SCRATCH Magic");
+ *(uint64_t *)(pVM->hm.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
+#endif
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does per-VM VT-x termination.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
+{
+ AssertPtr(pVM);
+ LogFlowFunc(("pVM=%p\n", pVM));
+
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ if (pVM->hm.s.vmx.hMemObjScratch != NIL_RTR0MEMOBJ)
+ {
+ Assert(pVM->hm.s.vmx.pvScratch);
+ ASMMemZero32(pVM->hm.s.vmx.pvScratch, X86_PAGE_4K_SIZE);
+ }
+#endif
+ hmR0VmxStructsFree(pVM);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up the VM for execution using hardware-assisted VMX.
+ * This function is only called once per-VM during initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
+{
+ AssertPtr(pVM);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ LogFlowFunc(("pVM=%p\n", pVM));
+
+ /*
+ * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
+ * without causing a #GP.
+ */
+ RTCCUINTREG const uHostCr4 = ASMGetCR4();
+ if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
+ { /* likely */ }
+ else
+ return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
+
+ /*
+ * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
+ * always be allocated. We no longer support the highly unlikely case of unrestricted guest
+ * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
+ */
+ if ( !pVM->hm.s.vmx.fUnrestrictedGuest
+ && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
+ || !pVM->hm.s.vmx.pRealModeTSS))
+ {
+ LogRelFunc(("Invalid real-on-v86 state.\n"));
+ return VERR_INTERNAL_ERROR;
+ }
+
+ /* Initialize these always, see hmR3InitFinalizeR0().*/
+ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
+ pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
+
+ /* Setup the tagged-TLB flush handlers. */
+ int rc = hmR0VmxSetupTaggedTlb(pVM);
+ if (RT_FAILURE(rc))
+ {
+ LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
+ return rc;
+ }
+
+ /* Determine LBR capabilities. */
+ if (pVM->hm.s.vmx.fLbr)
+ {
+ rc = hmR0VmxSetupLbrMsrRange(pVM);
+ if (RT_FAILURE(rc))
+ {
+ LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
+ return rc;
+ }
+ }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
+ if (pVM->hm.s.vmx.fUseVmcsShadowing)
+ {
+ rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
+ if (RT_SUCCESS(rc))
+ hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
+ else
+ {
+ LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
+ return rc;
+ }
+ }
+#endif
+
+ for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
+ {
+ PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
+ Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
+
+ rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hm.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if (pVM->cpum.ro.GuestFeatures.fVmx)
+ {
+ rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hm.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ {
+ LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
+ return rc;
+ }
+ }
+#endif
+ }
+ else
+ {
+ LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
+ return rc;
+ }
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
+ * the VMCS.
+ */
+static void hmR0VmxExportHostControlRegs(void)
+{
+ int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, ASMGetCR4()); AssertRC(rc);
+}
+
+
+/**
+ * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
+ * the host-state area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu)
+{
+/**
+ * Macro for adjusting host segment selectors to satisfy VT-x's VM-entry
+ * requirements. See hmR0VmxExportHostSegmentRegs().
+ */
+#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_selValue) \
+ if ((a_selValue) & (X86_SEL_RPL | X86_SEL_LDT)) \
+ { \
+ bool fValidSelector = true; \
+ if ((a_selValue) & X86_SEL_LDT) \
+ { \
+ uint32_t const uAttr = ASMGetSegAttr(a_selValue); \
+ fValidSelector = RT_BOOL(uAttr != UINT32_MAX && (uAttr & X86_DESC_P)); \
+ } \
+ if (fValidSelector) \
+ { \
+ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
+ pVCpu->hm.s.vmx.RestoreHost.uHostSel##a_Seg = (a_selValue); \
+ } \
+ (a_selValue) = 0; \
+ }
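+    /* E.g. a host data selector with a non-zero RPL or with the TI (LDT) bit set would fail the VM-entry
+       host-state checks, so the macro above loads zero into the corresponding VMCS host field and, if the
+       original selector is still usable, flags it for restoration via VMXRestoreHostState(). */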
+
+ /*
+ * If we've executed guest code using hardware-assisted VMX, the host-state bits
+ * will be messed up. We should -not- save the messed up state without restoring
+ * the original host-state, see @bugref{7240}.
+ *
+ * This apparently can happen (most likely the FPU changes), deal with it rather than
+ * asserting. Was observed booting Solaris 10u10 32-bit guest.
+ */
+ if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED)
+ && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED))
+ {
+ Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hm.s.vmx.fRestoreHostFlags,
+ pVCpu->idCpu));
+ VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost);
+ }
+ pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
+
+ /*
+ * Host segment registers.
+ */
+ RTSEL uSelES = ASMGetES();
+ RTSEL uSelCS = ASMGetCS();
+ RTSEL uSelSS = ASMGetSS();
+ RTSEL uSelDS = ASMGetDS();
+ RTSEL uSelFS = ASMGetFS();
+ RTSEL uSelGS = ASMGetGS();
+ RTSEL uSelTR = ASMGetTR();
+
+ /*
+     * Determine if the host segment registers are suitable for VT-x. If not, load zero into the
+     * corresponding VMCS host-state fields so that VM-entry succeeds, and restore the original
+     * selectors before we get preempted.
+ *
+ * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
+ */
+ VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
+ VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
+ VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
+ VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
+
+ /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
+ Assert(!(uSelCS & X86_SEL_RPL)); Assert(!(uSelCS & X86_SEL_LDT));
+ Assert(!(uSelSS & X86_SEL_RPL)); Assert(!(uSelSS & X86_SEL_LDT));
+ Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
+ Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
+ Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
+ Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
+ Assert(!(uSelTR & X86_SEL_RPL)); Assert(!(uSelTR & X86_SEL_LDT));
+ Assert(uSelCS);
+ Assert(uSelTR);
+
+ /* Write these host selector fields into the host-state area in the VMCS. */
+ int rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, uSelCS); AssertRC(rc);
+ rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, uSelSS); AssertRC(rc);
+ rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
+ rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
+ rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
+ rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
+ rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, uSelTR); AssertRC(rc);
+
+ /*
+ * Host GDTR and IDTR.
+ */
+ RTGDTR Gdtr;
+ RTIDTR Idtr;
+ RT_ZERO(Gdtr);
+ RT_ZERO(Idtr);
+ ASMGetGDTR(&Gdtr);
+ ASMGetIDTR(&Idtr);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, Gdtr.pGdt); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, Idtr.pIdt); AssertRC(rc);
+
+ /*
+     * Determine if we need to manually restore the GDTR and IDTR limits as VT-x zaps
+ * them to the maximum limit (0xffff) on every VM-exit.
+ */
+ if (Gdtr.cbGdt != 0xffff)
+ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
+
+ /*
+ * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
+ * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
+ * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
+     * However, several hosts either insist on 0xfff being the limit (Windows Patch Guard) or
+     * use the limit for other purposes (darwin puts the CPU ID in there but botches sidt
+ * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
+ * at 0xffff on hosts where we are sure it won't cause trouble.
+ */
+#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
+ if (Idtr.cbIdt < 0x0fff)
+#else
+ if (Idtr.cbIdt != 0xffff)
+#endif
+ {
+ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
+ AssertCompile(sizeof(Idtr) == sizeof(X86XDTR64));
+ memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostIdtr, &Idtr, sizeof(X86XDTR64));
+ }
+
+ /*
+ * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
+ * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
+ * RPL should be too in most cases.
+ */
+ AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= Gdtr.cbGdt,
+ ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, Gdtr.cbGdt), VERR_VMX_INVALID_HOST_STATE);
+
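+    /* E.g. a hypothetical TR selector of 0x0040 selects GDT entry 8, i.e. the descriptor at byte offset 0x40. */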
+ PCX86DESCHC pDesc = (PCX86DESCHC)(Gdtr.pGdt + (uSelTR & X86_SEL_MASK));
+ uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
+
+ /*
+ * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
+ * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
+ * restoration if the host has something else. Task switching is not supported in 64-bit
+ * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
+ * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
+ *
+ * [1] See Intel spec. 3.5 "System Descriptor Types".
+ * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ Assert(pDesc->System.u4Type == 11);
+ if ( pDesc->System.u16LimitLow != 0x67
+ || pDesc->System.u4LimitHigh)
+ {
+ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
+ /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
+ if (pVM->hm.s.fHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
+ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
+ pVCpu->hm.s.vmx.RestoreHost.uHostSelTR = uSelTR;
+ }
+
+ /*
+ * Store the GDTR as we need it when restoring the GDT and while restoring the TR.
+ */
+ if (pVCpu->hm.s.vmx.fRestoreHostFlags & (VMX_RESTORE_HOST_GDTR | VMX_RESTORE_HOST_SEL_TR))
+ {
+ AssertCompile(sizeof(Gdtr) == sizeof(X86XDTR64));
+ memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostGdtr, &Gdtr, sizeof(X86XDTR64));
+ if (pVM->hm.s.fHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
+ {
+ /* The GDT is read-only but the writable GDT is available. */
+ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
+ pVCpu->hm.s.vmx.RestoreHost.HostGdtrRw.cb = Gdtr.cbGdt;
+ rc = SUPR0GetCurrentGdtRw(&pVCpu->hm.s.vmx.RestoreHost.HostGdtrRw.uAddr);
+ AssertRCReturn(rc, rc);
+ }
+ }
+
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase);
+ AssertRC(rc);
+
+ /*
+ * Host FS base and GS base.
+ */
+ uint64_t const u64FSBase = ASMRdMsr(MSR_K8_FS_BASE);
+ uint64_t const u64GSBase = ASMRdMsr(MSR_K8_GS_BASE);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, u64FSBase); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, u64GSBase); AssertRC(rc);
+
+ /* Store the base if we have to restore FS or GS manually as we need to restore the base as well. */
+ if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_FS)
+ pVCpu->hm.s.vmx.RestoreHost.uHostFSBase = u64FSBase;
+ if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_GS)
+ pVCpu->hm.s.vmx.RestoreHost.uHostGSBase = u64GSBase;
+
+ return VINF_SUCCESS;
+#undef VMXLOCAL_ADJUST_HOST_SEG
+}
+
+
+/**
+ * Exports certain host MSRs in the VM-exit MSR-load area and some in the
+ * host-state area of the VMCS.
+ *
+ * These MSRs will be automatically restored on the host after every successful
+ * VM-exit.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
+{
+ AssertPtr(pVCpu);
+
+ /*
+ * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
+ * rather than swapping them on every VM-entry.
+ */
+ hmR0VmxLazySaveHostMsrs(pVCpu);
+
+ /*
+ * Host Sysenter MSRs.
+ */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
+
+ /*
+ * Host EFER MSR.
+ *
+ * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
+ * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if (pVM->hm.s.vmx.fSupportsVmcsEfer)
+ {
+ rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, pVM->hm.s.vmx.u64HostMsrEfer);
+ AssertRC(rc);
+ }
+
+ /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
+ * hmR0VmxExportGuestEntryExitCtls(). */
+}
+
+
+/**
+ * Figures out if we need to swap the EFER MSR which is particularly expensive.
+ *
+ * We check all relevant bits. For now, that's everything besides LMA/LME, as
+ * these two bits are handled by VM-entry, see hmR0VmxExportGuestEntryExitCtls().
+ *
+ * @returns true if we need to load guest EFER, false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks Requires EFER, CR4.
+ * @remarks No-long-jump zone!!!
+ */
+static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+#ifdef HMVMX_ALWAYS_SWAP_EFER
+ RT_NOREF2(pVCpu, pVmxTransient);
+ return true;
+#else
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ uint64_t const u64HostEfer = pVM->hm.s.vmx.u64HostMsrEfer;
+ uint64_t const u64GuestEfer = pCtx->msrEFER;
+
+# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * For nested-guests, we shall honor swapping the EFER MSR when requested by
+ * the nested-guest.
+ */
+ if ( pVmxTransient->fIsNestedGuest
+ && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
+ || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
+ || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
+ return true;
+# else
+ RT_NOREF(pVmxTransient);
+#endif
+
+ /*
+ * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR
+ * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
+ */
+ if ( CPUMIsGuestInLongModeEx(pCtx)
+ && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
+ return true;
+
+ /*
+ * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR
+ * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
+ *
+ * See Intel spec. 4.5 "IA-32e Paging".
+ * See Intel spec. 4.1.1 "Three Paging Modes".
+ *
+ * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
+ * import CR4 and CR0 from the VMCS here as those bits are always up to date.
+ */
+ Assert(hmR0VmxGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
+ Assert(hmR0VmxGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
+ if ( (pCtx->cr4 & X86_CR4_PAE)
+ && (pCtx->cr0 & X86_CR0_PG))
+ {
+ /*
+ * If nested paging is not used, verify that the guest paging mode matches the
+ * shadow paging mode which is/will be placed in the VMCS (which is what will
+ * actually be used while executing the guest and not the CR4 shadow value).
+ */
+ AssertMsg(pVM->hm.s.fNestedPaging || ( pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
+ || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
+ || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
+ || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX),
+ ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
+ if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
+ {
+ /* Verify that the host is NX capable. */
+ Assert(pVCpu->CTX_SUFF(pVM)->cpum.ro.HostFeatures.fNoExecute);
+ return true;
+ }
+ }
+
+ return false;
+#endif
+}
+
+
+/**
+ * Exports the guest state with appropriate VM-entry and VM-exit controls in the
+ * VMCS.
+ *
+ * This is typically required when the guest changes paging mode.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks Requires EFER.
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestEntryExitCtls(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS)
+ {
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+
+ /*
+ * VM-entry controls.
+ */
+ {
+ uint32_t fVal = pVM->hm.s.vmx.Msrs.EntryCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
+ uint32_t const fZap = pVM->hm.s.vmx.Msrs.EntryCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
+
+ /*
+ * Load the guest debug controls (DR7 and IA32_DEBUGCTL MSR) on VM-entry.
+ * The first VT-x capable CPUs only supported the 1-setting of this bit.
+ *
+ * For nested-guests, this is a mandatory VM-entry control. It's also
+ * required because we do not want to leak host bits to the nested-guest.
+ */
+ fVal |= VMX_ENTRY_CTLS_LOAD_DEBUG;
+
+ /*
+ * Set if the guest is in long mode. This will set/clear the EFER.LMA bit on VM-entry.
+ *
+ * For nested-guests, the "IA-32e mode guest" control we initialize with what is
+ * required to get the nested-guest working with hardware-assisted VMX execution.
+ * It depends on the nested-guest's IA32_EFER.LMA bit. Remember, a nested hypervisor
+             * can skip intercepting changes to the EFER MSR. This is why it needs to be done
+ * here rather than while merging the guest VMCS controls.
+ */
+ if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx))
+ {
+ Assert(pVCpu->cpum.GstCtx.msrEFER & MSR_K6_EFER_LME);
+ fVal |= VMX_ENTRY_CTLS_IA32E_MODE_GUEST;
+ }
+ else
+ Assert(!(fVal & VMX_ENTRY_CTLS_IA32E_MODE_GUEST));
+
+ /*
+ * If the CPU supports the newer VMCS controls for managing guest/host EFER, use it.
+ *
+ * For nested-guests, we use the "load IA32_EFER" if the hardware supports it,
+ * regardless of whether the nested-guest VMCS specifies it because we are free to
+ * load whatever MSRs we require and we do not need to modify the guest visible copy
+ * of the VM-entry MSR load area.
+ */
+ if ( pVM->hm.s.vmx.fSupportsVmcsEfer
+ && hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
+ fVal |= VMX_ENTRY_CTLS_LOAD_EFER_MSR;
+ else
+ Assert(!(fVal & VMX_ENTRY_CTLS_LOAD_EFER_MSR));
+
+ /*
+ * The following should -not- be set (since we're not in SMM mode):
+ * - VMX_ENTRY_CTLS_ENTRY_TO_SMM
+ * - VMX_ENTRY_CTLS_DEACTIVATE_DUAL_MON
+ */
+
+ /** @todo VMX_ENTRY_CTLS_LOAD_PERF_MSR,
+ * VMX_ENTRY_CTLS_LOAD_PAT_MSR. */
+
+ if ((fVal & fZap) == fVal)
+ { /* likely */ }
+ else
+ {
+ Log4Func(("Invalid VM-entry controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
+ pVM->hm.s.vmx.Msrs.EntryCtls.n.allowed0, fVal, fZap));
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_ENTRY;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Commit it to the VMCS. */
+ if (pVmcsInfo->u32EntryCtls != fVal)
+ {
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY, fVal);
+ AssertRC(rc);
+ pVmcsInfo->u32EntryCtls = fVal;
+ }
+ }
+
+ /*
+ * VM-exit controls.
+ */
+ {
+ uint32_t fVal = pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
+ uint32_t const fZap = pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
+
+ /*
+ * Save debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x CPUs only
+ * supported the 1-setting of this bit.
+ *
+ * For nested-guests, we set the "save debug controls" as the converse
+ * "load debug controls" is mandatory for nested-guests anyway.
+ */
+ fVal |= VMX_EXIT_CTLS_SAVE_DEBUG;
+
+ /*
+ * Set the host long mode active (EFER.LMA) bit (which Intel calls
+ * "Host address-space size") if necessary. On VM-exit, VT-x sets both the
+ * host EFER.LMA and EFER.LME bit to this value. See assertion in
+ * hmR0VmxExportHostMsrs().
+ *
+ * For nested-guests, we always set this bit as we do not support 32-bit
+ * hosts.
+ */
+ fVal |= VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE;
+
+ /*
+ * If the VMCS EFER MSR fields are supported by the hardware, we use it.
+ *
+ * For nested-guests, we should use the "save IA32_EFER" control if we also
+ * used the "load IA32_EFER" control while exporting VM-entry controls.
+ */
+ if ( pVM->hm.s.vmx.fSupportsVmcsEfer
+ && hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
+ {
+ fVal |= VMX_EXIT_CTLS_SAVE_EFER_MSR
+ | VMX_EXIT_CTLS_LOAD_EFER_MSR;
+ }
+
+ /*
+ * Enable saving of the VMX-preemption timer value on VM-exit.
+ * For nested-guests, currently not exposed/used.
+ */
+ if ( pVM->hm.s.vmx.fUsePreemptTimer
+ && (pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_SAVE_PREEMPT_TIMER))
+ fVal |= VMX_EXIT_CTLS_SAVE_PREEMPT_TIMER;
+
+ /* Don't acknowledge external interrupts on VM-exit. We want to let the host do that. */
+ Assert(!(fVal & VMX_EXIT_CTLS_ACK_EXT_INT));
+
+ /** @todo VMX_EXIT_CTLS_LOAD_PERF_MSR,
+ * VMX_EXIT_CTLS_SAVE_PAT_MSR,
+ * VMX_EXIT_CTLS_LOAD_PAT_MSR. */
+
+ if ((fVal & fZap) == fVal)
+ { /* likely */ }
+ else
+ {
+ Log4Func(("Invalid VM-exit controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%R#X32\n",
+ pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed0, fVal, fZap));
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_EXIT;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Commit it to the VMCS. */
+ if (pVmcsInfo->u32ExitCtls != fVal)
+ {
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT, fVal);
+ AssertRC(rc);
+ pVmcsInfo->u32ExitCtls = fVal;
+ }
+ }
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_ENTRY_EXIT_CTLS);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets the TPR threshold in the VMCS.
+ *
+ * @param pVmcsInfo The VMCS info. object.
+ * @param u32TprThreshold The TPR threshold (task-priority class only).
+ */
+DECLINLINE(void) hmR0VmxApicSetTprThreshold(PVMXVMCSINFO pVmcsInfo, uint32_t u32TprThreshold)
+{
+ Assert(!(u32TprThreshold & ~VMX_TPR_THRESHOLD_MASK)); /* Bits 31:4 MBZ. */
+ Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW);
+ RT_NOREF(pVmcsInfo);
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
+ AssertRC(rc);
+}
+
+
+/**
+ * Exports the guest APIC TPR state into the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxExportGuestApicTpr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_APIC_TPR)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_APIC_TPR);
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if (!pVmxTransient->fIsNestedGuest)
+ {
+ if ( PDMHasApic(pVCpu->CTX_SUFF(pVM))
+ && APICIsEnabled(pVCpu))
+ {
+ /*
+ * Setup TPR shadowing.
+ */
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
+ {
+ bool fPendingIntr = false;
+ uint8_t u8Tpr = 0;
+ uint8_t u8PendingIntr = 0;
+ int rc = APICGetTpr(pVCpu, &u8Tpr, &fPendingIntr, &u8PendingIntr);
+ AssertRC(rc);
+
+ /*
+ * If there are interrupts pending but masked by the TPR, instruct VT-x to
+ * cause a TPR-below-threshold VM-exit when the guest lowers its TPR below the
+ * priority of the pending interrupt so we can deliver the interrupt. If there
+ * are no interrupts pending, set threshold to 0 to not cause any
+ * TPR-below-threshold VM-exits.
+ */
+ uint32_t u32TprThreshold = 0;
+ if (fPendingIntr)
+ {
+ /* Bits 3:0 of the TPR threshold field correspond to bits 7:4 of the TPR
+ (which is the Task-Priority Class). */
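+                        /* Example with hypothetical values: a pending vector 0x45 (class 4) while the guest
+                           TPR is 0x60 (class 6) yields a threshold of 4, so the interrupt gets delivered via
+                           a TPR-below-threshold VM-exit once the guest lowers its TPR below 0x40. */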
+ const uint8_t u8PendingPriority = u8PendingIntr >> 4;
+ const uint8_t u8TprPriority = u8Tpr >> 4;
+ if (u8PendingPriority <= u8TprPriority)
+ u32TprThreshold = u8PendingPriority;
+ }
+
+ hmR0VmxApicSetTprThreshold(pVmcsInfo, u32TprThreshold);
+ }
+ }
+ }
+ /* else: the TPR threshold has already been updated while merging the nested-guest VMCS. */
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_APIC_TPR);
+ }
+}
+
+
+/**
+ * Gets the guest interruptibility-state and updates related force-flags.
+ *
+ * @returns Guest's interruptibility-state.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static uint32_t hmR0VmxGetGuestIntrStateAndUpdateFFs(PVMCPUCC pVCpu)
+{
+ /*
+ * Check if we should inhibit interrupt delivery due to instructions like STI and MOV SS.
+ */
+ uint32_t fIntrState = 0;
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ {
+ /* If inhibition is active, RIP and RFLAGS should've been imported from the VMCS already. */
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (pCtx->rip == EMGetInhibitInterruptsPC(pVCpu))
+ {
+ if (pCtx->eflags.Bits.u1IF)
+ fIntrState = VMX_VMCS_GUEST_INT_STATE_BLOCK_STI;
+ else
+ fIntrState = VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS;
+ }
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ {
+ /*
+ * We can clear the inhibit force flag as even if we go back to the recompiler
+ * without executing guest code in VT-x, the flag's condition to be cleared is
+ * met and thus the cleared state is correct.
+ */
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+ }
+ }
+
+ /*
+ * Check if we should inhibit NMI delivery.
+ */
+ if (CPUMIsGuestNmiBlocking(pVCpu))
+ fIntrState |= VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI;
+
+ /*
+ * Validate.
+ */
+#ifdef VBOX_STRICT
+    /* We don't support block-by-SMI yet. */
+ Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI));
+
+ /* Block-by-STI must not be set when interrupts are disabled. */
+ if (fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RFLAGS);
+ Assert(pVCpu->cpum.GstCtx.eflags.u & X86_EFL_IF);
+ }
+#endif
+
+ return fIntrState;
+}
+
+
+/**
+ * Exports the exception intercepts required for guest execution in the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxExportGuestXcptIntercepts(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_XCPT_INTERCEPTS)
+ {
+ /* When executing a nested-guest, we do not need to trap GIM hypercalls by intercepting #UD. */
+ if ( !pVmxTransient->fIsNestedGuest
+ && pVCpu->hm.s.fGIMTrapXcptUD)
+ hmR0VmxAddXcptIntercept(pVmxTransient, X86_XCPT_UD);
+ else
+ hmR0VmxRemoveXcptIntercept(pVCpu, pVmxTransient, X86_XCPT_UD);
+
+ /* Other exception intercepts are handled elsewhere, e.g. while exporting guest CR0. */
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_XCPT_INTERCEPTS);
+ }
+}
+
+
+/**
+ * Exports the guest's RIP into the guest-state area in the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxExportGuestRip(PVMCPUCC pVCpu)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RIP)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RIP);
+
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RIP, pVCpu->cpum.GstCtx.rip);
+ AssertRC(rc);
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RIP);
+ Log4Func(("rip=%#RX64\n", pVCpu->cpum.GstCtx.rip));
+ }
+}
+
+
+/**
+ * Exports the guest's RSP into the guest-state area in the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
+
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
+ AssertRC(rc);
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
+ Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
+ }
+}
+
+
+/**
+ * Exports the guest's RFLAGS into the guest-state area in the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxExportGuestRflags(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RFLAGS)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RFLAGS);
+
+ /* Intel spec. 2.3.1 "System Flags and Fields in IA-32e Mode" claims the upper 32-bits of RFLAGS are reserved (MBZ).
+ Let us assert it as such and use 32-bit VMWRITE. */
+ Assert(!RT_HI_U32(pVCpu->cpum.GstCtx.rflags.u64));
+ X86EFLAGS fEFlags = pVCpu->cpum.GstCtx.eflags;
+ Assert(fEFlags.u32 & X86_EFL_RA1_MASK);
+ Assert(!(fEFlags.u32 & ~(X86_EFL_1 | X86_EFL_LIVE_MASK)));
+
+ /*
+ * If we're emulating real-mode using Virtual 8086 mode, save the real-mode eflags so
+ * we can restore them on VM-exit. Modify the real-mode guest's eflags so that VT-x
+ * can run the real-mode guest code under Virtual 8086 mode.
+ */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if (pVmcsInfo->RealMode.fRealOnV86Active)
+ {
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
+ Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
+ Assert(!pVmxTransient->fIsNestedGuest);
+ pVmcsInfo->RealMode.Eflags.u32 = fEFlags.u32; /* Save the original eflags of the real-mode guest. */
+ fEFlags.Bits.u1VM = 1; /* Set the Virtual 8086 mode bit. */
+ fEFlags.Bits.u2IOPL = 0; /* Change IOPL to 0, otherwise certain instructions won't fault. */
+ }
+
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RFLAGS, fEFlags.u32);
+ AssertRC(rc);
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RFLAGS);
+ Log4Func(("eflags=%#RX32\n", fEFlags.u32));
+ }
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Copies the nested-guest VMCS to the shadow VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxCopyNstGstToShadowVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCVMXVVMCS pVmcsNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.CTX_SUFF(pVmcs);
+
+ /*
+ * Disable interrupts so we don't get preempted while the shadow VMCS is the
+ * current VMCS, as we may try saving guest lazy MSRs.
+ *
+ * Strictly speaking the lazy MSRs are not in the VMCS, but I'd rather not risk
+ * calling the import VMCS code which is currently performing the guest MSR reads
+ * (on 64-bit hosts) and accessing the auto-load/store MSR area on 32-bit hosts
+ * and the rest of the VMX leave session machinery.
+ */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ int rc = hmR0VmxLoadShadowVmcs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Copy all guest read/write VMCS fields.
+ *
+ * We don't check for VMWRITE failures here for performance reasons and
+ * because they are not expected to fail, barring irrecoverable conditions
+ * like hardware errors.
+ */
+ uint32_t const cShadowVmcsFields = pVM->hm.s.vmx.cShadowVmcsFields;
+ for (uint32_t i = 0; i < cShadowVmcsFields; i++)
+ {
+ uint64_t u64Val;
+ uint32_t const uVmcsField = pVM->hm.s.vmx.paShadowVmcsFields[i];
+ IEMReadVmxVmcsField(pVmcsNstGst, uVmcsField, &u64Val);
+ VMXWriteVmcs64(uVmcsField, u64Val);
+ }
+
+ /*
+ * If the host CPU supports writing all VMCS fields, copy the guest read-only
+ * VMCS fields, so the guest can VMREAD them without causing a VM-exit.
+ */
+ if (pVM->hm.s.vmx.Msrs.u64Misc & VMX_MISC_VMWRITE_ALL)
+ {
+ uint32_t const cShadowVmcsRoFields = pVM->hm.s.vmx.cShadowVmcsRoFields;
+ for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
+ {
+ uint64_t u64Val;
+ uint32_t const uVmcsField = pVM->hm.s.vmx.paShadowVmcsRoFields[i];
+ IEMReadVmxVmcsField(pVmcsNstGst, uVmcsField, &u64Val);
+ VMXWriteVmcs64(uVmcsField, u64Val);
+ }
+ }
+
+ rc = hmR0VmxClearShadowVmcs(pVmcsInfo);
+ rc |= hmR0VmxLoadVmcs(pVmcsInfo);
+ }
+
+ ASMSetFlags(fEFlags);
+ return rc;
+}
+
+
+/**
+ * Copies the shadow VMCS to the nested-guest VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static int hmR0VmxCopyShadowToNstGstVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PVMXVVMCS pVmcsNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.CTX_SUFF(pVmcs);
+
+ int rc = hmR0VmxLoadShadowVmcs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Copy guest read/write fields from the shadow VMCS.
+ * Guest read-only fields cannot be modified, so no need to copy them.
+ *
+ * We don't check for VMREAD failures here for performance reasons and
+ * because they are not expected to fail, barring irrecoverable conditions
+ * like hardware errors.
+ */
+ uint32_t const cShadowVmcsFields = pVM->hm.s.vmx.cShadowVmcsFields;
+ for (uint32_t i = 0; i < cShadowVmcsFields; i++)
+ {
+ uint64_t u64Val;
+ uint32_t const uVmcsField = pVM->hm.s.vmx.paShadowVmcsFields[i];
+ VMXReadVmcs64(uVmcsField, &u64Val);
+ IEMWriteVmxVmcsField(pVmcsNstGst, uVmcsField, u64Val);
+ }
+
+ rc = hmR0VmxClearShadowVmcs(pVmcsInfo);
+ rc |= hmR0VmxLoadVmcs(pVmcsInfo);
+ }
+ return rc;
+}
+
+
+/**
+ * Enables VMCS shadowing for the given VMCS info. object.
+ *
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxEnableVmcsShadowing(PVMXVMCSINFO pVmcsInfo)
+{
+ uint32_t uProcCtls2 = pVmcsInfo->u32ProcCtls2;
+ if (!(uProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING))
+ {
+ Assert(pVmcsInfo->HCPhysShadowVmcs != 0 && pVmcsInfo->HCPhysShadowVmcs != NIL_RTHCPHYS);
+ uProcCtls2 |= VMX_PROC_CTLS2_VMCS_SHADOWING;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, uProcCtls2); AssertRC(rc);
+ rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, pVmcsInfo->HCPhysShadowVmcs); AssertRC(rc);
+ pVmcsInfo->u32ProcCtls2 = uProcCtls2;
+ pVmcsInfo->u64VmcsLinkPtr = pVmcsInfo->HCPhysShadowVmcs;
+ Log4Func(("Enabled\n"));
+ }
+}
+
+
+/**
+ * Disables VMCS shadowing for the given VMCS info. object.
+ *
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxDisableVmcsShadowing(PVMXVMCSINFO pVmcsInfo)
+{
+ /*
+ * We want all VMREAD and VMWRITE instructions to cause VM-exits, so we clear the
+ * VMCS shadowing control. However, VM-entry requires the shadow VMCS indicator bit
+ * to match the VMCS shadowing control if the VMCS link pointer is not NIL_RTHCPHYS.
+ * Hence, we must also reset the VMCS link pointer to ensure VM-entry does not fail.
+ *
+ * See Intel spec. 26.2.1.1 "VM-Execution Control Fields".
+ * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
+ */
+ uint32_t uProcCtls2 = pVmcsInfo->u32ProcCtls2;
+ if (uProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING)
+ {
+ uProcCtls2 &= ~VMX_PROC_CTLS2_VMCS_SHADOWING;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, uProcCtls2); AssertRC(rc);
+ rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS); AssertRC(rc);
+ pVmcsInfo->u32ProcCtls2 = uProcCtls2;
+ pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
+ Log4Func(("Disabled\n"));
+ }
+}
+#endif
+
+
+/**
+ * Exports the guest hardware-virtualization state.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * Check if the VMX feature is exposed to the guest and if the host CPU supports
+ * VMCS shadowing.
+ */
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUseVmcsShadowing)
+ {
+ /*
+ * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
+ * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
+ * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
+ *
+ * We check for VMX root mode here in case the guest executes VMXOFF without
+ * clearing the current VMCS pointer and our VMXOFF instruction emulation does
+ * not clear the current VMCS pointer.
+ */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
+ && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
+ && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
+ {
+ /* Paranoia. */
+ Assert(!pVmxTransient->fIsNestedGuest);
+
+ /*
+ * For performance reasons, also check if the nested hypervisor's current VMCS
+ * was newly loaded or modified before copying it to the shadow VMCS.
+ */
+ if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
+ {
+ int rc = hmR0VmxCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
+ AssertRCReturn(rc, rc);
+ pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
+ }
+ hmR0VmxEnableVmcsShadowing(pVmcsInfo);
+ }
+ else
+ hmR0VmxDisableVmcsShadowing(pVmcsInfo);
+ }
+#else
+ NOREF(pVmxTransient);
+#endif
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest CR0 control register into the guest-state area in the VMCS.
+ *
+ * The guest FPU state is always pre-loaded hence we don't need to bother about
+ * sharing FPU related CR0 bits between the guest and host.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestCR0(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CR0)
+ {
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+
+ uint64_t fSetCr0 = pVM->hm.s.vmx.Msrs.u64Cr0Fixed0;
+ uint64_t const fZapCr0 = pVM->hm.s.vmx.Msrs.u64Cr0Fixed1;
+ if (pVM->hm.s.vmx.fUnrestrictedGuest)
+ fSetCr0 &= ~(uint64_t)(X86_CR0_PE | X86_CR0_PG);
+ else
+ Assert((fSetCr0 & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG));
+
+ if (!pVmxTransient->fIsNestedGuest)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR0);
+ uint64_t u64GuestCr0 = pVCpu->cpum.GstCtx.cr0;
+ uint64_t const u64ShadowCr0 = u64GuestCr0;
+ Assert(!RT_HI_U32(u64GuestCr0));
+
+ /*
+ * Setup VT-x's view of the guest CR0.
+ */
+ uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
+ if (pVM->hm.s.fNestedPaging)
+ {
+ if (CPUMIsGuestPagingEnabled(pVCpu))
+ {
+ /* The guest has paging enabled, let it access CR3 without causing a VM-exit if supported. */
+ uProcCtls &= ~( VMX_PROC_CTLS_CR3_LOAD_EXIT
+ | VMX_PROC_CTLS_CR3_STORE_EXIT);
+ }
+ else
+ {
+ /* The guest doesn't have paging enabled, make CR3 access cause a VM-exit to update our shadow. */
+ uProcCtls |= VMX_PROC_CTLS_CR3_LOAD_EXIT
+ | VMX_PROC_CTLS_CR3_STORE_EXIT;
+ }
+
+ /* If we have unrestricted guest execution, we never have to intercept CR3 reads. */
+ if (pVM->hm.s.vmx.fUnrestrictedGuest)
+ uProcCtls &= ~VMX_PROC_CTLS_CR3_STORE_EXIT;
+ }
+ else
+ {
+ /* Guest CPL 0 writes to its read-only pages should cause a #PF VM-exit. */
+ u64GuestCr0 |= X86_CR0_WP;
+ }
+
+ /*
+ * Guest FPU bits.
+ *
+ * Since we pre-load the guest FPU always before VM-entry there is no need to track lazy state
+ * using CR0.TS.
+ *
+             * Intel spec. 23.8 "Restrictions on VMX operation" mentions that the CR0.NE bit must always be
+             * set on the first CPUs to support VT-x; the VM-entry checks make no mention of relaxing this
+             * for unrestricted guest execution.
+ */
+ u64GuestCr0 |= X86_CR0_NE;
+
+ /* If CR0.NE isn't set, we need to intercept #MF exceptions and report them to the guest differently. */
+ bool const fInterceptMF = !(u64ShadowCr0 & X86_CR0_NE);
+
+ /*
+ * Update exception intercepts.
+ */
+ uint32_t uXcptBitmap = pVmcsInfo->u32XcptBitmap;
+ if (pVmcsInfo->RealMode.fRealOnV86Active)
+ {
+ Assert(PDMVmmDevHeapIsEnabled(pVM));
+ Assert(pVM->hm.s.vmx.pRealModeTSS);
+ uXcptBitmap |= HMVMX_REAL_MODE_XCPT_MASK;
+ }
+ else
+ {
+ /* For now, cleared here as mode-switches can happen outside HM/VT-x. See @bugref{7626#c11}. */
+ uXcptBitmap &= ~HMVMX_REAL_MODE_XCPT_MASK;
+ if (fInterceptMF)
+ uXcptBitmap |= RT_BIT(X86_XCPT_MF);
+ }
+
+ /* Additional intercepts for debugging, define these yourself explicitly. */
+#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
+ uXcptBitmap |= 0
+ | RT_BIT(X86_XCPT_BP)
+ | RT_BIT(X86_XCPT_DE)
+ | RT_BIT(X86_XCPT_NM)
+ | RT_BIT(X86_XCPT_TS)
+ | RT_BIT(X86_XCPT_UD)
+ | RT_BIT(X86_XCPT_NP)
+ | RT_BIT(X86_XCPT_SS)
+ | RT_BIT(X86_XCPT_GP)
+ | RT_BIT(X86_XCPT_PF)
+ | RT_BIT(X86_XCPT_MF)
+ ;
+#elif defined(HMVMX_ALWAYS_TRAP_PF)
+ uXcptBitmap |= RT_BIT(X86_XCPT_PF);
+#endif
+ if (pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv)
+ uXcptBitmap |= RT_BIT(X86_XCPT_GP);
+ Assert(pVM->hm.s.fNestedPaging || (uXcptBitmap & RT_BIT(X86_XCPT_PF)));
+
+ /* Apply the hardware specified CR0 fixed bits and enable caching. */
+ u64GuestCr0 |= fSetCr0;
+ u64GuestCr0 &= fZapCr0;
+ u64GuestCr0 &= ~(uint64_t)(X86_CR0_CD | X86_CR0_NW);
+
+ /* Commit the CR0 and related fields to the guest VMCS. */
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_CR0, u64GuestCr0); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_READ_SHADOW, u64ShadowCr0); AssertRC(rc);
+ if (uProcCtls != pVmcsInfo->u32ProcCtls)
+ {
+ rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
+ AssertRC(rc);
+ }
+ if (uXcptBitmap != pVmcsInfo->u32XcptBitmap)
+ {
+ rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
+ AssertRC(rc);
+ }
+
+ /* Update our caches. */
+ pVmcsInfo->u32ProcCtls = uProcCtls;
+ pVmcsInfo->u32XcptBitmap = uXcptBitmap;
+
+ Log4Func(("cr0=%#RX64 shadow=%#RX64 set=%#RX64 zap=%#RX64\n", u64GuestCr0, u64ShadowCr0, fSetCr0, fZapCr0));
+ }
+ else
+ {
+ /*
+ * With nested-guests, we may have extended the guest/host mask here since we
+ * merged in the outer guest's mask. Thus, the merged mask can include more bits
+ * (to read from the nested-guest CR0 read-shadow) than the nested hypervisor
+ * originally supplied. We must copy those bits from the nested-guest CR0 into
+ * the nested-guest CR0 read-shadow.
+ */
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR0);
+ uint64_t u64GuestCr0 = pVCpu->cpum.GstCtx.cr0;
+ uint64_t const u64ShadowCr0 = CPUMGetGuestVmxMaskedCr0(&pVCpu->cpum.GstCtx, pVmcsInfo->u64Cr0Mask);
+ Assert(!RT_HI_U32(u64GuestCr0));
+ Assert(u64GuestCr0 & X86_CR0_NE);
+
+ /* Apply the hardware specified CR0 fixed bits and enable caching. */
+ u64GuestCr0 |= fSetCr0;
+ u64GuestCr0 &= fZapCr0;
+ u64GuestCr0 &= ~(uint64_t)(X86_CR0_CD | X86_CR0_NW);
+
+ /* Commit the CR0 and CR0 read-shadow to the nested-guest VMCS. */
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_CR0, u64GuestCr0); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_READ_SHADOW, u64ShadowCr0); AssertRC(rc);
+
+ Log4Func(("cr0=%#RX64 shadow=%#RX64 (set=%#RX64 zap=%#RX64)\n", u64GuestCr0, u64ShadowCr0, fSetCr0, fZapCr0));
+ }
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CR0);
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest control registers (CR3, CR4) into the guest-state area
+ * in the VMCS.
+ *
+ * @returns VBox strict status code.
+ * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
+ * without unrestricted guest access and the VMMDev is not presently
+ * mapped (e.g. EFI32).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static VBOXSTRICTRC hmR0VmxExportGuestCR3AndCR4(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+ int rc = VINF_SUCCESS;
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+
+ /*
+ * Guest CR2.
+ * It's always loaded in the assembler code. Nothing to do here.
+ */
+
+ /*
+ * Guest CR3.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CR3)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR3);
+
+ if (pVM->hm.s.fNestedPaging)
+ {
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ pVmcsInfo->HCPhysEPTP = PGMGetHyperCR3(pVCpu);
+
+ /* Validate. See Intel spec. 28.2.2 "EPT Translation Mechanism" and 24.6.11 "Extended-Page-Table Pointer (EPTP)" */
+ Assert(pVmcsInfo->HCPhysEPTP != NIL_RTHCPHYS);
+ Assert(!(pVmcsInfo->HCPhysEPTP & UINT64_C(0xfff0000000000000)));
+ Assert(!(pVmcsInfo->HCPhysEPTP & 0xfff));
+
+ /* VMX_EPT_MEMTYPE_WB support is already checked in hmR0VmxSetupTaggedTlb(). */
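+            /* EPTP layout (Intel spec. 24.6.11): bits 2:0 hold the EPT paging-structure memory type (6 = WB),
+               bits 5:3 the page-walk length minus one (3 for 4-level EPT), bit 6 the accessed/dirty enable bit,
+               and the upper bits the 4K-aligned physical address of the EPT PML4 table. */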
+ pVmcsInfo->HCPhysEPTP |= VMX_EPT_MEMTYPE_WB
+ | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
+
+ /* Validate. See Intel spec. 26.2.1 "Checks on VMX Controls" */
+ AssertMsg( ((pVmcsInfo->HCPhysEPTP >> 3) & 0x07) == 3 /* Bits 3:5 (EPT page walk length - 1) must be 3. */
+ && ((pVmcsInfo->HCPhysEPTP >> 7) & 0x1f) == 0, /* Bits 7:11 MBZ. */
+ ("EPTP %#RX64\n", pVmcsInfo->HCPhysEPTP));
+ AssertMsg( !((pVmcsInfo->HCPhysEPTP >> 6) & 0x01) /* Bit 6 (EPT accessed & dirty bit). */
+ || (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_EPT_ACCESS_DIRTY),
+ ("EPTP accessed/dirty bit not supported by CPU but set %#RX64\n", pVmcsInfo->HCPhysEPTP));
+
+ rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, pVmcsInfo->HCPhysEPTP);
+ AssertRC(rc);
+
+ uint64_t u64GuestCr3;
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if ( pVM->hm.s.vmx.fUnrestrictedGuest
+ || CPUMIsGuestPagingEnabledEx(pCtx))
+ {
+ /* If the guest is in PAE mode, pass the PDPEs to VT-x using the VMCS fields. */
+ if (CPUMIsGuestInPAEModeEx(pCtx))
+ {
+ rc = PGMGstGetPaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
+ AssertRC(rc);
+ rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, pVCpu->hm.s.aPdpes[0].u); AssertRC(rc);
+ rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, pVCpu->hm.s.aPdpes[1].u); AssertRC(rc);
+ rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, pVCpu->hm.s.aPdpes[2].u); AssertRC(rc);
+ rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, pVCpu->hm.s.aPdpes[3].u); AssertRC(rc);
+ }
+
+ /*
+                 * With nested paging, the guest's view of its CR3 is left untouched when the
+                 * guest is using paging, or when we rely on unrestricted guest execution to
+                 * handle the guest while it is not using paging.
+ */
+ u64GuestCr3 = pCtx->cr3;
+ }
+ else
+ {
+ /*
+ * The guest is not using paging, but the CPU (VT-x) has to. While the guest
+ * thinks it accesses physical memory directly, we use our identity-mapped
+ * page table to map guest-linear to guest-physical addresses. EPT takes care
+ * of translating it to host-physical addresses.
+ */
+ RTGCPHYS GCPhys;
+ Assert(pVM->hm.s.vmx.pNonPagingModeEPTPageTable);
+
+ /* We obtain it here every time as the guest could have relocated this PCI region. */
+ rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else if (rc == VERR_PDM_DEV_HEAP_R3_TO_GCPHYS)
+ {
+ Log4Func(("VERR_PDM_DEV_HEAP_R3_TO_GCPHYS -> VINF_EM_RESCHEDULE_REM\n"));
+ return VINF_EM_RESCHEDULE_REM; /* We cannot execute now, switch to REM/IEM till the guest maps in VMMDev. */
+ }
+ else
+ AssertMsgFailedReturn(("%Rrc\n", rc), rc);
+
+ u64GuestCr3 = GCPhys;
+ }
+
+ Log4Func(("guest_cr3=%#RX64 (GstN)\n", u64GuestCr3));
+ rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_CR3, u64GuestCr3);
+ AssertRC(rc);
+ }
+ else
+ {
+ Assert(!pVmxTransient->fIsNestedGuest);
+ /* Non-nested paging case, just use the hypervisor's CR3. */
+ RTHCPHYS const HCPhysGuestCr3 = PGMGetHyperCR3(pVCpu);
+
+ Log4Func(("guest_cr3=%#RX64 (HstN)\n", HCPhysGuestCr3));
+ rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_CR3, HCPhysGuestCr3);
+ AssertRC(rc);
+ }
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CR3);
+ }
+
+ /*
+ * Guest CR4.
+     * ASSUMES this is done every time we get in from ring-3! (XCR0)
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CR4)
+ {
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+
+ uint64_t const fSetCr4 = pVM->hm.s.vmx.Msrs.u64Cr4Fixed0;
+ uint64_t const fZapCr4 = pVM->hm.s.vmx.Msrs.u64Cr4Fixed1;
+
+ /*
+ * With nested-guests, we may have extended the guest/host mask here (since we
+ * merged in the outer guest's mask, see hmR0VmxMergeVmcsNested). This means, the
+ * mask can include more bits (to read from the nested-guest CR4 read-shadow) than
+ * the nested hypervisor originally supplied. Thus, we should, in essence, copy
+ * those bits from the nested-guest CR4 into the nested-guest CR4 read-shadow.
+ */
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR4);
+ uint64_t u64GuestCr4 = pCtx->cr4;
+ uint64_t const u64ShadowCr4 = !pVmxTransient->fIsNestedGuest
+ ? pCtx->cr4
+ : CPUMGetGuestVmxMaskedCr4(pCtx, pVmcsInfo->u64Cr4Mask);
+ Assert(!RT_HI_U32(u64GuestCr4));
+
+ /*
+ * Setup VT-x's view of the guest CR4.
+ *
+ * If we're emulating real-mode using virtual-8086 mode, we want to redirect software
+ * interrupts to the 8086 program interrupt handler. Clear the VME bit (the interrupt
+ * redirection bitmap is already all 0, see hmR3InitFinalizeR0())
+ *
+ * See Intel spec. 20.2 "Software Interrupt Handling Methods While in Virtual-8086 Mode".
+ */
+ if (pVmcsInfo->RealMode.fRealOnV86Active)
+ {
+ Assert(pVM->hm.s.vmx.pRealModeTSS);
+ Assert(PDMVmmDevHeapIsEnabled(pVM));
+ u64GuestCr4 &= ~(uint64_t)X86_CR4_VME;
+ }
+
+ if (pVM->hm.s.fNestedPaging)
+ {
+ if ( !CPUMIsGuestPagingEnabledEx(pCtx)
+ && !pVM->hm.s.vmx.fUnrestrictedGuest)
+ {
+ /* We use 4 MB pages in our identity mapping page table when the guest doesn't have paging. */
+ u64GuestCr4 |= X86_CR4_PSE;
+ /* Our identity mapping is a 32-bit page directory. */
+ u64GuestCr4 &= ~(uint64_t)X86_CR4_PAE;
+ }
+ /* else use guest CR4.*/
+ }
+ else
+ {
+ Assert(!pVmxTransient->fIsNestedGuest);
+
+ /*
+             * The shadow paging mode and the guest paging mode can differ; the shadow follows the host
+             * paging mode, so we need to adjust VT-x's view of CR4 according to our shadow page tables.
+ */
+ switch (pVCpu->hm.s.enmShadowMode)
+ {
+ case PGMMODE_REAL: /* Real-mode. */
+ case PGMMODE_PROTECTED: /* Protected mode without paging. */
+ case PGMMODE_32_BIT: /* 32-bit paging. */
+ {
+ u64GuestCr4 &= ~(uint64_t)X86_CR4_PAE;
+ break;
+ }
+
+ case PGMMODE_PAE: /* PAE paging. */
+ case PGMMODE_PAE_NX: /* PAE paging with NX. */
+ {
+ u64GuestCr4 |= X86_CR4_PAE;
+ break;
+ }
+
+ case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
+ case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
+ {
+#ifdef VBOX_WITH_64_BITS_GUESTS
+ /* For our assumption in hmR0VmxShouldSwapEferMsr. */
+ Assert(u64GuestCr4 & X86_CR4_PAE);
+ break;
+#endif
+ }
+ default:
+ AssertFailed();
+ return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
+ }
+ }
+
+ /* Apply the hardware specified CR4 fixed bits (mainly CR4.VMXE). */
+ u64GuestCr4 |= fSetCr4;
+ u64GuestCr4 &= fZapCr4;
+
+ /* Commit the CR4 and CR4 read-shadow to the guest VMCS. */
+ rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_CR4, u64GuestCr4); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_READ_SHADOW, u64ShadowCr4); AssertRC(rc);
+
+ /* Whether to save/load/restore XCR0 during world switch depends on CR4.OSXSAVE and host+guest XCR0. */
+ pVCpu->hm.s.fLoadSaveGuestXcr0 = (pCtx->cr4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0();
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CR4);
+
+ Log4Func(("cr4=%#RX64 shadow=%#RX64 (set=%#RX64 zap=%#RX64)\n", u64GuestCr4, u64ShadowCr4, fSetCr4, fZapCr4));
+ }
+ return rc;
+}
+
+
+/**
+ * Exports the guest debug registers into the guest-state area in the VMCS.
+ * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
+ *
+ * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
+ * stepping. */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if (pVmxTransient->fIsNestedGuest)
+ {
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
+ AssertRC(rc);
+
+ /* Always intercept Mov DRx accesses for the nested-guest for now. */
+ pVmcsInfo->u32ProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
+ rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
+ AssertRC(rc);
+ return VINF_SUCCESS;
+ }
+
+#ifdef VBOX_STRICT
+ /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
+ if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
+ {
+ /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
+ Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
+ Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
+ }
+#endif
+
+ bool fSteppingDB = false;
+ bool fInterceptMovDRx = false;
+ uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
+ if (pVCpu->hm.s.fSingleInstruction)
+ {
+ /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
+ {
+ uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
+ Assert(fSteppingDB == false);
+ }
+ else
+ {
+ pVCpu->cpum.GstCtx.eflags.u32 |= X86_EFL_TF;
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
+ pVCpu->hm.s.fClearTrapFlag = true;
+ fSteppingDB = true;
+ }
+ }
+
+ uint64_t u64GuestDr7;
+ if ( fSteppingDB
+ || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
+ {
+ /*
+ * Use the combined guest and host DRx values found in the hypervisor register set
+ * because the hypervisor debugger has breakpoints active or someone is single stepping
+ * on the host side without a monitor trap flag.
+ *
+ * Note! DBGF expects a clean DR6 state before executing guest code.
+ */
+ if (!CPUMIsHyperDebugStateActive(pVCpu))
+ {
+ CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
+ Assert(CPUMIsHyperDebugStateActive(pVCpu));
+ Assert(!CPUMIsGuestDebugStateActive(pVCpu));
+ }
+
+ /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
+ u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
+ pVCpu->hm.s.fUsingHyperDR7 = true;
+ fInterceptMovDRx = true;
+ }
+ else
+ {
+ /*
+ * If the guest has enabled debug registers, we need to load them prior to
+ * executing guest code so they'll trigger at the right time.
+ */
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
+ if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
+ {
+ if (!CPUMIsGuestDebugStateActive(pVCpu))
+ {
+ CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
+ Assert(CPUMIsGuestDebugStateActive(pVCpu));
+ Assert(!CPUMIsHyperDebugStateActive(pVCpu));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
+ }
+ Assert(!fInterceptMovDRx);
+ }
+ else if (!CPUMIsGuestDebugStateActive(pVCpu))
+ {
+ /*
+             * If no debugging is enabled, we'll lazily load DR0-3. Unlike on AMD-V, we
+ * must intercept #DB in order to maintain a correct DR6 guest value, and
+ * because we need to intercept it to prevent nested #DBs from hanging the
+ * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
+ */
+ fInterceptMovDRx = true;
+ }
+
+ /* Update DR7 with the actual guest value. */
+ u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
+ pVCpu->hm.s.fUsingHyperDR7 = false;
+ }
+
+ if (fInterceptMovDRx)
+ uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
+ else
+ uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
+
+ /*
+ * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
+ * monitor-trap flag and update our cache.
+ */
+ if (uProcCtls != pVmcsInfo->u32ProcCtls)
+ {
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
+ AssertRC(rc);
+ pVmcsInfo->u32ProcCtls = uProcCtls;
+ }
+
+ /*
+ * Update guest DR7.
+ */
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
+ AssertRC(rc);
+
+ /*
+ * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
+     * we need to clear any interrupt inhibition, as it would otherwise cause a VM-entry failure.
+ *
+ * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
+ */
+ if (fSteppingDB)
+ {
+ Assert(pVCpu->hm.s.fSingleInstruction);
+ Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
+
+ uint32_t fIntrState = 0;
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
+ AssertRC(rc);
+
+ if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
+ {
+ fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
+ rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
+ AssertRC(rc);
+ }
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+#ifdef VBOX_STRICT
+/**
+ * Strict function to validate segment registers.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks Will import guest CR0 on strict builds during validation of
+ * segments.
+ */
+static void hmR0VmxValidateSegmentRegs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ /*
+ * Validate segment registers. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
+ *
+ * The reason we check for attribute value 0 in this function and not just the unusable bit is
+ * because hmR0VmxExportGuestSegReg() only updates the VMCS' copy of the value with the
+ * unusable bit and doesn't change the guest-context value.
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_CR0);
+ if ( !pVM->hm.s.vmx.fUnrestrictedGuest
+ && ( !CPUMIsGuestInRealModeEx(pCtx)
+ && !CPUMIsGuestInV86ModeEx(pCtx)))
+ {
+ /* Protected mode checks */
+ /* CS */
+ Assert(pCtx->cs.Attr.n.u1Present);
+ Assert(!(pCtx->cs.Attr.u & 0xf00));
+ Assert(!(pCtx->cs.Attr.u & 0xfffe0000));
+ Assert( (pCtx->cs.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->cs.Attr.n.u1Granularity));
+ Assert( !(pCtx->cs.u32Limit & 0xfff00000)
+ || (pCtx->cs.Attr.n.u1Granularity));
+ /* CS cannot be loaded with NULL in protected mode. */
+ Assert(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE)); /** @todo is this really true even for 64-bit CS? */
+ if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11)
+ Assert(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl);
+ else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15)
+ Assert(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl);
+ else
+            AssertMsgFailed(("Invalid CS Type %#x\n", pCtx->cs.Attr.n.u4Type));
+ /* SS */
+ Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL));
+ Assert(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL));
+ if ( !(pCtx->cr0 & X86_CR0_PE)
+ || pCtx->cs.Attr.n.u4Type == 3)
+ {
+ Assert(!pCtx->ss.Attr.n.u2Dpl);
+ }
+ if (pCtx->ss.Attr.u && !(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL));
+ Assert(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7);
+ Assert(pCtx->ss.Attr.n.u1Present);
+ Assert(!(pCtx->ss.Attr.u & 0xf00));
+ Assert(!(pCtx->ss.Attr.u & 0xfffe0000));
+ Assert( (pCtx->ss.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->ss.Attr.n.u1Granularity));
+ Assert( !(pCtx->ss.u32Limit & 0xfff00000)
+ || (pCtx->ss.Attr.n.u1Granularity));
+ }
+ /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxExportGuestSegReg(). */
+ if (pCtx->ds.Attr.u && !(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ Assert(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
+ Assert(pCtx->ds.Attr.n.u1Present);
+ Assert(pCtx->ds.Attr.n.u4Type > 11 || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL));
+ Assert(!(pCtx->ds.Attr.u & 0xf00));
+ Assert(!(pCtx->ds.Attr.u & 0xfffe0000));
+ Assert( (pCtx->ds.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->ds.Attr.n.u1Granularity));
+ Assert( !(pCtx->ds.u32Limit & 0xfff00000)
+ || (pCtx->ds.Attr.n.u1Granularity));
+ Assert( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ));
+ }
+ if (pCtx->es.Attr.u && !(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ Assert(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
+ Assert(pCtx->es.Attr.n.u1Present);
+ Assert(pCtx->es.Attr.n.u4Type > 11 || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL));
+ Assert(!(pCtx->es.Attr.u & 0xf00));
+ Assert(!(pCtx->es.Attr.u & 0xfffe0000));
+ Assert( (pCtx->es.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->es.Attr.n.u1Granularity));
+ Assert( !(pCtx->es.u32Limit & 0xfff00000)
+ || (pCtx->es.Attr.n.u1Granularity));
+ Assert( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ));
+ }
+ if (pCtx->fs.Attr.u && !(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ Assert(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
+ Assert(pCtx->fs.Attr.n.u1Present);
+ Assert(pCtx->fs.Attr.n.u4Type > 11 || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL));
+ Assert(!(pCtx->fs.Attr.u & 0xf00));
+ Assert(!(pCtx->fs.Attr.u & 0xfffe0000));
+ Assert( (pCtx->fs.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->fs.Attr.n.u1Granularity));
+ Assert( !(pCtx->fs.u32Limit & 0xfff00000)
+ || (pCtx->fs.Attr.n.u1Granularity));
+ Assert( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ));
+ }
+ if (pCtx->gs.Attr.u && !(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ Assert(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
+ Assert(pCtx->gs.Attr.n.u1Present);
+ Assert(pCtx->gs.Attr.n.u4Type > 11 || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL));
+ Assert(!(pCtx->gs.Attr.u & 0xf00));
+ Assert(!(pCtx->gs.Attr.u & 0xfffe0000));
+ Assert( (pCtx->gs.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->gs.Attr.n.u1Granularity));
+ Assert( !(pCtx->gs.u32Limit & 0xfff00000)
+ || (pCtx->gs.Attr.n.u1Granularity));
+ Assert( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ));
+ }
+ /* 64-bit capable CPUs. */
+ Assert(!RT_HI_U32(pCtx->cs.u64Base));
+ Assert(!pCtx->ss.Attr.u || !RT_HI_U32(pCtx->ss.u64Base));
+ Assert(!pCtx->ds.Attr.u || !RT_HI_U32(pCtx->ds.u64Base));
+ Assert(!pCtx->es.Attr.u || !RT_HI_U32(pCtx->es.u64Base));
+ }
+ else if ( CPUMIsGuestInV86ModeEx(pCtx)
+ || ( CPUMIsGuestInRealModeEx(pCtx)
+ && !pVM->hm.s.vmx.fUnrestrictedGuest))
+ {
+ /* Real and v86 mode checks. */
+        /* hmR0VmxExportGuestSegReg() writes the modified value into the VMCS. We want what we're actually feeding to VT-x. */
+ uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr;
+ if (pVmcsInfo->RealMode.fRealOnV86Active)
+ {
+ u32CSAttr = 0xf3; u32SSAttr = 0xf3; u32DSAttr = 0xf3;
+ u32ESAttr = 0xf3; u32FSAttr = 0xf3; u32GSAttr = 0xf3;
+ }
+ else
+ {
+ u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u; u32DSAttr = pCtx->ds.Attr.u;
+ u32ESAttr = pCtx->es.Attr.u; u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u;
+ }
+
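+        /* In real and v86 mode each segment base must equal the 16-bit selector shifted left by 4,
+           e.g. CS.Sel=0x1234 implies a CS base of 0x12340, and each limit must be 0xffff. */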
+ /* CS */
+ AssertMsg((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), ("CS base %#x %#x\n", pCtx->cs.u64Base, pCtx->cs.Sel));
+ Assert(pCtx->cs.u32Limit == 0xffff);
+ Assert(u32CSAttr == 0xf3);
+ /* SS */
+ Assert(pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4);
+ Assert(pCtx->ss.u32Limit == 0xffff);
+ Assert(u32SSAttr == 0xf3);
+ /* DS */
+ Assert(pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4);
+ Assert(pCtx->ds.u32Limit == 0xffff);
+ Assert(u32DSAttr == 0xf3);
+ /* ES */
+ Assert(pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4);
+ Assert(pCtx->es.u32Limit == 0xffff);
+ Assert(u32ESAttr == 0xf3);
+ /* FS */
+ Assert(pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4);
+ Assert(pCtx->fs.u32Limit == 0xffff);
+ Assert(u32FSAttr == 0xf3);
+ /* GS */
+ Assert(pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4);
+ Assert(pCtx->gs.u32Limit == 0xffff);
+ Assert(u32GSAttr == 0xf3);
+ /* 64-bit capable CPUs. */
+ Assert(!RT_HI_U32(pCtx->cs.u64Base));
+ Assert(!u32SSAttr || !RT_HI_U32(pCtx->ss.u64Base));
+ Assert(!u32DSAttr || !RT_HI_U32(pCtx->ds.u64Base));
+ Assert(!u32ESAttr || !RT_HI_U32(pCtx->es.u64Base));
+ }
+}
+#endif /* VBOX_STRICT */
+
+
+/**
+ * Exports a guest segment register into the guest-state area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param iSegReg The segment register number (X86_SREG_XXX).
+ * @param pSelReg Pointer to the segment selector.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestSegReg(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, uint8_t iSegReg, PCCPUMSELREG pSelReg)
+{
+ Assert(iSegReg < X86_SREG_COUNT);
+ uint32_t const idxSel = g_aVmcsSegSel[iSegReg];
+ uint32_t const idxLimit = g_aVmcsSegLimit[iSegReg];
+ uint32_t const idxBase = g_aVmcsSegBase[iSegReg];
+ uint32_t const idxAttr = g_aVmcsSegAttr[iSegReg];
+
+ uint32_t u32Access = pSelReg->Attr.u;
+ if (pVmcsInfo->RealMode.fRealOnV86Active)
+ {
+ /* VT-x requires our real-using-v86 mode hack to override the segment access-right bits. */
+ u32Access = 0xf3;
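+        /* 0xf3 decodes as: type=3 (read/write accessed data), S=1, DPL=3, P=1 - the access rights
+           VT-x demands for all segments while in virtual-8086 mode. */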
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
+ Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
+ RT_NOREF_PV(pVCpu);
+ }
+ else
+ {
+ /*
+         * The way to differentiate whether this is really a null selector or just a selector
+         * loaded with 0 in real-mode is by using the segment attributes. A selector
+         * loaded in real-mode with the value 0 is valid and usable in protected-mode and we
+         * should -not- mark it as an unusable segment. Both the recompiler and VT-x ensure
+         * that NULL selectors loaded in protected-mode have their attributes set to 0.
+ */
+ if (!u32Access)
+ u32Access = X86DESCATTR_UNUSABLE;
+ }
+
+ /* Validate segment access rights. Refer to Intel spec. "26.3.1.2 Checks on Guest Segment Registers". */
+ AssertMsg((u32Access & X86DESCATTR_UNUSABLE) || (u32Access & X86_SEL_TYPE_ACCESSED),
+              ("Access bit not set for usable segment. idx=%#x sel=%#x attr %#x\n", idxBase, pSelReg->Sel, pSelReg->Attr.u));
+
+ /*
+ * Commit it to the VMCS.
+ */
+ int rc = VMXWriteVmcs32(idxSel, pSelReg->Sel); AssertRC(rc);
+ rc = VMXWriteVmcs32(idxLimit, pSelReg->u32Limit); AssertRC(rc);
+ rc = VMXWriteVmcsNw(idxBase, pSelReg->u64Base); AssertRC(rc);
+ rc = VMXWriteVmcs32(idxAttr, u32Access); AssertRC(rc);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest segment registers, GDTR, IDTR, LDTR, TR into the guest-state
+ * area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks Will import guest CR0 on strict builds during validation of
+ * segments.
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestSegRegsXdtr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+ int rc = VERR_INTERNAL_ERROR_5;
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+
+ /*
+ * Guest Segment registers: CS, SS, DS, ES, FS, GS.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SREG_MASK)
+ {
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CS)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CS);
+ if (pVmcsInfo->RealMode.fRealOnV86Active)
+ pVmcsInfo->RealMode.AttrCS.u = pCtx->cs.Attr.u;
+ rc = hmR0VmxExportGuestSegReg(pVCpu, pVmcsInfo, X86_SREG_CS, &pCtx->cs);
+ AssertRC(rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CS);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SS)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SS);
+ if (pVmcsInfo->RealMode.fRealOnV86Active)
+ pVmcsInfo->RealMode.AttrSS.u = pCtx->ss.Attr.u;
+ rc = hmR0VmxExportGuestSegReg(pVCpu, pVmcsInfo, X86_SREG_SS, &pCtx->ss);
+ AssertRC(rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SS);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_DS)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DS);
+ if (pVmcsInfo->RealMode.fRealOnV86Active)
+ pVmcsInfo->RealMode.AttrDS.u = pCtx->ds.Attr.u;
+ rc = hmR0VmxExportGuestSegReg(pVCpu, pVmcsInfo, X86_SREG_DS, &pCtx->ds);
+ AssertRC(rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_DS);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_ES)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_ES);
+ if (pVmcsInfo->RealMode.fRealOnV86Active)
+ pVmcsInfo->RealMode.AttrES.u = pCtx->es.Attr.u;
+ rc = hmR0VmxExportGuestSegReg(pVCpu, pVmcsInfo, X86_SREG_ES, &pCtx->es);
+ AssertRC(rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_ES);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_FS)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_FS);
+ if (pVmcsInfo->RealMode.fRealOnV86Active)
+ pVmcsInfo->RealMode.AttrFS.u = pCtx->fs.Attr.u;
+ rc = hmR0VmxExportGuestSegReg(pVCpu, pVmcsInfo, X86_SREG_FS, &pCtx->fs);
+ AssertRC(rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_FS);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_GS)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_GS);
+ if (pVmcsInfo->RealMode.fRealOnV86Active)
+ pVmcsInfo->RealMode.AttrGS.u = pCtx->gs.Attr.u;
+ rc = hmR0VmxExportGuestSegReg(pVCpu, pVmcsInfo, X86_SREG_GS, &pCtx->gs);
+ AssertRC(rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_GS);
+ }
+
+#ifdef VBOX_STRICT
+ hmR0VmxValidateSegmentRegs(pVCpu, pVmcsInfo);
+#endif
+ Log4Func(("cs={%#04x base=%#RX64 limit=%#RX32 attr=%#RX32}\n", pCtx->cs.Sel, pCtx->cs.u64Base, pCtx->cs.u32Limit,
+ pCtx->cs.Attr.u));
+ }
+
+ /*
+ * Guest TR.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_TR)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_TR);
+
+ /*
+ * Real-mode emulation using virtual-8086 mode with CR4.VME. Interrupt redirection is
+ * achieved using the interrupt redirection bitmap (all bits cleared to let the guest
+ * handle INT-n's) in the TSS. See hmR3InitFinalizeR0() to see how pRealModeTSS is setup.
+ */
+ uint16_t u16Sel;
+ uint32_t u32Limit;
+ uint64_t u64Base;
+ uint32_t u32AccessRights;
+ if (!pVmcsInfo->RealMode.fRealOnV86Active)
+ {
+ u16Sel = pCtx->tr.Sel;
+ u32Limit = pCtx->tr.u32Limit;
+ u64Base = pCtx->tr.u64Base;
+ u32AccessRights = pCtx->tr.Attr.u;
+ }
+ else
+ {
+ Assert(!pVmxTransient->fIsNestedGuest);
+ Assert(pVM->hm.s.vmx.pRealModeTSS);
+ Assert(PDMVmmDevHeapIsEnabled(pVM)); /* Guaranteed by HMCanExecuteGuest() -XXX- what about inner loop changes? */
+
+ /* We obtain it here every time as PCI regions could be reconfigured in the guest, changing the VMMDev base. */
+ RTGCPHYS GCPhys;
+ rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pRealModeTSS, &GCPhys);
+ AssertRCReturn(rc, rc);
+
+ X86DESCATTR DescAttr;
+ DescAttr.u = 0;
+ DescAttr.n.u1Present = 1;
+ DescAttr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
+
+ u16Sel = 0;
+ u32Limit = HM_VTX_TSS_SIZE;
+ u64Base = GCPhys;
+ u32AccessRights = DescAttr.u;
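+            /* I.e. the access rights come out as 0x8b: type 11 (busy 32-bit TSS) with the present bit (0x80) set. */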
+ }
+
+ /* Validate. */
+ Assert(!(u16Sel & RT_BIT(2)));
+ AssertMsg( (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY
+ || (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY, ("TSS is not busy!? %#x\n", u32AccessRights));
+ AssertMsg(!(u32AccessRights & X86DESCATTR_UNUSABLE), ("TR unusable bit is not clear!? %#x\n", u32AccessRights));
+ Assert(!(u32AccessRights & RT_BIT(4))); /* System MBZ.*/
+ Assert(u32AccessRights & RT_BIT(7)); /* Present MB1.*/
+ Assert(!(u32AccessRights & 0xf00)); /* 11:8 MBZ. */
+ Assert(!(u32AccessRights & 0xfffe0000)); /* 31:17 MBZ. */
+ Assert( (u32Limit & 0xfff) == 0xfff
+ || !(u32AccessRights & RT_BIT(15))); /* Granularity MBZ. */
+ Assert( !(pCtx->tr.u32Limit & 0xfff00000)
+ || (u32AccessRights & RT_BIT(15))); /* Granularity MB1. */
+
+ rc = VMXWriteVmcs16(VMX_VMCS16_GUEST_TR_SEL, u16Sel); AssertRC(rc);
+ rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_LIMIT, u32Limit); AssertRC(rc);
+ rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, u32AccessRights); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_TR_BASE, u64Base); AssertRC(rc);
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_TR);
+ Log4Func(("tr base=%#RX64 limit=%#RX32\n", pCtx->tr.u64Base, pCtx->tr.u32Limit));
+ }
+
+ /*
+ * Guest GDTR.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_GDTR)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_GDTR);
+
+ rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_GDTR_BASE, pCtx->gdtr.pGdt); AssertRC(rc);
+
+ /* Validate. */
+ Assert(!(pCtx->gdtr.cbGdt & 0xffff0000)); /* Bits 31:16 MBZ. */
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_GDTR);
+ Log4Func(("gdtr base=%#RX64 limit=%#RX32\n", pCtx->gdtr.pGdt, pCtx->gdtr.cbGdt));
+ }
+
+ /*
+ * Guest LDTR.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_LDTR)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_LDTR);
+
+ /* The unusable bit is specific to VT-x, if it's a null selector mark it as an unusable segment. */
+ uint32_t u32Access;
+ if ( !pVmxTransient->fIsNestedGuest
+ && !pCtx->ldtr.Attr.u)
+ u32Access = X86DESCATTR_UNUSABLE;
+ else
+ u32Access = pCtx->ldtr.Attr.u;
+
+ rc = VMXWriteVmcs16(VMX_VMCS16_GUEST_LDTR_SEL, pCtx->ldtr.Sel); AssertRC(rc);
+ rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtr.u32Limit); AssertRC(rc);
+ rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, u32Access); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_LDTR_BASE, pCtx->ldtr.u64Base); AssertRC(rc);
+
+ /* Validate. */
+ if (!(u32Access & X86DESCATTR_UNUSABLE))
+ {
+ Assert(!(pCtx->ldtr.Sel & RT_BIT(2))); /* TI MBZ. */
+ Assert(pCtx->ldtr.Attr.n.u4Type == 2); /* Type MB2 (LDT). */
+ Assert(!pCtx->ldtr.Attr.n.u1DescType); /* System MBZ. */
+ Assert(pCtx->ldtr.Attr.n.u1Present == 1); /* Present MB1. */
+ Assert(!pCtx->ldtr.Attr.n.u4LimitHigh); /* 11:8 MBZ. */
+ Assert(!(pCtx->ldtr.Attr.u & 0xfffe0000)); /* 31:17 MBZ. */
+ Assert( (pCtx->ldtr.u32Limit & 0xfff) == 0xfff
+ || !pCtx->ldtr.Attr.n.u1Granularity); /* Granularity MBZ. */
+ Assert( !(pCtx->ldtr.u32Limit & 0xfff00000)
+ || pCtx->ldtr.Attr.n.u1Granularity); /* Granularity MB1. */
+ }
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_LDTR);
+ Log4Func(("ldtr base=%#RX64 limit=%#RX32\n", pCtx->ldtr.u64Base, pCtx->ldtr.u32Limit));
+ }
+
+ /*
+ * Guest IDTR.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_IDTR)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_IDTR);
+
+ rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt); AssertRC(rc);
+ rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_IDTR_BASE, pCtx->idtr.pIdt); AssertRC(rc);
+
+ /* Validate. */
+ Assert(!(pCtx->idtr.cbIdt & 0xffff0000)); /* Bits 31:16 MBZ. */
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_IDTR);
+ Log4Func(("idtr base=%#RX64 limit=%#RX32\n", pCtx->idtr.pIdt, pCtx->idtr.cbIdt));
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
+ * areas.
+ *
+ * These MSRs will automatically be loaded to the host CPU on every successful
+ * VM-entry and stored from the host CPU on every successful VM-exit.
+ *
+ * We create/update MSR slots for the host MSRs in the VM-exit MSR-load area. The
+ * actual host MSR values are -not- updated here for performance reasons. See
+ * hmR0VmxExportHostMsrs().
+ *
+ * We also export the guest sysenter MSRs into the guest-state area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+ AssertPtr(pVCpu);
+ AssertPtr(pVmxTransient);
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+
+ /*
+     * MSRs for which we use the auto-load/store MSR area in the VMCS.
+     * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(),
+     * so there is nothing to do here. The host MSR values are updated when it's safe,
+     * in hmR0VmxLazySaveHostMsrs().
+     *
+     * For nested-guests, the guest's MSRs from the VM-entry MSR-load area are already
+     * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
+     * emulation. The merged MSR permission bitmap ensures that we get VM-exits
+     * for any MSRs that are not part of the lazy MSRs, so we do not need to place
+     * those MSRs into the auto-load/store MSR area. Nothing to do here.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
+ {
+ /* No auto-load/store MSRs currently. */
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
+ }
+
+ /*
+ * Guest Sysenter MSRs.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
+ {
+ int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
+ AssertRC(rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
+ {
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
+ AssertRC(rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
+ {
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
+ AssertRC(rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
+ }
+ }
+
+ /*
+ * Guest/host EFER MSR.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
+ {
+ /* Whether we are using the VMCS to swap the EFER MSR must have been
+ determined earlier while exporting VM-entry/VM-exit controls. */
+ Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
+
+ if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
+ {
+ /*
+ * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
+ * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
+ * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
+ * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
+ * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
+ * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
+ * during VM-entry.
+ */
+ uint64_t uGuestEferMsr = pCtx->msrEFER;
+ if (!pVM->hm.s.vmx.fUnrestrictedGuest)
+ {
+ if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
+ uGuestEferMsr &= ~MSR_K6_EFER_LME;
+ else
+ Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
+ }
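+            /* E.g. EFER=0x100 (LME set, LMA clear because CR0.PG=0) is exported as 0x0; once the guest also
+               sets CR0.PG, EFER reads 0x500 (LMA|LME) and is passed through unmodified. */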
+
+ /*
+ * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
+             * but to use the auto-load/store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
+ */
+ if (pVM->hm.s.vmx.fSupportsVmcsEfer)
+ {
+ int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
+ AssertRC(rc);
+ }
+ else
+ {
+ /*
+ * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
+ * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
+ */
+ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
+ false /* fSetReadWrite */, false /* fUpdateHostMsr */);
+ AssertRCReturn(rc, rc);
+ }
+
+ Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
+ }
+ else if (!pVM->hm.s.vmx.fSupportsVmcsEfer)
+ hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
+ }
+
+ /*
+ * Other MSRs.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
+ {
+ /* Speculation Control (R/W). */
+ HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
+ if (pVM->cpum.ro.GuestFeatures.fIbrs)
+ {
+ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
+ false /* fSetReadWrite */, false /* fUpdateHostMsr */);
+ AssertRCReturn(rc, rc);
+ }
+
+ /* Last Branch Record. */
+ if (pVM->hm.s.vmx.fLbr)
+ {
+ uint32_t const idFromIpMsrStart = pVM->hm.s.vmx.idLbrFromIpMsrFirst;
+ uint32_t const idToIpMsrStart = pVM->hm.s.vmx.idLbrToIpMsrFirst;
+ uint32_t const cLbrStack = pVM->hm.s.vmx.idLbrFromIpMsrLast - pVM->hm.s.vmx.idLbrFromIpMsrFirst + 1;
+ Assert(cLbrStack <= 32);
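+            /* For instance, on a CPU with a 16-deep LBR stack the From-IP MSRs typically span 0x680..0x68f and
+               the To-IP MSRs 0x6c0..0x6cf, so cLbrStack is 16 and we add 32 auto-load slots plus the TOS MSR. */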
+ for (uint32_t i = 0; i < cLbrStack; i++)
+ {
+ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
+ pVmxTransient->pVmcsInfo->au64LbrFromIpMsr[i],
+ false /* fSetReadWrite */, false /* fUpdateHostMsr */);
+ AssertRCReturn(rc, rc);
+
+ /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
+ if (idToIpMsrStart != 0)
+ {
+ rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
+ pVmxTransient->pVmcsInfo->au64LbrToIpMsr[i],
+ false /* fSetReadWrite */, false /* fUpdateHostMsr */);
+ AssertRCReturn(rc, rc);
+ }
+ }
+
+ /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
+ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hm.s.vmx.idLbrTosMsr,
+ pVmxTransient->pVmcsInfo->u64LbrTosMsr, false /* fSetReadWrite */,
+ false /* fUpdateHostMsr */);
+ AssertRCReturn(rc, rc);
+ }
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Wrapper for running the guest code in VT-x.
+ *
+ * @returns VBox status code, no informational status codes.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
+{
+ /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ pCtx->fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
+
+ /** @todo Add stats for VMRESUME vs VMLAUNCH. */
+
+ /*
+ * 64-bit Windows uses XMM registers in the kernel as the Microsoft compiler expresses
+ * floating-point operations using SSE instructions. Some XMM registers (XMM6-XMM15) are
+ * callee-saved and thus the need for this XMM wrapper.
+ *
+ * See MSDN "Configuring Programs for 64-bit/x64 Software Conventions / Register Usage".
+ */
+ PCVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState & VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+#ifdef VBOX_WITH_KERNEL_USING_XMM
+ int rc = hmR0VMXStartVMWrapXMM(fResumeVM, pCtx, NULL /*pvUnused*/, pVM, pVCpu, pVmcsInfo->pfnStartVM);
+#else
+ int rc = pVmcsInfo->pfnStartVM(fResumeVM, pCtx, NULL /*pvUnused*/, pVM, pVCpu);
+#endif
+ AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Reports world-switch error and dumps some useful debug info.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
+ * @param pVmxTransient The VMX-transient structure (only
+ * exitReason updated).
+ */
+static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
+{
+ Assert(pVCpu);
+ Assert(pVmxTransient);
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
+ switch (rcVMRun)
+ {
+ case VERR_VMX_INVALID_VMXON_PTR:
+ AssertFailed();
+ break;
+ case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
+ case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
+ rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
+ AssertRC(rc);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+
+ pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu;
+ /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
+ Cannot do it here as we may have been long preempted. */
+
+#ifdef VBOX_STRICT
+ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+ Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
+ pVmxTransient->uExitReason));
+ Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
+ Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
+ if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
+ Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
+ else
+ Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
+ Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
+ Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
+
+ static struct
+ {
+ /** Name of the field to log. */
+ const char *pszName;
+ /** The VMCS field. */
+ uint32_t uVmcsField;
+ /** Whether host support of this field needs to be checked. */
+ bool fCheckSupport;
+ } const s_aVmcsFields[] =
+ {
+ { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
+ { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
+ { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
+ { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
+ { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
+ { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
+ { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
+ { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
+ { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
+ { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
+ { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
+ { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
+ { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
+ { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
+ { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
+ { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
+ { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
+ { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
+ { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
+ { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
+ { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
+ { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
+ { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
+ { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
+ { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
+ { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
+ { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
+ { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
+ /* The order of selector fields below are fixed! */
+ { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
+ { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
+ { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
+ { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
+ { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
+ { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
+ { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
+ /* End of ordered selector fields. */
+ { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
+ { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
+ { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
+ { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
+ { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
+ { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
+ { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
+ { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
+ };
+
+ RTGDTR HostGdtr;
+ ASMGetGDTR(&HostGdtr);
+
+ uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
+ for (uint32_t i = 0; i < cVmcsFields; i++)
+ {
+ uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
+
+ bool fSupported;
+ if (!s_aVmcsFields[i].fCheckSupport)
+ fSupported = true;
+ else
+ {
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ switch (uVmcsField)
+ {
+ case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hm.s.fNestedPaging; break;
+ case VMX_VMCS16_VPID: fSupported = pVM->hm.s.vmx.fVpid; break;
+ case VMX_VMCS32_CTRL_PROC_EXEC2:
+ fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
+ break;
+ default:
+ AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
+ }
+ }
+
+ if (fSupported)
+ {
+ uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
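+                /* The width is encoded in bits 14:13 of the VMCS field encoding: 0=16-bit, 1=64-bit, 2=32-bit, 3=natural-width. */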
+ switch (uWidth)
+ {
+ case VMX_VMCSFIELD_WIDTH_16BIT:
+ {
+ uint16_t u16Val;
+ rc = VMXReadVmcs16(uVmcsField, &u16Val);
+ AssertRC(rc);
+ Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
+
+ if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
+ && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
+ {
+ if (u16Val < HostGdtr.cbGdt)
+ {
+ /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
+ static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
+ "Host FS", "Host GS", "Host TR" };
+ uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
+ Assert(idxSel < RT_ELEMENTS(s_apszSel));
+ PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
+ hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
+ }
+ else
+ Log4((" Selector value exceeds GDT limit!\n"));
+ }
+ break;
+ }
+
+ case VMX_VMCSFIELD_WIDTH_32BIT:
+ {
+ uint32_t u32Val;
+ rc = VMXReadVmcs32(uVmcsField, &u32Val);
+ AssertRC(rc);
+ Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
+ break;
+ }
+
+ case VMX_VMCSFIELD_WIDTH_64BIT:
+ case VMX_VMCSFIELD_WIDTH_NATURAL:
+ {
+ uint64_t u64Val;
+ rc = VMXReadVmcs64(uVmcsField, &u64Val);
+ AssertRC(rc);
+ Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
+ break;
+ }
+ }
+ }
+ }
+
+ Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
+ Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
+ Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
+ Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
+ Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
+ Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
+#endif /* VBOX_STRICT */
+ break;
+ }
+
+ default:
+ /* Impossible */
+ AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
+ break;
+ }
+}
+
+
+/**
+ * Sets up the usage of TSC-offsetting and updates the VMCS.
+ *
+ * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
+ * VMX-preemption timer.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ bool fOffsettedTsc;
+ bool fParavirtTsc;
+ uint64_t uTscOffset;
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+
+ if (pVM->hm.s.vmx.fUsePreemptTimer)
+ {
+ uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc);
+
+ /* Make sure the returned values have sane upper and lower boundaries. */
+ uint64_t u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
+ cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */
+ cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768); /* 1/32768th of a second, ~30us. */
+ cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
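+        /* Rough illustration, assuming a 3 GHz TSC and a preemption-timer shift of 5 (the shift actually
+           comes from MSR IA32_VMX_MISC): the deadline is clamped to roughly [91553, 46875000] TSC ticks
+           and then divided by 2^5 before being written as the 32-bit preemption-timer value below. */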
+
+ /** @todo r=ramshankar: We need to find a way to integrate nested-guest
+ * preemption timers here. We probably need to clamp the preemption timer,
+ * after converting the timer value to the host. */
+ uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
+ int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
+ AssertRC(rc);
+ }
+ else
+ fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
+
+ if (fParavirtTsc)
+ {
+        /* Currently neither Hyper-V nor KVM needs to update its paravirt. TSC
+           information before every VM-entry, hence we disable this for the sake of performance. */
+#if 0
+ int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
+ AssertRC(rc);
+#endif
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
+ }
+
+ if ( fOffsettedTsc
+ && RT_LIKELY(!pVCpu->hm.s.fDebugWantRdTscExit))
+ {
+ if (pVmxTransient->fIsNestedGuest)
+ uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
+ hmR0VmxSetTscOffsetVmcs(pVmcsInfo, uTscOffset);
+ hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
+ }
+ else
+ {
+ /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
+ hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
+ }
+}
+
+
+/**
+ * Gets the IEM exception flags for the specified vector and IDT vectoring /
+ * VM-exit interruption info type.
+ *
+ * @returns The IEM exception flags.
+ * @param uVector The event vector.
+ * @param uVmxEventType The VMX event type.
+ *
+ * @remarks This function currently only constructs flags required for
+ *          IEMEvaluateRecursiveXcpt and not the complete flags (e.g., error-code
+ * and CR2 aspects of an exception are not included).
+ */
+static uint32_t hmR0VmxGetIemXcptFlags(uint8_t uVector, uint32_t uVmxEventType)
+{
+ uint32_t fIemXcptFlags;
+ switch (uVmxEventType)
+ {
+ case VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT:
+ case VMX_IDT_VECTORING_INFO_TYPE_NMI:
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_CPU_XCPT;
+ break;
+
+ case VMX_IDT_VECTORING_INFO_TYPE_EXT_INT:
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_EXT_INT;
+ break;
+
+ case VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT:
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT | IEM_XCPT_FLAGS_ICEBP_INSTR;
+ break;
+
+ case VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT:
+ {
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT;
+ if (uVector == X86_XCPT_BP)
+ fIemXcptFlags |= IEM_XCPT_FLAGS_BP_INSTR;
+ else if (uVector == X86_XCPT_OF)
+ fIemXcptFlags |= IEM_XCPT_FLAGS_OF_INSTR;
+ else
+ {
+ fIemXcptFlags = 0;
+ AssertMsgFailed(("Unexpected vector for software exception. uVector=%#x", uVector));
+ }
+ break;
+ }
+
+ case VMX_IDT_VECTORING_INFO_TYPE_SW_INT:
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT;
+ break;
+
+ default:
+ fIemXcptFlags = 0;
+ AssertMsgFailed(("Unexpected vector type! uVmxEventType=%#x uVector=%#x", uVmxEventType, uVector));
+ break;
+ }
+ return fIemXcptFlags;
+}
+
+
+/**
+ * Sets an event as a pending event to be injected into the guest.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param u32IntInfo The VM-entry interruption-information field.
+ * @param cbInstr The VM-entry instruction length in bytes (for
+ * software interrupts, exceptions and privileged
+ * software exceptions).
+ * @param u32ErrCode The VM-entry exception error code.
+ * @param GCPtrFaultAddress The fault-address (CR2) in case it's a
+ * page-fault.
+ */
+DECLINLINE(void) hmR0VmxSetPendingEvent(PVMCPUCC pVCpu, uint32_t u32IntInfo, uint32_t cbInstr, uint32_t u32ErrCode,
+ RTGCUINTPTR GCPtrFaultAddress)
+{
+ Assert(!pVCpu->hm.s.Event.fPending);
+ pVCpu->hm.s.Event.fPending = true;
+ pVCpu->hm.s.Event.u64IntInfo = u32IntInfo;
+ pVCpu->hm.s.Event.u32ErrCode = u32ErrCode;
+ pVCpu->hm.s.Event.cbInstr = cbInstr;
+ pVCpu->hm.s.Event.GCPtrFaultAddress = GCPtrFaultAddress;
+}
+
+
+/**
+ * Sets an external interrupt as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param u8Interrupt The external interrupt vector.
+ */
+DECLINLINE(void) hmR0VmxSetPendingExtInt(PVMCPUCC pVCpu, uint8_t u8Interrupt)
+{
+ uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_EXIT_INT_INFO_VECTOR, u8Interrupt)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_ENTRY_INT_INFO_TYPE_EXT_INT)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
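+    /* E.g. external interrupt vector 0x20 yields an interruption-info of 0x80000020
+       (bits 7:0 = vector, bits 10:8 = type 0 for external interrupt, bit 31 = valid). */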
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Sets an NMI (\#NMI) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetPendingXcptNmi(PVMCPUCC pVCpu)
+{
+ uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_NMI)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_ENTRY_INT_INFO_TYPE_NMI)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Sets a double-fault (\#DF) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetPendingXcptDF(PVMCPUCC pVCpu)
+{
+ uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_DF)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 1)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Sets an invalid-opcode (\#UD) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetPendingXcptUD(PVMCPUCC pVCpu)
+{
+ uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_UD)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Sets a debug (\#DB) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetPendingXcptDB(PVMCPUCC pVCpu)
+{
+ uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_DB)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Sets a general-protection (\#GP) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param u32ErrCode The error code for the general-protection exception.
+ */
+DECLINLINE(void) hmR0VmxSetPendingXcptGP(PVMCPUCC pVCpu, uint32_t u32ErrCode)
+{
+ uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_GP)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 1)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, u32ErrCode, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Sets a stack (\#SS) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param u32ErrCode The error code for the stack exception.
+ */
+DECLINLINE(void) hmR0VmxSetPendingXcptSS(PVMCPUCC pVCpu, uint32_t u32ErrCode)
+{
+ uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_SS)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 1)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, u32ErrCode, 0 /* GCPtrFaultAddress */);
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+
+/**
+ * Fixes up attributes for the specified segment register.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSelReg The segment register that needs fixing.
+ * @param idxSel The VMCS field for the corresponding segment register.
+ */
+static void hmR0VmxFixUnusableSegRegAttr(PVMCPUCC pVCpu, PCPUMSELREG pSelReg, uint32_t idxSel)
+{
+ Assert(pSelReg->Attr.u & X86DESCATTR_UNUSABLE);
+
+ /*
+ * If VT-x marks the segment as unusable, most other bits remain undefined:
+ * - For CS the L, D and G bits have meaning.
+ * - For SS the DPL has meaning (it -is- the CPL for Intel and VBox).
+ * - For the remaining data segments no bits are defined.
+ *
+ * The present bit and the unusable bit has been observed to be set at the
+ * same time (the selector was supposed to be invalid as we started executing
+ * a V8086 interrupt in ring-0).
+ *
+ * What should be important for the rest of the VBox code is that the P bit is
+ * cleared. Some of the other VBox code recognizes the unusable bit, but
+ * AMD-V certainly doesn't, and REM doesn't really either. So, to be on the
+ * safe side here, we'll strip off P and other bits we don't care about. If
+ * any code breaks because Attr.u != 0 when Sel < 4, it should be fixed.
+ *
+ * See Intel spec. 27.3.2 "Saving Segment Registers and Descriptor-Table Registers".
+ */
+#ifdef VBOX_STRICT
+ uint32_t const uAttr = pSelReg->Attr.u;
+#endif
+
+ /* Masking off: X86DESCATTR_P, X86DESCATTR_LIMIT_HIGH, and X86DESCATTR_AVL. The latter two are really irrelevant. */
+ pSelReg->Attr.u &= X86DESCATTR_UNUSABLE | X86DESCATTR_L | X86DESCATTR_D | X86DESCATTR_G
+ | X86DESCATTR_DPL | X86DESCATTR_TYPE | X86DESCATTR_DT;
+
+#ifdef VBOX_STRICT
+ VMMRZCallRing3Disable(pVCpu);
+ Log4Func(("Unusable %#x: sel=%#x attr=%#x -> %#x\n", idxSel, pSelReg->Sel, uAttr, pSelReg->Attr.u));
+# ifdef DEBUG_bird
+ AssertMsg((uAttr & ~X86DESCATTR_P) == pSelReg->Attr.u,
+ ("%#x: %#x != %#x (sel=%#x base=%#llx limit=%#x)\n",
+ idxSel, uAttr, pSelReg->Attr.u, pSelReg->Sel, pSelReg->u64Base, pSelReg->u32Limit));
+# endif
+ VMMRZCallRing3Enable(pVCpu);
+ NOREF(uAttr);
+#endif
+ RT_NOREF2(pVCpu, idxSel);
+}
+
+
+/**
+ * Imports a guest segment register from the current VMCS into the guest-CPU
+ * context.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param iSegReg The segment register number (X86_SREG_XXX).
+ *
+ * @remarks Called with interrupts and/or preemption disabled.
+ */
+static void hmR0VmxImportGuestSegReg(PVMCPUCC pVCpu, uint8_t iSegReg)
+{
+ Assert(iSegReg < X86_SREG_COUNT);
+
+ uint32_t const idxSel = g_aVmcsSegSel[iSegReg];
+ uint32_t const idxLimit = g_aVmcsSegLimit[iSegReg];
+ uint32_t const idxAttr = g_aVmcsSegAttr[iSegReg];
+ uint32_t const idxBase = g_aVmcsSegBase[iSegReg];
+
+ uint16_t u16Sel;
+ uint64_t u64Base;
+ uint32_t u32Limit, u32Attr;
+ int rc = VMXReadVmcs16(idxSel, &u16Sel); AssertRC(rc);
+ rc = VMXReadVmcs32(idxLimit, &u32Limit); AssertRC(rc);
+ rc = VMXReadVmcs32(idxAttr, &u32Attr); AssertRC(rc);
+ rc = VMXReadVmcsNw(idxBase, &u64Base); AssertRC(rc);
+
+ PCPUMSELREG pSelReg = &pVCpu->cpum.GstCtx.aSRegs[iSegReg];
+ pSelReg->Sel = u16Sel;
+ pSelReg->ValidSel = u16Sel;
+ pSelReg->fFlags = CPUMSELREG_FLAGS_VALID;
+ pSelReg->u32Limit = u32Limit;
+ pSelReg->u64Base = u64Base;
+ pSelReg->Attr.u = u32Attr;
+ if (u32Attr & X86DESCATTR_UNUSABLE)
+ hmR0VmxFixUnusableSegRegAttr(pVCpu, pSelReg, idxSel);
+}
+
+
+/**
+ * Imports the guest LDTR from the current VMCS into the guest-CPU context.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Called with interrupts and/or preemption disabled.
+ */
+static void hmR0VmxImportGuestLdtr(PVMCPUCC pVCpu)
+{
+ uint16_t u16Sel;
+ uint64_t u64Base;
+ uint32_t u32Limit, u32Attr;
+ int rc = VMXReadVmcs16(VMX_VMCS16_GUEST_LDTR_SEL, &u16Sel); AssertRC(rc);
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_LDTR_LIMIT, &u32Limit); AssertRC(rc);
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, &u32Attr); AssertRC(rc);
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_LDTR_BASE, &u64Base); AssertRC(rc);
+
+ pVCpu->cpum.GstCtx.ldtr.Sel = u16Sel;
+ pVCpu->cpum.GstCtx.ldtr.ValidSel = u16Sel;
+ pVCpu->cpum.GstCtx.ldtr.fFlags = CPUMSELREG_FLAGS_VALID;
+ pVCpu->cpum.GstCtx.ldtr.u32Limit = u32Limit;
+ pVCpu->cpum.GstCtx.ldtr.u64Base = u64Base;
+ pVCpu->cpum.GstCtx.ldtr.Attr.u = u32Attr;
+ if (u32Attr & X86DESCATTR_UNUSABLE)
+ hmR0VmxFixUnusableSegRegAttr(pVCpu, &pVCpu->cpum.GstCtx.ldtr, VMX_VMCS16_GUEST_LDTR_SEL);
+}
+
+
+/**
+ * Imports the guest TR from the current VMCS into the guest-CPU context.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Called with interrupts and/or preemption disabled.
+ */
+static void hmR0VmxImportGuestTr(PVMCPUCC pVCpu)
+{
+ uint16_t u16Sel;
+ uint64_t u64Base;
+ uint32_t u32Limit, u32Attr;
+ int rc = VMXReadVmcs16(VMX_VMCS16_GUEST_TR_SEL, &u16Sel); AssertRC(rc);
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_TR_LIMIT, &u32Limit); AssertRC(rc);
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, &u32Attr); AssertRC(rc);
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_TR_BASE, &u64Base); AssertRC(rc);
+
+ pVCpu->cpum.GstCtx.tr.Sel = u16Sel;
+ pVCpu->cpum.GstCtx.tr.ValidSel = u16Sel;
+ pVCpu->cpum.GstCtx.tr.fFlags = CPUMSELREG_FLAGS_VALID;
+ pVCpu->cpum.GstCtx.tr.u32Limit = u32Limit;
+ pVCpu->cpum.GstCtx.tr.u64Base = u64Base;
+ pVCpu->cpum.GstCtx.tr.Attr.u = u32Attr;
+ /* TR is the only selector that can never be unusable. */
+ Assert(!(u32Attr & X86DESCATTR_UNUSABLE));
+}
+
+
+/**
+ * Imports the guest RIP from the VMCS back into the guest-CPU context.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Called with interrupts and/or preemption disabled, should not assert!
+ * @remarks Do -not- call this function directly, use hmR0VmxImportGuestState()
+ * instead!!!
+ */
+static void hmR0VmxImportGuestRip(PVMCPUCC pVCpu)
+{
+ uint64_t u64Val;
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (pCtx->fExtrn & CPUMCTX_EXTRN_RIP)
+ {
+ int rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RIP, &u64Val);
+ AssertRC(rc);
+
+ pCtx->rip = u64Val;
+ EMR0HistoryUpdatePC(pVCpu, pCtx->rip, false);
+ pCtx->fExtrn &= ~CPUMCTX_EXTRN_RIP;
+ }
+}
+
+
+/**
+ * Imports the guest RFLAGS from the VMCS back into the guest-CPU context.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks Called with interrupts and/or preemption disabled, should not assert!
+ * @remarks Do -not- call this function directly, use hmR0VmxImportGuestState()
+ * instead!!!
+ */
+static void hmR0VmxImportGuestRFlags(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
+{
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (pCtx->fExtrn & CPUMCTX_EXTRN_RFLAGS)
+ {
+ uint64_t u64Val;
+ int rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RFLAGS, &u64Val);
+ AssertRC(rc);
+
+ pCtx->rflags.u64 = u64Val;
+ if (pVmcsInfo->RealMode.fRealOnV86Active)
+ {
+ pCtx->eflags.Bits.u1VM = 0;
+ pCtx->eflags.Bits.u2IOPL = pVmcsInfo->RealMode.Eflags.Bits.u2IOPL;
+ }
+ pCtx->fExtrn &= ~CPUMCTX_EXTRN_RFLAGS;
+ }
+}
+
+
+/**
+ * Imports the guest interruptibility-state from the VMCS back into the guest-CPU
+ * context.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks Called with interrupts and/or preemption disabled, try not to assert and
+ * do not log!
+ * @remarks Do -not- call this function directly, use hmR0VmxImportGuestState()
+ * instead!!!
+ */
+static void hmR0VmxImportGuestIntrState(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
+{
+ uint32_t u32Val;
+ int rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &u32Val); AssertRC(rc);
+ if (!u32Val)
+ {
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+ CPUMSetGuestNmiBlocking(pVCpu, false);
+ }
+ else
+ {
+ /*
+ * We must import RIP here to set our EM interrupt-inhibited state.
+ * We also import RFLAGS as our code that evaluates pending interrupts
+ * before VM-entry requires it.
+ */
+ hmR0VmxImportGuestRip(pVCpu);
+ hmR0VmxImportGuestRFlags(pVCpu, pVmcsInfo);
+
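+ /* Note: EM tracks interrupt inhibition by the RIP of the instruction that caused it
+ (see EMSetInhibitInterruptsPC below); the inhibition lapses once RIP moves past that address. */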
+ if (u32Val & (VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS | VMX_VMCS_GUEST_INT_STATE_BLOCK_STI))
+ EMSetInhibitInterruptsPC(pVCpu, pVCpu->cpum.GstCtx.rip);
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+
+ bool const fNmiBlocking = RT_BOOL(u32Val & VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI);
+ CPUMSetGuestNmiBlocking(pVCpu, fNmiBlocking);
+ }
+}
+
+
+/**
+ * Worker for VMXR0ImportStateOnDemand.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
+ */
+static int hmR0VmxImportGuestState(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint64_t fWhat)
+{
+ int rc = VINF_SUCCESS;
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint32_t u32Val;
+
+ /*
+ * Note! This is a hack to work around a mysterious BSOD observed with release builds
+ * on Windows 10 64-bit hosts. Profile and debug builds are not affected and
+ * neither are other host platforms.
+ *
+ * Committing this temporarily as it prevents BSOD.
+ *
+ * Update: This is very likely a compiler optimization bug, see @bugref{9180}.
+ */
+#ifdef RT_OS_WINDOWS
+ if (pVM == 0 || pVM == (void *)(uintptr_t)-1)
+ return VERR_HM_IPE_1;
+#endif
+
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatImportGuestState, x);
+
+ /*
+ * We disable interrupts to make the updating of the state and in particular
+ * the fExtrn modification atomic wrt to preemption hooks.
+ */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ fWhat &= pCtx->fExtrn;
+ if (fWhat)
+ {
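+ /* The do-while(0) construct below only exists so that 'break' can be used to bail out
+ early (see the shadow-VMCS copy failure path near the end of the block). */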
+ do
+ {
+ if (fWhat & CPUMCTX_EXTRN_RIP)
+ hmR0VmxImportGuestRip(pVCpu);
+
+ if (fWhat & CPUMCTX_EXTRN_RFLAGS)
+ hmR0VmxImportGuestRFlags(pVCpu, pVmcsInfo);
+
+ if (fWhat & CPUMCTX_EXTRN_HM_VMX_INT_STATE)
+ hmR0VmxImportGuestIntrState(pVCpu, pVmcsInfo);
+
+ if (fWhat & CPUMCTX_EXTRN_RSP)
+ {
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RSP, &pCtx->rsp);
+ AssertRC(rc);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
+ {
+ bool const fRealOnV86Active = pVmcsInfo->RealMode.fRealOnV86Active;
+ if (fWhat & CPUMCTX_EXTRN_CS)
+ {
+ hmR0VmxImportGuestSegReg(pVCpu, X86_SREG_CS);
+ hmR0VmxImportGuestRip(pVCpu);
+ if (fRealOnV86Active)
+ pCtx->cs.Attr.u = pVmcsInfo->RealMode.AttrCS.u;
+ EMR0HistoryUpdatePC(pVCpu, pCtx->cs.u64Base + pCtx->rip, true /* fFlattened */);
+ }
+ if (fWhat & CPUMCTX_EXTRN_SS)
+ {
+ hmR0VmxImportGuestSegReg(pVCpu, X86_SREG_SS);
+ if (fRealOnV86Active)
+ pCtx->ss.Attr.u = pVmcsInfo->RealMode.AttrSS.u;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DS)
+ {
+ hmR0VmxImportGuestSegReg(pVCpu, X86_SREG_DS);
+ if (fRealOnV86Active)
+ pCtx->ds.Attr.u = pVmcsInfo->RealMode.AttrDS.u;
+ }
+ if (fWhat & CPUMCTX_EXTRN_ES)
+ {
+ hmR0VmxImportGuestSegReg(pVCpu, X86_SREG_ES);
+ if (fRealOnV86Active)
+ pCtx->es.Attr.u = pVmcsInfo->RealMode.AttrES.u;
+ }
+ if (fWhat & CPUMCTX_EXTRN_FS)
+ {
+ hmR0VmxImportGuestSegReg(pVCpu, X86_SREG_FS);
+ if (fRealOnV86Active)
+ pCtx->fs.Attr.u = pVmcsInfo->RealMode.AttrFS.u;
+ }
+ if (fWhat & CPUMCTX_EXTRN_GS)
+ {
+ hmR0VmxImportGuestSegReg(pVCpu, X86_SREG_GS);
+ if (fRealOnV86Active)
+ pCtx->gs.Attr.u = pVmcsInfo->RealMode.AttrGS.u;
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_LDTR)
+ hmR0VmxImportGuestLdtr(pVCpu);
+
+ if (fWhat & CPUMCTX_EXTRN_GDTR)
+ {
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_GDTR_BASE, &pCtx->gdtr.pGdt); AssertRC(rc);
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); AssertRC(rc);
+ pCtx->gdtr.cbGdt = u32Val;
+ }
+
+ /* Guest IDTR. */
+ if (fWhat & CPUMCTX_EXTRN_IDTR)
+ {
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_IDTR_BASE, &pCtx->idtr.pIdt); AssertRC(rc);
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); AssertRC(rc);
+ pCtx->idtr.cbIdt = u32Val;
+ }
+
+ /* Guest TR. */
+ if (fWhat & CPUMCTX_EXTRN_TR)
+ {
+ /* Real-mode emulation using virtual-8086 mode has the fake TSS (pRealModeTSS) in TR;
+ we don't need to import that one. */
+ if (!pVmcsInfo->RealMode.fRealOnV86Active)
+ hmR0VmxImportGuestTr(pVCpu);
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_DR7)
+ {
+ if (!pVCpu->hm.s.fUsingHyperDR7)
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_DR7, &pCtx->dr[7]); AssertRC(rc);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
+ {
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, &pCtx->SysEnter.eip); AssertRC(rc);
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, &pCtx->SysEnter.esp); AssertRC(rc);
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val); AssertRC(rc);
+ pCtx->SysEnter.cs = u32Val;
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
+ {
+ if ( pVM->hm.s.fAllow64BitGuests
+ && (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
+ pCtx->msrKERNELGSBASE = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
+ {
+ if ( pVM->hm.s.fAllow64BitGuests
+ && (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
+ {
+ pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
+ pCtx->msrSTAR = ASMRdMsr(MSR_K6_STAR);
+ pCtx->msrSFMASK = ASMRdMsr(MSR_K8_SF_MASK);
+ }
+ }
+
+ if (fWhat & (CPUMCTX_EXTRN_TSC_AUX | CPUMCTX_EXTRN_OTHER_MSRS))
+ {
+ PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
+ uint32_t const cMsrs = pVmcsInfo->cExitMsrStore;
+ Assert(pMsrs);
+ Assert(cMsrs <= VMX_MISC_MAX_MSRS(pVM->hm.s.vmx.Msrs.u64Misc));
+ Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
+ for (uint32_t i = 0; i < cMsrs; i++)
+ {
+ uint32_t const idMsr = pMsrs[i].u32Msr;
+ switch (idMsr)
+ {
+ case MSR_K8_TSC_AUX: CPUMSetGuestTscAux(pVCpu, pMsrs[i].u64Value); break;
+ case MSR_IA32_SPEC_CTRL: CPUMSetGuestSpecCtrl(pVCpu, pMsrs[i].u64Value); break;
+ case MSR_K6_EFER: /* Can't be changed without causing a VM-exit */ break;
+ default:
+ {
+ uint32_t idxLbrMsr;
+ if (pVM->hm.s.vmx.fLbr)
+ {
+ if (hmR0VmxIsLbrBranchFromMsr(pVM, idMsr, &idxLbrMsr))
+ {
+ Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfo->au64LbrFromIpMsr));
+ pVmcsInfo->au64LbrFromIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
+ break;
+ }
+ else if (hmR0VmxIsLbrBranchToMsr(pVM, idMsr, &idxLbrMsr))
+ {
+ Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfo->au64LbrFromIpMsr));
+ pVmcsInfo->au64LbrToIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
+ break;
+ }
+ else if (idMsr == pVM->hm.s.vmx.idLbrTosMsr)
+ {
+ pVmcsInfo->u64LbrTosMsr = pMsrs[i].u64Value;
+ break;
+ }
+ /* Fallthru (no break) */
+ }
+ pCtx->fExtrn = 0;
+ pVCpu->hm.s.u32HMError = pMsrs->u32Msr;
+ ASMSetFlags(fEFlags);
+ AssertMsgFailed(("Unexpected MSR in auto-load/store area. idMsr=%#RX32 cMsrs=%u\n", idMsr, cMsrs));
+ return VERR_HM_UNEXPECTED_LD_ST_MSR;
+ }
+ }
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_CR_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CR0)
+ {
+ uint64_t u64Cr0;
+ uint64_t u64Shadow;
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR0, &u64Cr0); AssertRC(rc);
+ rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR0_READ_SHADOW, &u64Shadow); AssertRC(rc);
+#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
+ u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
+ | (u64Shadow & pVmcsInfo->u64Cr0Mask);
+#else
+ if (!CPUMIsGuestInVmxNonRootMode(pCtx))
+ {
+ u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
+ | (u64Shadow & pVmcsInfo->u64Cr0Mask);
+ }
+ else
+ {
+ /*
+ * We've merged the guest and nested-guest's CR0 guest/host mask while executing
+ * the nested-guest using hardware-assisted VMX. Accordingly we need to
+ * re-construct CR0. See @bugref{9180#c95} for details.
+ */
+ PCVMXVMCSINFO pVmcsInfoGst = &pVCpu->hm.s.vmx.VmcsInfo;
+ PCVMXVVMCS pVmcsNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.CTX_SUFF(pVmcs);
+ u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
+ | (pVmcsNstGst->u64GuestCr0.u & pVmcsNstGst->u64Cr0Mask.u)
+ | (u64Shadow & (pVmcsInfoGst->u64Cr0Mask & ~pVmcsNstGst->u64Cr0Mask.u));
+ }
+#endif
+ VMMRZCallRing3Disable(pVCpu); /* May call into PGM which has Log statements. */
+ CPUMSetGuestCR0(pVCpu, u64Cr0);
+ VMMRZCallRing3Enable(pVCpu);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_CR4)
+ {
+ uint64_t u64Cr4;
+ uint64_t u64Shadow;
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR4, &u64Cr4); AssertRC(rc);
+ rc |= VMXReadVmcsNw(VMX_VMCS_CTRL_CR4_READ_SHADOW, &u64Shadow); AssertRC(rc);
+#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
+ u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
+ | (u64Shadow & pVmcsInfo->u64Cr4Mask);
+#else
+ if (!CPUMIsGuestInVmxNonRootMode(pCtx))
+ {
+ u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
+ | (u64Shadow & pVmcsInfo->u64Cr4Mask);
+ }
+ else
+ {
+ /*
+ * We've merged the guest and nested-guest's CR4 guest/host mask while executing
+ * the nested-guest using hardware-assisted VMX. Accordingly we need to
+ * re-construct CR4. See @bugref{9180#c95} for details.
+ */
+ PCVMXVMCSINFO pVmcsInfoGst = &pVCpu->hm.s.vmx.VmcsInfo;
+ PCVMXVVMCS pVmcsNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.CTX_SUFF(pVmcs);
+ u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
+ | (pVmcsNstGst->u64GuestCr4.u & pVmcsNstGst->u64Cr4Mask.u)
+ | (u64Shadow & (pVmcsInfoGst->u64Cr4Mask & ~pVmcsNstGst->u64Cr4Mask.u));
+ }
+#endif
+ pCtx->cr4 = u64Cr4;
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_CR3)
+ {
+ /* CR0.PG bit changes are always intercepted, so it's up to date. */
+ if ( pVM->hm.s.vmx.fUnrestrictedGuest
+ || ( pVM->hm.s.fNestedPaging
+ && CPUMIsGuestPagingEnabledEx(pCtx)))
+ {
+ uint64_t u64Cr3;
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR3, &u64Cr3); AssertRC(rc);
+ if (pCtx->cr3 != u64Cr3)
+ {
+ pCtx->cr3 = u64Cr3;
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
+ }
+
+ /* If the guest is in PAE mode, sync back the PDPEs into the guest state.
+ Note: CR4.PAE, CR0.PG, EFER MSR changes are always intercepted, so they're up to date. */
+ if (CPUMIsGuestInPAEModeEx(pCtx))
+ {
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &pVCpu->hm.s.aPdpes[0].u); AssertRC(rc);
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &pVCpu->hm.s.aPdpes[1].u); AssertRC(rc);
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &pVCpu->hm.s.aPdpes[2].u); AssertRC(rc);
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &pVCpu->hm.s.aPdpes[3].u); AssertRC(rc);
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES);
+ }
+ }
+ }
+ }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if (fWhat & CPUMCTX_EXTRN_HWVIRT)
+ {
+ if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING)
+ && !CPUMIsGuestInVmxNonRootMode(pCtx))
+ {
+ Assert(CPUMIsGuestInVmxRootMode(pCtx));
+ rc = hmR0VmxCopyShadowToNstGstVmcs(pVCpu, pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ break;
+ }
+ }
+#endif
+ } while (0);
+
+ if (RT_SUCCESS(rc))
+ {
+ /* Update fExtrn. */
+ pCtx->fExtrn &= ~fWhat;
+
+ /* If everything has been imported, clear the HM keeper bit. */
+ if (!(pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL))
+ {
+ pCtx->fExtrn &= ~CPUMCTX_EXTRN_KEEPER_HM;
+ Assert(!pCtx->fExtrn);
+ }
+ }
+ }
+ else
+ AssertMsg(!pCtx->fExtrn || (pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL), ("%#RX64\n", pCtx->fExtrn));
+
+ /*
+ * Restore interrupts.
+ */
+ ASMSetFlags(fEFlags);
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatImportGuestState, x);
+
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ return rc;
+
+ /*
+ * Honor any pending CR3 updates.
+ *
+ * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> VMXR0CallRing3Callback()
+ * -> VMMRZCallRing3Disable() -> hmR0VmxImportGuestState() -> Sets VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp
+ * -> continue with VM-exit handling -> hmR0VmxImportGuestState() and here we are.
+ *
+ * The reason for such complicated handling is because VM-exits that call into PGM expect CR3 to be up-to-date and thus
+ * if any CR3-saves -before- the VM-exit (longjmp) postponed the CR3 update via the force-flag, any VM-exit handler that
+ * calls into PGM when it re-saves CR3 will end up here and we call PGMUpdateCR3(). This is why the code below should
+ * -NOT- check if CPUMCTX_EXTRN_CR3 is set!
+ *
+ * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here.
+ */
+ if (VMMRZCallRing3IsEnabled(pVCpu))
+ {
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
+ {
+ Assert(!(ASMAtomicUoReadU64(&pCtx->fExtrn) & CPUMCTX_EXTRN_CR3));
+ PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
+ }
+
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES))
+ PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
+
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Saves the guest state from the VMCS into the guest-CPU context.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
+ */
+VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
+{
+ AssertPtr(pVCpu);
+ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+ return hmR0VmxImportGuestState(pVCpu, pVmcsInfo, fWhat);
+}
+
+
+/**
+ * Check per-VM and per-VCPU force flag actions that require us to go back to
+ * ring-3 for one reason or another.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too)
+ * @retval VINF_SUCCESS if we don't have any actions that require going back to
+ * ring-3.
+ * @retval VINF_PGM_SYNC_CR3 if we have pending PGM CR3 sync.
+ * @retval VINF_EM_PENDING_REQUEST if we have pending requests (like hardware
+ * interrupts)
+ * @retval VINF_PGM_POOL_FLUSH_PENDING if PGM is doing a pool flush and requires
+ * all EMTs to be in ring-3.
+ * @retval VINF_EM_RAW_TO_R3 if there are pending DMA requests.
+ * @retval VINF_EM_NO_MEMORY if PGM is out of memory and we need to return
+ * to the EM loop.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param fStepping Whether we are single-stepping the guest using the
+ * hypervisor debugger.
+ *
+ * @remarks This might cause nested-guest VM-exits; the caller must check whether the
+ * guest is no longer in VMX non-root mode.
+ */
+static VBOXSTRICTRC hmR0VmxCheckForceFlags(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, bool fStepping)
+{
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+
+ /*
+ * Update pending interrupts into the APIC's IRR.
+ */
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(pVCpu);
+
+ /*
+ * Anything pending? Should be more likely than not if we're doing a good job.
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if ( !fStepping
+ ? !VM_FF_IS_ANY_SET(pVM, VM_FF_HP_R0_PRE_HM_MASK)
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HP_R0_PRE_HM_MASK)
+ : !VM_FF_IS_ANY_SET(pVM, VM_FF_HP_R0_PRE_HM_STEP_MASK)
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) )
+ return VINF_SUCCESS;
+
+ /* Pending PGM CR3 sync. */
+ if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
+ {
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ Assert(!(ASMAtomicUoReadU64(&pCtx->fExtrn) & (CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR3 | CPUMCTX_EXTRN_CR4)));
+ VBOXSTRICTRC rcStrict = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4,
+ VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
+ if (rcStrict != VINF_SUCCESS)
+ {
+ AssertRC(VBOXSTRICTRC_VAL(rcStrict));
+ Log4Func(("PGMSyncCR3 forcing us back to ring-3. rc2=%d\n", VBOXSTRICTRC_VAL(rcStrict)));
+ return rcStrict;
+ }
+ }
+
+ /* Pending HM-to-R3 operations (critsects, timers, EMT rendezvous etc.) */
+ if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HM_TO_R3_MASK)
+ || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
+ int rc = RT_LIKELY(!VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_RAW_TO_R3 : VINF_EM_NO_MEMORY;
+ Log4Func(("HM_TO_R3 forcing us back to ring-3. rc=%d\n", rc));
+ return rc;
+ }
+
+ /* Pending VM request packets, such as hardware interrupts. */
+ if ( VM_FF_IS_SET(pVM, VM_FF_REQUEST)
+ || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_REQUEST))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchVmReq);
+ Log4Func(("Pending VM request forcing us back to ring-3\n"));
+ return VINF_EM_PENDING_REQUEST;
+ }
+
+ /* Pending PGM pool flushes. */
+ if (VM_FF_IS_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPgmPoolFlush);
+ Log4Func(("PGM pool flush pending forcing us back to ring-3\n"));
+ return VINF_PGM_POOL_FLUSH_PENDING;
+ }
+
+ /* Pending DMA requests. */
+ if (VM_FF_IS_SET(pVM, VM_FF_PDM_DMA))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchDma);
+ Log4Func(("Pending DMA request forcing us back to ring-3\n"));
+ return VINF_EM_RAW_TO_R3;
+ }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * Pending nested-guest events.
+ *
+ * Please note that the priority of these events is specified and important.
+ * See Intel spec. 29.4.3.2 "APIC-Write Emulation".
+ * See Intel spec. 6.9 "Priority Among Simultaneous Exceptions And Interrupts".
+ */
+ if (pVmxTransient->fIsNestedGuest)
+ {
+ /* Pending nested-guest APIC-write. */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_VMX_APIC_WRITE))
+ {
+ Log4Func(("Pending nested-guest APIC-write\n"));
+ VBOXSTRICTRC rcStrict = IEMExecVmxVmexitApicWrite(pVCpu);
+ Assert(rcStrict != VINF_VMX_INTERCEPT_NOT_ACTIVE);
+ return rcStrict;
+ }
+
+ /* Pending nested-guest monitor-trap flag (MTF). */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_VMX_MTF))
+ {
+ Log4Func(("Pending nested-guest MTF\n"));
+ VBOXSTRICTRC rcStrict = IEMExecVmxVmexit(pVCpu, VMX_EXIT_MTF, 0 /* uExitQual */);
+ Assert(rcStrict != VINF_VMX_INTERCEPT_NOT_ACTIVE);
+ return rcStrict;
+ }
+
+ /* Pending nested-guest VMX-preemption timer expired. */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_VMX_PREEMPT_TIMER))
+ {
+ Log4Func(("Pending nested-guest preemption timer expired\n"));
+ VBOXSTRICTRC rcStrict = IEMExecVmxVmexitPreemptTimer(pVCpu);
+ Assert(rcStrict != VINF_VMX_INTERCEPT_NOT_ACTIVE);
+ return rcStrict;
+ }
+ }
+#else
+ NOREF(pVmxTransient);
+#endif
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Converts any TRPM trap into a pending HM event. This is typically used when
+ * entering from ring-3 (not longjmp returns).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0VmxTrpmTrapToPendingEvent(PVMCPUCC pVCpu)
+{
+ Assert(TRPMHasTrap(pVCpu));
+ Assert(!pVCpu->hm.s.Event.fPending);
+
+ uint8_t uVector;
+ TRPMEVENT enmTrpmEvent;
+ uint32_t uErrCode;
+ RTGCUINTPTR GCPtrFaultAddress;
+ uint8_t cbInstr;
+ bool fIcebp;
+
+ int rc = TRPMQueryTrapAll(pVCpu, &uVector, &enmTrpmEvent, &uErrCode, &GCPtrFaultAddress, &cbInstr, &fIcebp);
+ AssertRC(rc);
+
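+ /* Note: the IDT-vectoring information and VM-entry interruption-information formats share
+ the same layout for the vector (bits 7:0), type (bits 10:8) and valid (bit 31) fields,
+ which is why the IDT_VECTORING bits can be combined with HMTrpmEventTypeToVmxEventType() here. */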
+ uint32_t u32IntInfo;
+ u32IntInfo = uVector | VMX_IDT_VECTORING_INFO_VALID;
+ u32IntInfo |= HMTrpmEventTypeToVmxEventType(uVector, enmTrpmEvent, fIcebp);
+
+ rc = TRPMResetTrap(pVCpu);
+ AssertRC(rc);
+ Log4(("TRPM->HM event: u32IntInfo=%#RX32 enmTrpmEvent=%d cbInstr=%u uErrCode=%#RX32 GCPtrFaultAddress=%#RGv\n",
+ u32IntInfo, enmTrpmEvent, cbInstr, uErrCode, GCPtrFaultAddress));
+
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, uErrCode, GCPtrFaultAddress);
+}
+
+
+/**
+ * Converts the pending HM event into a TRPM trap.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0VmxPendingEventToTrpmTrap(PVMCPUCC pVCpu)
+{
+ Assert(pVCpu->hm.s.Event.fPending);
+
+ /* If a trap was already pending, we did something wrong! */
+ Assert(TRPMQueryTrap(pVCpu, NULL /* pu8TrapNo */, NULL /* pEnmType */) == VERR_TRPM_NO_ACTIVE_TRAP);
+
+ uint32_t const u32IntInfo = pVCpu->hm.s.Event.u64IntInfo;
+ uint32_t const uVector = VMX_IDT_VECTORING_INFO_VECTOR(u32IntInfo);
+ TRPMEVENT const enmTrapType = HMVmxEventTypeToTrpmEventType(u32IntInfo);
+
+ Log4(("HM event->TRPM: uVector=%#x enmTrapType=%d\n", uVector, enmTrapType));
+
+ int rc = TRPMAssertTrap(pVCpu, uVector, enmTrapType);
+ AssertRC(rc);
+
+ if (VMX_IDT_VECTORING_INFO_IS_ERROR_CODE_VALID(u32IntInfo))
+ TRPMSetErrorCode(pVCpu, pVCpu->hm.s.Event.u32ErrCode);
+
+ if (VMX_IDT_VECTORING_INFO_IS_XCPT_PF(u32IntInfo))
+ TRPMSetFaultAddress(pVCpu, pVCpu->hm.s.Event.GCPtrFaultAddress);
+ else
+ {
+ uint8_t const uVectorType = VMX_IDT_VECTORING_INFO_TYPE(u32IntInfo);
+ switch (uVectorType)
+ {
+ case VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT:
+ TRPMSetTrapDueToIcebp(pVCpu);
+ RT_FALL_THRU();
+ case VMX_IDT_VECTORING_INFO_TYPE_SW_INT:
+ case VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT:
+ {
+ AssertMsg( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT
+ || ( uVector == X86_XCPT_BP /* INT3 */
+ || uVector == X86_XCPT_OF /* INTO */
+ || uVector == X86_XCPT_DB /* INT1 (ICEBP) */),
+ ("Invalid vector: uVector=%#x uVectorType=%#x\n", uVector, uVectorType));
+ TRPMSetInstrLength(pVCpu, pVCpu->hm.s.Event.cbInstr);
+ break;
+ }
+ }
+ }
+
+ /* We're now done converting the pending event. */
+ pVCpu->hm.s.Event.fPending = false;
+}
+
+
+/**
+ * Sets the interrupt-window exiting control in the VMCS which instructs VT-x to
+ * cause a VM-exit as soon as the guest is in a state to receive interrupts.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static void hmR0VmxSetIntWindowExitVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_INT_WINDOW_EXIT)
+ {
+ if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_INT_WINDOW_EXIT))
+ {
+ pVmcsInfo->u32ProcCtls |= VMX_PROC_CTLS_INT_WINDOW_EXIT;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
+ AssertRC(rc);
+ }
+ } /* else we will deliver interrupts whenever the guest VM-exits next and is in a state to receive the interrupt. */
+}
+
+
+/**
+ * Clears the interrupt-window exiting control in the VMCS.
+ *
+ * @param pVmcsInfo The VMCS info. object.
+ */
+DECLINLINE(void) hmR0VmxClearIntWindowExitVmcs(PVMXVMCSINFO pVmcsInfo)
+{
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_INT_WINDOW_EXIT)
+ {
+ pVmcsInfo->u32ProcCtls &= ~VMX_PROC_CTLS_INT_WINDOW_EXIT;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
+ AssertRC(rc);
+ }
+}
+
+
+/**
+ * Sets the NMI-window exiting control in the VMCS which instructs VT-x to
+ * cause a VM-exit as soon as the guest is in a state to receive NMIs.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ */
+static void hmR0VmxSetNmiWindowExitVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
+{
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_NMI_WINDOW_EXIT)
+ {
+ if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT))
+ {
+ pVmcsInfo->u32ProcCtls |= VMX_PROC_CTLS_NMI_WINDOW_EXIT;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
+ AssertRC(rc);
+ Log4Func(("Setup NMI-window exiting\n"));
+ }
+ } /* else we will deliver NMIs whenever we VM-exit next, even possibly nesting NMIs. Can't be helped on ancient CPUs. */
+}
+
+
+/**
+ * Clears the NMI-window exiting control in the VMCS.
+ *
+ * @param pVmcsInfo The VMCS info. object.
+ */
+DECLINLINE(void) hmR0VmxClearNmiWindowExitVmcs(PVMXVMCSINFO pVmcsInfo)
+{
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT)
+ {
+ pVmcsInfo->u32ProcCtls &= ~VMX_PROC_CTLS_NMI_WINDOW_EXIT;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
+ AssertRC(rc);
+ }
+}
+
+
+/**
+ * Does the necessary state syncing before returning to ring-3 for any reason
+ * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fImportState Whether to import the guest state from the VMCS back
+ * to the guest-CPU context.
+ *
+ * @remarks No-long-jmp zone!!!
+ */
+static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ RTCPUID const idCpu = RTMpCpuId();
+ Log4Func(("HostCpuId=%u\n", idCpu));
+
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too.
+ */
+
+ /* Save the guest state if necessary. */
+ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+ if (fImportState)
+ {
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+ }
+
+ /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
+ CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
+ Assert(!CPUMIsGuestFPUStateActive(pVCpu));
+
+ /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
+#ifdef VBOX_STRICT
+ if (CPUMIsHyperDebugStateActive(pVCpu))
+ Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT);
+#endif
+ CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
+ Assert(!CPUMIsGuestDebugStateActive(pVCpu) && !CPUMIsGuestDebugStateActivePending(pVCpu));
+ Assert(!CPUMIsHyperDebugStateActive(pVCpu) && !CPUMIsHyperDebugStateActivePending(pVCpu));
+
+ /* Restore host-state bits that VT-x only restores partially. */
+ if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED)
+ && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED))
+ {
+ Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hm.s.vmx.fRestoreHostFlags, idCpu));
+ VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost);
+ }
+ pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
+
+ /* Restore the lazy host MSRs as we're leaving VT-x context. */
+ if (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
+ {
+ /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
+ if (!fImportState)
+ {
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
+ AssertRCReturn(rc, rc);
+ }
+ hmR0VmxLazyRestoreHostMsrs(pVCpu);
+ Assert(!pVCpu->hm.s.vmx.fLazyMsrs);
+ }
+ else
+ pVCpu->hm.s.vmx.fLazyMsrs = 0;
+
+ /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
+ pVCpu->hm.s.vmx.fUpdatedHostAutoMsrs = false;
+
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
+
+ VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
+
+ /** @todo This partially defeats the purpose of having preemption hooks.
+ * The problem is that deregistering the hooks should be moved to a place that
+ * lasts until the EMT is about to be destroyed, not done every time we leave HM
+ * context.
+ */
+ int rc = hmR0VmxClearVmcs(pVmcsInfo);
+ AssertRCReturn(rc, rc);
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
+ * clear a shadow VMCS before allowing that VMCS to become active on another
+ * logical processor. We may or may not be importing guest state which clears
+ * it, so cover for it here.
+ *
+ * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
+ */
+ if ( pVmcsInfo->pvShadowVmcs
+ && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
+ {
+ rc = hmR0VmxClearShadowVmcs(pVmcsInfo);
+ AssertRCReturn(rc, rc);
+ }
+
+ /*
+ * Flag that we need to re-export the host state if we switch to this VMCS before
+ * executing guest or nested-guest code.
+ */
+ pVmcsInfo->idHostCpuState = NIL_RTCPUID;
+#endif
+
+ Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
+ NOREF(idCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Leaves the VT-x session.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jmp zone!!!
+ */
+static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
+{
+ HM_DISABLE_PREEMPT(pVCpu);
+ HMVMX_ASSERT_CPU_SAFE(pVCpu);
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* When thread-context hooks are used, we can avoid doing the leave again if we were preempted
+ earlier and already did this from the VMXR0ThreadCtxCallback(). */
+ if (!pVCpu->hm.s.fLeaveDone)
+ {
+ int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
+ AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
+ pVCpu->hm.s.fLeaveDone = true;
+ }
+ Assert(!pVCpu->cpum.GstCtx.fExtrn);
+
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too.
+ */
+
+ /* Deregister hook now that we've left HM context before re-enabling preemption. */
+ /** @todo Deregistering here means we always need to VMCLEAR
+ * (longjmp/exit-to-r3) in VT-x, which is not efficient; eliminate the need
+ * for calling VMMR0ThreadCtxHookDisable here! */
+ VMMR0ThreadCtxHookDisable(pVCpu);
+
+ /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
+ int rc = HMR0LeaveCpu(pVCpu);
+ HM_RESTORE_PREEMPT();
+ return rc;
+}
+
+
+/**
+ * Does the necessary state syncing before doing a longjmp to ring-3.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jmp zone!!!
+ */
+DECLINLINE(int) hmR0VmxLongJmpToRing3(PVMCPUCC pVCpu)
+{
+ return hmR0VmxLeaveSession(pVCpu);
+}
+
+
+/**
+ * Take necessary actions before going back to ring-3.
+ *
+ * An action requires us to go back to ring-3. This function does the necessary
+ * steps before we can safely return to ring-3. This is not the same as a longjmp
+ * to ring-3; it is voluntary and prepares the guest so it may continue
+ * executing outside HM (recompiler/IEM).
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param rcExit The reason for exiting to ring-3. Can be
+ * VINF_VMM_UNKNOWN_RING3_CALL.
+ */
+static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
+{
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+ if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
+ {
+ VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
+ pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
+ pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu;
+ /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
+ }
+
+ /* Please, no longjumps here (any logging shouldn't flush and thereby jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
+ VMMRZCallRing3Disable(pVCpu);
+ Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
+
+ /*
+ * Convert any pending HM events back to TRPM due to premature exits to ring-3.
+ * We need to do this only on returns to ring-3 and not for longjmps to ring-3.
+ *
+ * This is because execution may continue from ring-3 and we would need to inject
+ * the event from there (hence place it back in TRPM).
+ */
+ if (pVCpu->hm.s.Event.fPending)
+ {
+ hmR0VmxPendingEventToTrpmTrap(pVCpu);
+ Assert(!pVCpu->hm.s.Event.fPending);
+
+ /* Clear the events from the VMCS. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
+ rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
+ }
+#ifdef VBOX_STRICT
+ /*
+ * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
+ * fatal), we don't care about verifying duplicate injection of events. Errors like
+ * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
+ * function so those should and will be checked below.
+ */
+ else if (RT_SUCCESS(rcExit))
+ {
+ /*
+ * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
+ * This can be pretty hard to debug otherwise, interrupts might get injected twice
+ * occasionally, see @bugref{9180#c42}.
+ *
+ * However, if the VM-entry failed, any VM entry-interruption info. field would
+ * be left unmodified as the event would not have been injected to the guest. In
+ * such cases, don't assert, we're not going to continue guest execution anyway.
+ */
+ uint32_t uExitReason;
+ uint32_t uEntryIntInfo;
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
+ rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
+ AssertRC(rc);
+ AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
+ ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
+ }
+#endif
+
+ /*
+ * Clear the interrupt-window and NMI-window VMCS controls as we could have got
+ * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
+ * (e.g. TPR below threshold).
+ */
+ if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
+ {
+ hmR0VmxClearIntWindowExitVmcs(pVmcsInfo);
+ hmR0VmxClearNmiWindowExitVmcs(pVmcsInfo);
+ }
+
+ /* If we're emulating an instruction, we shouldn't have any TRPM traps pending
+ and if we're injecting an event we should have a TRPM trap pending. */
+ AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
+#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
+ AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
+#endif
+
+ /* Save guest state and restore host state bits. */
+ int rc = hmR0VmxLeaveSession(pVCpu);
+ AssertRCReturn(rc, rc);
+ STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
+
+ /* Thread-context hooks are unregistered at this point!!! */
+ /* Ring-3 callback notifications are unregistered at this point!!! */
+
+ /* Sync recompiler state. */
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
+ CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
+ | CPUM_CHANGED_LDTR
+ | CPUM_CHANGED_GDTR
+ | CPUM_CHANGED_IDTR
+ | CPUM_CHANGED_TR
+ | CPUM_CHANGED_HIDDEN_SEL_REGS);
+ if ( pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging
+ && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
+ CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
+
+ Assert(!pVCpu->hm.s.fClearTrapFlag);
+
+ /* Update the exit-to-ring-3 reason. */
+ pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
+
+ /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
+ if ( rcExit != VINF_EM_RAW_INTERRUPT
+ || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
+ {
+ Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
+ VMMRZCallRing3Enable(pVCpu);
+ return rc;
+}
+
+
+/**
+ * VMMRZCallRing3() callback wrapper which saves the guest state before we
+ * longjump to ring-3 and possibly get preempted.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param enmOperation The operation causing the ring-3 longjump.
+ */
+VMMR0DECL(int) VMXR0CallRing3Callback(PVMCPUCC pVCpu, VMMCALLRING3 enmOperation)
+{
+ if (enmOperation == VMMCALLRING3_VM_R0_ASSERTION)
+ {
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too.
+ * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
+ */
+ VMMRZCallRing3RemoveNotification(pVCpu);
+ VMMRZCallRing3Disable(pVCpu);
+ HM_DISABLE_PREEMPT(pVCpu);
+
+ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+ hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
+ CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
+
+ /* Restore host-state bits that VT-x only restores partially. */
+ if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED)
+ && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED))
+ VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost);
+ pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
+
+ /* Restore the lazy host MSRs as we're leaving VT-x context. */
+ if (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
+ hmR0VmxLazyRestoreHostMsrs(pVCpu);
+
+ /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
+ pVCpu->hm.s.vmx.fUpdatedHostAutoMsrs = false;
+ VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
+
+ /* Clear the current VMCS data back to memory (the shadow VMCS, if any, would have been
+ cleared as part of importing the guest state above). */
+ hmR0VmxClearVmcs(pVmcsInfo);
+
+ /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
+ VMMR0ThreadCtxHookDisable(pVCpu);
+
+ /* Leave HM context. This takes care of local init (term). */
+ HMR0LeaveCpu(pVCpu);
+ HM_RESTORE_PREEMPT();
+ return VINF_SUCCESS;
+ }
+
+ Assert(pVCpu);
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ VMMRZCallRing3Disable(pVCpu);
+ Assert(VMMR0IsLogFlushDisabled(pVCpu));
+
+ Log4Func(("-> hmR0VmxLongJmpToRing3 enmOperation=%d\n", enmOperation));
+
+ int rc = hmR0VmxLongJmpToRing3(pVCpu);
+ AssertRCReturn(rc, rc);
+
+ VMMRZCallRing3Enable(pVCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
+ * stack.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uValue The value to push to the guest stack.
+ */
+static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
+{
+ /*
+ * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
+ * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
+ * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
+ */
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
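+ /* A 2-byte push from SP == 1 would straddle the 0xFFFF/0x0000 wrap, which the segment-wraparound
+ behaviour referenced above treats as a shutdown condition; hence the reset below. */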
+ if (pCtx->sp == 1)
+ return VINF_EM_RESET;
+ pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
+ int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
+ AssertRC(rc);
+ return rc;
+}
+
+
+/**
+ * Injects an event into the guest upon VM-entry by updating the relevant fields
+ * in the VM-entry area in the VMCS.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_SUCCESS if the event is successfully injected into the VMCS.
+ * @retval VINF_EM_RESET if event injection resulted in a triple-fault.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param pEvent The event being injected.
+ * @param pfIntrState Pointer to the VT-x guest-interruptibility-state. This
+ * will be updated if necessary. This cannot be NULL.
+ * @param fStepping Whether we're single-stepping guest execution and should
+ * return VINF_EM_DBG_STEPPED if the event is injected
+ * directly (registers modified by us, not by hardware on
+ * VM-entry).
+ */
+static VBOXSTRICTRC hmR0VmxInjectEventVmcs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, PCHMEVENT pEvent, bool fStepping,
+ uint32_t *pfIntrState)
+{
+ /* Intel spec. 24.8.3 "VM-Entry Controls for Event Injection" specifies the interruption-information field to be 32-bits. */
+ AssertMsg(!RT_HI_U32(pEvent->u64IntInfo), ("%#RX64\n", pEvent->u64IntInfo));
+ Assert(pfIntrState);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint32_t u32IntInfo = pEvent->u64IntInfo;
+ uint32_t const u32ErrCode = pEvent->u32ErrCode;
+ uint32_t const cbInstr = pEvent->cbInstr;
+ RTGCUINTPTR const GCPtrFault = pEvent->GCPtrFaultAddress;
+ uint8_t const uVector = VMX_ENTRY_INT_INFO_VECTOR(u32IntInfo);
+ uint32_t const uIntType = VMX_ENTRY_INT_INFO_TYPE(u32IntInfo);
+
+#ifdef VBOX_STRICT
+ /*
+ * Validate the error-code-valid bit for hardware exceptions.
+ * No error codes for exceptions in real-mode.
+ *
+ * See Intel spec. 20.1.4 "Interrupt and Exception Handling"
+ */
+ if ( uIntType == VMX_EXIT_INT_INFO_TYPE_HW_XCPT
+ && !CPUMIsGuestInRealModeEx(pCtx))
+ {
+ switch (uVector)
+ {
+ case X86_XCPT_PF:
+ case X86_XCPT_DF:
+ case X86_XCPT_TS:
+ case X86_XCPT_NP:
+ case X86_XCPT_SS:
+ case X86_XCPT_GP:
+ case X86_XCPT_AC:
+ AssertMsg(VMX_ENTRY_INT_INFO_IS_ERROR_CODE_VALID(u32IntInfo),
+ ("Error-code-valid bit not set for exception that has an error code uVector=%#x\n", uVector));
+ RT_FALL_THRU();
+ default:
+ break;
+ }
+ }
+
+ /* Cannot inject an NMI when block-by-MOV SS is in effect. */
+ Assert( uIntType != VMX_EXIT_INT_INFO_TYPE_NMI
+ || !(*pfIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS));
+#endif
+
+ if ( uIntType == VMX_EXIT_INT_INFO_TYPE_HW_XCPT
+ || uIntType == VMX_EXIT_INT_INFO_TYPE_NMI
+ || uIntType == VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT
+ || uIntType == VMX_EXIT_INT_INFO_TYPE_SW_XCPT)
+ {
+ Assert(uVector <= X86_XCPT_LAST);
+ Assert(uIntType != VMX_EXIT_INT_INFO_TYPE_NMI || uVector == X86_XCPT_NMI);
+ Assert(uIntType != VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT || uVector == X86_XCPT_DB);
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedXcptsR0[uVector]);
+ }
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[uVector & MASK_INJECT_IRQ_STAT]);
+
+ /*
+ * Hardware interrupts & exceptions cannot be delivered through the software interrupt
+ * redirection bitmap to the real mode task in virtual-8086 mode. We must jump to the
+ * interrupt handler in the (real-mode) guest.
+ *
+ * See Intel spec. 20.3 "Interrupt and Exception handling in Virtual-8086 Mode".
+ * See Intel spec. 20.1.4 "Interrupt and Exception Handling" for real-mode interrupt handling.
+ */
+ if (CPUMIsGuestInRealModeEx(pCtx)) /* CR0.PE bit changes are always intercepted, so it's up to date. */
+ {
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest)
+ {
+ /*
+ * For CPUs with unrestricted guest execution enabled and with the guest
+ * in real-mode, we must not set the deliver-error-code bit.
+ *
+ * See Intel spec. 26.2.1.3 "VM-Entry Control Fields".
+ */
+ u32IntInfo &= ~VMX_ENTRY_INT_INFO_ERROR_CODE_VALID;
+ }
+ else
+ {
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ Assert(PDMVmmDevHeapIsEnabled(pVM));
+ Assert(pVM->hm.s.vmx.pRealModeTSS);
+ Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
+
+ /* We require RIP, RSP, RFLAGS, CS, IDTR, import them. */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ int rc2 = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_TABLE_MASK
+ | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_RFLAGS);
+ AssertRCReturn(rc2, rc2);
+
+ /* Check if the interrupt handler is present in the IVT (real-mode IDT). IDT limit is (4N - 1). */
+ size_t const cbIdtEntry = sizeof(X86IDTR16);
+ if (uVector * cbIdtEntry + (cbIdtEntry - 1) > pCtx->idtr.cbIdt)
+ {
+ /* If we are trying to inject a #DF with no valid IDT entry, return a triple-fault. */
+ if (uVector == X86_XCPT_DF)
+ return VINF_EM_RESET;
+
+ /* If we're injecting a #GP with no valid IDT entry, inject a double-fault.
+ No error codes for exceptions in real-mode. */
+ if (uVector == X86_XCPT_GP)
+ {
+ uint32_t const uXcptDfInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_DF)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_ENTRY_INT_INFO_TYPE_HW_XCPT)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
+ HMEVENT EventXcptDf;
+ RT_ZERO(EventXcptDf);
+ EventXcptDf.u64IntInfo = uXcptDfInfo;
+ return hmR0VmxInjectEventVmcs(pVCpu, pVmxTransient, &EventXcptDf, fStepping, pfIntrState);
+ }
+
+ /*
+ * If we're injecting an event with no valid IDT entry, inject a #GP.
+ * No error codes for exceptions in real-mode.
+ *
+ * See Intel spec. 20.1.4 "Interrupt and Exception Handling"
+ */
+ uint32_t const uXcptGpInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_GP)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_ENTRY_INT_INFO_TYPE_HW_XCPT)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
+ HMEVENT EventXcptGp;
+ RT_ZERO(EventXcptGp);
+ EventXcptGp.u64IntInfo = uXcptGpInfo;
+ return hmR0VmxInjectEventVmcs(pVCpu, pVmxTransient, &EventXcptGp, fStepping, pfIntrState);
+ }
+
+ /* Software exceptions (#BP and #OF exceptions thrown as a result of INT3 or INTO) */
+ uint16_t uGuestIp = pCtx->ip;
+ if (uIntType == VMX_ENTRY_INT_INFO_TYPE_SW_XCPT)
+ {
+ Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF);
+ /* #BP and #OF are both benign traps, we need to resume the next instruction. */
+ uGuestIp = pCtx->ip + (uint16_t)cbInstr;
+ }
+ else if (uIntType == VMX_ENTRY_INT_INFO_TYPE_SW_INT)
+ uGuestIp = pCtx->ip + (uint16_t)cbInstr;
+
+ /* Get the code segment selector and offset from the IDT entry for the interrupt handler. */
+ X86IDTR16 IdtEntry;
+ RTGCPHYS const GCPhysIdtEntry = (RTGCPHYS)pCtx->idtr.pIdt + uVector * cbIdtEntry;
+ rc2 = PGMPhysSimpleReadGCPhys(pVM, &IdtEntry, GCPhysIdtEntry, cbIdtEntry);
+ AssertRCReturn(rc2, rc2);
+
+ /* Construct the stack frame for the interrupt/exception handler. */
+ VBOXSTRICTRC rcStrict;
+ rcStrict = hmR0VmxRealModeGuestStackPush(pVCpu, pCtx->eflags.u32);
+ if (rcStrict == VINF_SUCCESS)
+ {
+ rcStrict = hmR0VmxRealModeGuestStackPush(pVCpu, pCtx->cs.Sel);
+ if (rcStrict == VINF_SUCCESS)
+ rcStrict = hmR0VmxRealModeGuestStackPush(pVCpu, uGuestIp);
+ }
+
+ /* Clear the required eflag bits and jump to the interrupt/exception handler. */
+ if (rcStrict == VINF_SUCCESS)
+ {
+ pCtx->eflags.u32 &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC);
+ pCtx->rip = IdtEntry.offSel;
+ pCtx->cs.Sel = IdtEntry.uSel;
+ pCtx->cs.ValidSel = IdtEntry.uSel;
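+ /* In real mode the CS base is the selector shifted left by 4; cbIdtEntry (sizeof(X86IDTR16) == 4)
+ happens to supply exactly that shift count below. */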
+ pCtx->cs.u64Base = IdtEntry.uSel << cbIdtEntry;
+ if ( uIntType == VMX_ENTRY_INT_INFO_TYPE_HW_XCPT
+ && uVector == X86_XCPT_PF)
+ pCtx->cr2 = GCPtrFault;
+
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CS | HM_CHANGED_GUEST_CR2
+ | HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS
+ | HM_CHANGED_GUEST_RSP);
+
+ /*
+ * If we delivered a hardware exception (other than an NMI) and if there was
+ * block-by-STI in effect, we should clear it.
+ */
+ if (*pfIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
+ {
+ Assert( uIntType != VMX_ENTRY_INT_INFO_TYPE_NMI
+ && uIntType != VMX_ENTRY_INT_INFO_TYPE_EXT_INT);
+ Log4Func(("Clearing inhibition due to STI\n"));
+ *pfIntrState &= ~VMX_VMCS_GUEST_INT_STATE_BLOCK_STI;
+ }
+
+ Log4(("Injected real-mode: u32IntInfo=%#x u32ErrCode=%#x cbInstr=%#x Eflags=%#x CS:EIP=%04x:%04x\n",
+ u32IntInfo, u32ErrCode, cbInstr, pCtx->eflags.u, pCtx->cs.Sel, pCtx->eip));
+
+ /*
+ * The event has been truly dispatched to the guest. Mark it as no longer pending so
+ * we don't attempt to undo it if we are returning to ring-3 before executing guest code.
+ */
+ pVCpu->hm.s.Event.fPending = false;
+
+ /*
+ * If we eventually support nested-guest execution without unrestricted guest execution,
+ * we should set fInterceptEvents here.
+ */
+ Assert(!pVmxTransient->fIsNestedGuest);
+
+ /* If we're stepping and we've changed cs:rip above, bail out of the VMX R0 execution loop. */
+ if (fStepping)
+ rcStrict = VINF_EM_DBG_STEPPED;
+ }
+ AssertMsg(rcStrict == VINF_SUCCESS || rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
+ ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+ return rcStrict;
+ }
+ }
+
+ /*
+ * Validate.
+ */
+ Assert(VMX_ENTRY_INT_INFO_IS_VALID(u32IntInfo)); /* Bit 31 (Valid bit) must be set by caller. */
+ Assert(!(u32IntInfo & VMX_BF_ENTRY_INT_INFO_RSVD_12_30_MASK)); /* Bits 30:12 MBZ. */
+
+ /*
+ * Inject the event into the VMCS.
+ */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, u32IntInfo);
+ if (VMX_ENTRY_INT_INFO_IS_ERROR_CODE_VALID(u32IntInfo))
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, u32ErrCode);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
+ AssertRC(rc);
+
+ /*
+ * Update guest CR2 if this is a page-fault.
+ */
+ if (VMX_ENTRY_INT_INFO_IS_XCPT_PF(u32IntInfo))
+ pCtx->cr2 = GCPtrFault;
+
+ Log4(("Injecting u32IntInfo=%#x u32ErrCode=%#x cbInstr=%#x CR2=%#RX64\n", u32IntInfo, u32ErrCode, cbInstr, pCtx->cr2));
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Evaluates the event to be delivered to the guest and sets it as the pending
+ * event.
+ *
+ * Toggling of interrupt force-flags here is safe since we update TRPM on premature
+ * exits to ring-3 before executing guest code, see hmR0VmxExitToRing3(). We must
+ * NOT restore these force-flags.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param pfIntrState Where to store the VT-x guest-interruptibility state.
+ */
+static VBOXSTRICTRC hmR0VmxEvaluatePendingEvent(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t *pfIntrState)
+{
+ Assert(pfIntrState);
+ Assert(!TRPMHasTrap(pVCpu));
+
+ /*
+ * Compute/update guest-interruptibility state related FFs.
+ * The FFs will be used below while evaluating events to be injected.
+ */
+ *pfIntrState = hmR0VmxGetGuestIntrStateAndUpdateFFs(pVCpu);
+
+ /*
+ * Evaluate if a new event needs to be injected.
+ * An event that's already pending has already performed all necessary checks.
+ */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ bool const fIsNestedGuest = pVmxTransient->fIsNestedGuest;
+ if ( !pVCpu->hm.s.Event.fPending
+ && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ {
+ /** @todo SMI. SMIs take priority over NMIs. */
+
+ /*
+ * NMIs.
+ * NMIs take priority over external interrupts.
+ */
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI))
+ {
+ /*
+ * For a guest, the FF always indicates the guest's ability to receive an NMI.
+ *
+ * For a nested-guest, the FF always indicates the outer guest's ability to
+ * receive an NMI while the guest-interruptibility state bit depends on whether
+ * the nested-hypervisor is using virtual-NMIs.
+ */
+ if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if ( fIsNestedGuest
+ && CPUMIsGuestVmxPinCtlsSet(pCtx, VMX_PIN_CTLS_NMI_EXIT))
+ return IEMExecVmxVmexitXcptNmi(pVCpu);
+#endif
+ hmR0VmxSetPendingXcptNmi(pVCpu);
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
+ Log4Func(("NMI pending injection\n"));
+
+ /* We've injected the NMI, bail. */
+ return VINF_SUCCESS;
+ }
+ else if (!fIsNestedGuest)
+ hmR0VmxSetNmiWindowExitVmcs(pVCpu, pVmcsInfo);
+ }
+
+ /*
+ * External interrupts (PIC/APIC).
+ * Once PDMGetInterrupt() returns a valid interrupt we -must- deliver it.
+ * We cannot re-request the interrupt from the controller again.
+ */
+ if ( VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
+ && !pVCpu->hm.s.fSingleInstruction)
+ {
+ Assert(!DBGFIsStepping(pVCpu));
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
+ AssertRC(rc);
+
+ /*
+ * We must not check EFLAGS directly when executing a nested-guest, use
+ * CPUMIsGuestPhysIntrEnabled() instead as EFLAGS.IF does not control the blocking of
+ * external interrupts when "External interrupt exiting" is set. This fixes a nasty
+ * SMP hang while executing nested-guest VCPUs on spinlocks which aren't rescued by
+ * other VM-exits (like a preemption timer), see @bugref{9562#c18}.
+ *
+ * See Intel spec. 25.4.1 "Event Blocking".
+ */
+ if (CPUMIsGuestPhysIntrEnabled(pVCpu))
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if ( fIsNestedGuest
+ && CPUMIsGuestVmxPinCtlsSet(pCtx, VMX_PIN_CTLS_EXT_INT_EXIT))
+ {
+ VBOXSTRICTRC rcStrict = IEMExecVmxVmexitExtInt(pVCpu, 0 /* uVector */, true /* fIntPending */);
+ if (rcStrict != VINF_VMX_INTERCEPT_NOT_ACTIVE)
+ return rcStrict;
+ }
+#endif
+ uint8_t u8Interrupt;
+ rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if ( fIsNestedGuest
+ && CPUMIsGuestVmxPinCtlsSet(pCtx, VMX_PIN_CTLS_EXT_INT_EXIT))
+ {
+ VBOXSTRICTRC rcStrict = IEMExecVmxVmexitExtInt(pVCpu, u8Interrupt, false /* fIntPending */);
+ Assert(rcStrict != VINF_VMX_INTERCEPT_NOT_ACTIVE);
+ return rcStrict;
+ }
+#endif
+ hmR0VmxSetPendingExtInt(pVCpu, u8Interrupt);
+ Log4Func(("External interrupt (%#x) pending injection\n", u8Interrupt));
+ }
+ else if (rc == VERR_APIC_INTR_MASKED_BY_TPR)
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchTprMaskedIrq);
+
+ if ( !fIsNestedGuest
+ && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
+ hmR0VmxApicSetTprThreshold(pVmcsInfo, u8Interrupt >> 4);
+ /* else: for nested-guests, TPR threshold is picked up while merging VMCS controls. */
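+                    /* Note: u8Interrupt >> 4 is the interrupt's priority class (vector bits 7:4). Using it as
+                       the TPR threshold means a TPR-below-threshold VM-exit occurs once the guest lowers its
+                       TPR enough to unmask this interrupt, giving the still-pending interrupt another chance
+                       to be delivered. */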
+
+ /*
+ * If the CPU doesn't have TPR shadowing, we will always get a VM-exit on TPR changes and
+ * APICSetTpr() will end up setting the VMCPU_FF_INTERRUPT_APIC if required, so there is no
+ * need to re-set this force-flag here.
+ */
+ }
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
+
+ /* We've injected the interrupt or taken necessary action, bail. */
+ return VINF_SUCCESS;
+ }
+ else if (!fIsNestedGuest)
+ hmR0VmxSetIntWindowExitVmcs(pVCpu, pVmcsInfo);
+ }
+ }
+ else if (!fIsNestedGuest)
+ {
+ /*
+ * An event is being injected or we are in an interrupt shadow. Check if another event is
+ * pending. If so, instruct VT-x to cause a VM-exit as soon as the guest is ready to accept
+ * the pending event.
+ */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI))
+ hmR0VmxSetNmiWindowExitVmcs(pVCpu, pVmcsInfo);
+ else if ( VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
+ && !pVCpu->hm.s.fSingleInstruction)
+ hmR0VmxSetIntWindowExitVmcs(pVCpu, pVmcsInfo);
+ }
+ /* else: for nested-guests, NMI/interrupt-window exiting will be picked up when merging VMCS controls. */
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Injects any pending events into the guest if the guest is in a state to
+ * receive them.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param fIntrState The VT-x guest-interruptibility state.
+ * @param fStepping Whether we are single-stepping the guest using the
+ * hypervisor debugger and should return
+ * VINF_EM_DBG_STEPPED if the event was dispatched
+ * directly.
+ */
+static VBOXSTRICTRC hmR0VmxInjectPendingEvent(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t fIntrState, bool fStepping)
+{
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+
+#ifdef VBOX_STRICT
+ /*
+ * Verify guest-interruptibility state.
+ *
+ * We put this in a scoped block so we do not accidentally use fBlockSti or fBlockMovSS,
+ * since injecting an event may modify the interruptibility state and we must thus always
+ * use fIntrState.
+ */
+ {
+ bool const fBlockMovSS = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
+ bool const fBlockSti = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI);
+ Assert(!fBlockSti || !(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_RFLAGS));
+ Assert(!fBlockSti || pVCpu->cpum.GstCtx.eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. */
+        Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI)); /* We don't support block-by-SMI yet. */
+ Assert(!TRPMHasTrap(pVCpu));
+ NOREF(fBlockMovSS); NOREF(fBlockSti);
+ }
+#endif
+
+ VBOXSTRICTRC rcStrict = VINF_SUCCESS;
+ if (pVCpu->hm.s.Event.fPending)
+ {
+ /*
+ * Do -not- clear any interrupt-window exiting control here. We might have an interrupt
+ * pending even while injecting an event and in this case, we want a VM-exit as soon as
+ * the guest is ready for the next interrupt, see @bugref{6208#c45}.
+ *
+ * See Intel spec. 26.6.5 "Interrupt-Window Exiting and Virtual-Interrupt Delivery".
+ */
+ uint32_t const uIntType = VMX_ENTRY_INT_INFO_TYPE(pVCpu->hm.s.Event.u64IntInfo);
+#ifdef VBOX_STRICT
+ if (uIntType == VMX_ENTRY_INT_INFO_TYPE_EXT_INT)
+ {
+ Assert(pVCpu->cpum.GstCtx.eflags.u32 & X86_EFL_IF);
+ Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI));
+ Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS));
+ }
+ else if (uIntType == VMX_ENTRY_INT_INFO_TYPE_NMI)
+ {
+ Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI));
+ Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI));
+ Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS));
+ }
+#endif
+ Log4(("Injecting pending event vcpu[%RU32] u64IntInfo=%#RX64 Type=%#RX32\n", pVCpu->idCpu, pVCpu->hm.s.Event.u64IntInfo,
+ uIntType));
+
+ /*
+ * Inject the event and get any changes to the guest-interruptibility state.
+ *
+ * The guest-interruptibility state may need to be updated if we inject the event
+ * into the guest IDT ourselves (for real-on-v86 guest injecting software interrupts).
+ */
+ rcStrict = hmR0VmxInjectEventVmcs(pVCpu, pVmxTransient, &pVCpu->hm.s.Event, fStepping, &fIntrState);
+ AssertRCReturn(VBOXSTRICTRC_VAL(rcStrict), rcStrict);
+
+ if (uIntType == VMX_ENTRY_INT_INFO_TYPE_EXT_INT)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterrupt);
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectXcpt);
+ }
+
+ /*
+ * Deliver any pending debug exceptions if the guest is single-stepping using EFLAGS.TF and
+     * is in an interrupt shadow (block-by-STI or block-by-MOV SS).
+ */
+ if ( (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
+ && !pVmxTransient->fIsNestedGuest)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RFLAGS);
+
+ if (!pVCpu->hm.s.fSingleInstruction)
+ {
+ /*
+ * Set or clear the BS bit depending on whether the trap flag is active or not. We need
+ * to do both since we clear the BS bit from the VMCS while exiting to ring-3.
+ */
+ Assert(!DBGFIsStepping(pVCpu));
+ uint8_t const fTrapFlag = !!(pVCpu->cpum.GstCtx.eflags.u32 & X86_EFL_TF);
+ int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, fTrapFlag << VMX_BF_VMCS_PENDING_DBG_XCPT_BS_SHIFT);
+ AssertRC(rc);
+ }
+ else
+ {
+ /*
+ * We must not deliver a debug exception when single-stepping over STI/Mov-SS in the
+ * hypervisor debugger using EFLAGS.TF but rather clear interrupt inhibition. However,
+             * we take care of this case in hmR0VmxExportSharedDebugState and also the case where
+ * we use MTF, so just make sure it's called before executing guest-code.
+ */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR_MASK);
+ }
+ }
+    /* else: for nested-guests, this is currently handled while merging controls. */
+
+ /*
+ * Finally, update the guest-interruptibility state.
+ *
+     * This is required for real-on-v86 software interrupt injection, for pending
+     * debug exceptions, as well as for updates to the guest state from ring-3 (IEM).
+ */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
+ AssertRC(rc);
+
+ /*
+ * There's no need to clear the VM-entry interruption-information field here if we're not
+ * injecting anything. VT-x clears the valid bit on every VM-exit.
+ *
+ * See Intel spec. 24.8.3 "VM-Entry Controls for Event Injection".
+ */
+
+ Assert(rcStrict == VINF_SUCCESS || rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping));
+ return rcStrict;
+}
+
+
+/**
+ * Enters the VT-x session.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
+{
+ AssertPtr(pVCpu);
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ LogFlowFunc(("pVCpu=%p\n", pVCpu));
+ Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
+ == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
+
+#ifdef VBOX_STRICT
+ /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
+ RTCCUINTREG uHostCr4 = ASMGetCR4();
+ if (!(uHostCr4 & X86_CR4_VMXE))
+ {
+ LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
+ return VERR_VMX_X86_CR4_VMXE_CLEARED;
+ }
+#endif
+
+ /*
+ * Load the appropriate VMCS as the current and active one.
+ */
+ PVMXVMCSINFO pVmcsInfo;
+ bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
+ if (!fInNestedGuestMode)
+ pVmcsInfo = &pVCpu->hm.s.vmx.VmcsInfo;
+ else
+ pVmcsInfo = &pVCpu->hm.s.vmx.VmcsInfoNstGst;
+ int rc = hmR0VmxLoadVmcs(pVmcsInfo);
+ if (RT_SUCCESS(rc))
+ {
+ pVCpu->hm.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
+ pVCpu->hm.s.fLeaveDone = false;
+ Log4Func(("Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
+
+ /*
+ * Do the EMT scheduled L1D flush here if needed.
+ */
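+        /* Note: the L1D flush (via IA32_FLUSH_CMD) mitigates L1TF and the MDS clear mitigates MDS-class
+           leaks; they are issued here only when the flush-on-scheduling policy is configured. */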
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fL1dFlushOnSched)
+ ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
+ else if (pVCpu->CTX_SUFF(pVM)->hm.s.fMdsClearOnSched)
+ hmR0MdsClear();
+ }
+ return rc;
+}
+
+
+/**
+ * The thread-context callback (only on platforms which support it).
+ *
+ * @param enmEvent The thread-context event.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
+ * @thread EMT(pVCpu)
+ */
+VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
+{
+ AssertPtr(pVCpu);
+ RT_NOREF1(fGlobalInit);
+
+ switch (enmEvent)
+ {
+ case RTTHREADCTXEVENT_OUT:
+ {
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(VMMR0ThreadCtxHookIsEnabled(pVCpu));
+ VMCPU_ASSERT_EMT(pVCpu);
+
+ /* No longjmps (logger flushes, locks) in this fragile context. */
+ VMMRZCallRing3Disable(pVCpu);
+ Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
+
+ /* Restore host-state (FPU, debug etc.) */
+ if (!pVCpu->hm.s.fLeaveDone)
+ {
+ /*
+ * Do -not- import the guest-state here as we might already be in the middle of importing
+ * it, esp. bad if we're holding the PGM lock, see comment in hmR0VmxImportGuestState().
+ */
+ hmR0VmxLeave(pVCpu, false /* fImportState */);
+ pVCpu->hm.s.fLeaveDone = true;
+ }
+
+ /* Leave HM context, takes care of local init (term). */
+ int rc = HMR0LeaveCpu(pVCpu);
+ AssertRC(rc);
+
+ /* Restore longjmp state. */
+ VMMRZCallRing3Enable(pVCpu);
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
+ break;
+ }
+
+ case RTTHREADCTXEVENT_IN:
+ {
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(VMMR0ThreadCtxHookIsEnabled(pVCpu));
+ VMCPU_ASSERT_EMT(pVCpu);
+
+ /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
+ VMMRZCallRing3Disable(pVCpu);
+ Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
+
+ /* Initialize the bare minimum state required for HM. This takes care of
+ initializing VT-x if necessary (onlined CPUs, local init etc.) */
+ int rc = hmR0EnterCpu(pVCpu);
+ AssertRC(rc);
+ Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
+ == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
+
+ /* Load the active VMCS as the current one. */
+ PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+ rc = hmR0VmxLoadVmcs(pVmcsInfo);
+ AssertRC(rc);
+ Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
+ pVCpu->hm.s.fLeaveDone = false;
+
+ /* Do the EMT scheduled L1D flush if needed. */
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fL1dFlushOnSched)
+ ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
+
+ /* Restore longjmp state. */
+ VMMRZCallRing3Enable(pVCpu);
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
+
+/**
+ * Exports the host state into the VMCS host-state area.
+ * Sets up the VM-exit MSR-load area.
+ *
+ * The CPU state will be loaded from these fields on every successful VM-exit.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ int rc = VINF_SUCCESS;
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
+ {
+ hmR0VmxExportHostControlRegs();
+
+ rc = hmR0VmxExportHostSegmentRegs(pVCpu);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ hmR0VmxExportHostMsrs(pVCpu);
+
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
+ }
+ return rc;
+}
+
+
+/**
+ * Saves the host state in the VMCS host-state.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
+{
+ AssertPtr(pVCpu);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /*
+ * Export the host state here while entering HM context.
+ * When thread-context hooks are used, we might get preempted and have to re-save the host
+ * state but most of the time we won't be, so do it here before we disable interrupts.
+ */
+ return hmR0VmxExportHostState(pVCpu);
+}
+
+
+/**
+ * Exports the guest state into the VMCS guest-state area.
+ *
+ * This will typically be done before VM-entry when the guest-CPU state and the
+ * VMCS state may potentially be out of sync.
+ *
+ * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
+ * VM-entry controls.
+ * Sets up the appropriate VMX non-root function to execute guest code based on
+ * the guest CPU mode.
+ *
+ * @returns VBox strict status code.
+ * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
+ * without unrestricted guest execution and the VMMDev is not presently
+ * mapped (e.g. EFI32).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ AssertPtr(pVCpu);
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+ LogFlowFunc(("pVCpu=%p\n", pVCpu));
+
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
+
+ /*
+ * Determine real-on-v86 mode.
+ * Used when the guest is in real-mode and unrestricted guest execution is not used.
+ */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if ( pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest
+ || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
+ pVmcsInfo->RealMode.fRealOnV86Active = false;
+ else
+ {
+ Assert(!pVmxTransient->fIsNestedGuest);
+ pVmcsInfo->RealMode.fRealOnV86Active = true;
+ }
+
+ /*
+ * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
+ * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
+ */
+ int rc = hmR0VmxExportGuestEntryExitCtls(pVCpu, pVmxTransient);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ rc = hmR0VmxExportGuestCR0(pVCpu, pVmxTransient);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ VBOXSTRICTRC rcStrict = hmR0VmxExportGuestCR3AndCR4(pVCpu, pVmxTransient);
+ if (rcStrict == VINF_SUCCESS)
+ { /* likely */ }
+ else
+ {
+ Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
+ return rcStrict;
+ }
+
+ rc = hmR0VmxExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ hmR0VmxExportGuestApicTpr(pVCpu, pVmxTransient);
+ hmR0VmxExportGuestXcptIntercepts(pVCpu, pVmxTransient);
+ hmR0VmxExportGuestRip(pVCpu);
+ hmR0VmxExportGuestRsp(pVCpu);
+ hmR0VmxExportGuestRflags(pVCpu, pVmxTransient);
+
+ rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
+ | HM_CHANGED_GUEST_CR2
+ | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
+ | HM_CHANGED_GUEST_X87
+ | HM_CHANGED_GUEST_SSE_AVX
+ | HM_CHANGED_GUEST_OTHER_XSAVE
+ | HM_CHANGED_GUEST_XCRx
+ | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
+ | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
+ | HM_CHANGED_GUEST_TSC_AUX
+ | HM_CHANGED_GUEST_OTHER_MSRS
+ | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
+ return rc;
+}
+
+
+/**
+ * Exports the state shared between the host and guest into the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
+ {
+ int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient);
+ AssertRC(rc);
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
+
+ /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
+ hmR0VmxExportGuestRflags(pVCpu, pVmxTransient);
+ }
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
+ {
+ hmR0VmxLazyLoadGuestMsrs(pVCpu);
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
+ }
+
+ AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
+ ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
+}
+
+
+/**
+ * Worker for loading the guest-state bits in the inner VT-x execution loop.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
+ * without unrestricted guest execution and the VMMDev is not presently
+ * mapped (e.g. EFI32).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(VMMR0IsLogFlushDisabled(pVCpu));
+
+#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+#endif
+
+ /*
+ * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
+ * changes. First try to export only these without going through all other changed-flag checks.
+ */
+ VBOXSTRICTRC rcStrict;
+ uint64_t const fCtxMask = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
+ uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
+ uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
+
+ /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often).*/
+ if ( (fCtxChanged & fMinimalMask)
+ && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
+ {
+ hmR0VmxExportGuestRip(pVCpu);
+ hmR0VmxExportGuestRsp(pVCpu);
+ hmR0VmxExportGuestRflags(pVCpu, pVmxTransient);
+ rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
+ }
+ /* If anything else also changed, go through the full export routine and export as required. */
+ else if (fCtxChanged & fCtxMask)
+ {
+ rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ { /* likely */}
+ else
+ {
+ AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
+ VBOXSTRICTRC_VAL(rcStrict)));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ return rcStrict;
+ }
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
+ }
+ /* Nothing changed, nothing to load here. */
+ else
+ rcStrict = VINF_SUCCESS;
+
+#ifdef VBOX_STRICT
+ /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
+ uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
+ AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
+#endif
+ return rcStrict;
+}
+
+
+/**
+ * Tries to determine what part of the guest-state VT-x has deemed as invalid
+ * and update error record fields accordingly.
+ *
+ * @returns VMX_IGS_* error codes.
+ * @retval VMX_IGS_REASON_NOT_FOUND if this function could not find anything
+ * wrong with the guest state.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object.
+ *
+ * @remarks This function assumes our cache of the VMCS controls
+ * are valid, i.e. hmR0VmxCheckCachedVmcsCtls() succeeded.
+ */
+static uint32_t hmR0VmxCheckGuestState(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
+{
+#define HMVMX_ERROR_BREAK(err) { uError = (err); break; }
+#define HMVMX_CHECK_BREAK(expr, err) do { \
+ if (!(expr)) { uError = (err); break; } \
+ } while (0)
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint32_t uError = VMX_IGS_ERROR;
+ uint32_t u32IntrState = 0;
+ bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuest;
+ do
+ {
+ int rc;
+
+ /*
+ * Guest-interruptibility state.
+ *
+         * Read this first so that, even if a check fails before the ones that actually
+         * require the guest-interruptibility state, the value we record still reflects
+         * the correct VMCS contents and avoids causing further confusion.
+ */
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &u32IntrState);
+ AssertRC(rc);
+
+ uint32_t u32Val;
+ uint64_t u64Val;
+
+ /*
+ * CR0.
+ */
+ /** @todo Why do we need to OR and AND the fixed-0 and fixed-1 bits below? */
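+        /* Note: a CR0 bit that is 1 in the FIXED0 MSR must be 1 and a bit that is 0 in the FIXED1 MSR
+           must be 0; flexible bits are 0 in FIXED0 and 1 in FIXED1 (Intel spec. appendix A.7). Thus
+           FIXED0 & FIXED1 gives the must-be-one mask and FIXED0 | FIXED1 the may-be-one mask. */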
+ uint64_t fSetCr0 = (pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
+ uint64_t const fZapCr0 = (pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
+ /* Exceptions for unrestricted guest execution for CR0 fixed bits (PE, PG).
+ See Intel spec. 26.3.1 "Checks on Guest Control Registers, Debug Registers and MSRs." */
+ if (fUnrestrictedGuest)
+ fSetCr0 &= ~(uint64_t)(X86_CR0_PE | X86_CR0_PG);
+
+ uint64_t u64GuestCr0;
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR0, &u64GuestCr0);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK((u64GuestCr0 & fSetCr0) == fSetCr0, VMX_IGS_CR0_FIXED1);
+ HMVMX_CHECK_BREAK(!(u64GuestCr0 & ~fZapCr0), VMX_IGS_CR0_FIXED0);
+ if ( !fUnrestrictedGuest
+ && (u64GuestCr0 & X86_CR0_PG)
+ && !(u64GuestCr0 & X86_CR0_PE))
+ HMVMX_ERROR_BREAK(VMX_IGS_CR0_PG_PE_COMBO);
+
+ /*
+ * CR4.
+ */
+ /** @todo Why do we need to OR and AND the fixed-0 and fixed-1 bits below? */
+ uint64_t const fSetCr4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
+ uint64_t const fZapCr4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
+
+ uint64_t u64GuestCr4;
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR4, &u64GuestCr4);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK((u64GuestCr4 & fSetCr4) == fSetCr4, VMX_IGS_CR4_FIXED1);
+ HMVMX_CHECK_BREAK(!(u64GuestCr4 & ~fZapCr4), VMX_IGS_CR4_FIXED0);
+
+ /*
+ * IA32_DEBUGCTL MSR.
+ */
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, &u64Val);
+ AssertRC(rc);
+ if ( (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
+ && (u64Val & 0xfffffe3c)) /* Bits 31:9, bits 5:2 MBZ. */
+ {
+ HMVMX_ERROR_BREAK(VMX_IGS_DEBUGCTL_MSR_RESERVED);
+ }
+ uint64_t u64DebugCtlMsr = u64Val;
+
+#ifdef VBOX_STRICT
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val);
+ AssertRC(rc);
+ Assert(u32Val == pVmcsInfo->u32EntryCtls);
+#endif
+ bool const fLongModeGuest = RT_BOOL(pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_IA32E_MODE_GUEST);
+
+ /*
+ * RIP and RFLAGS.
+ */
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RIP, &u64Val);
+ AssertRC(rc);
+ /* pCtx->rip can be different than the one in the VMCS (e.g. run guest code and VM-exits that don't update it). */
+ if ( !fLongModeGuest
+ || !pCtx->cs.Attr.n.u1Long)
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffff00000000)), VMX_IGS_LONGMODE_RIP_INVALID);
+ /** @todo If the processor supports N < 64 linear-address bits, bits 63:N
+ * must be identical if the "IA-32e mode guest" VM-entry
+ * control is 1 and CS.L is 1. No check applies if the
+ * CPU supports 64 linear-address bits. */
+
+ /* Flags in pCtx can be different (real-on-v86 for instance). We are only concerned about the VMCS contents here. */
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RFLAGS, &u64Val);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffc08028)), /* Bit 63:22, Bit 15, 5, 3 MBZ. */
+ VMX_IGS_RFLAGS_RESERVED);
+ HMVMX_CHECK_BREAK((u64Val & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */
+ uint32_t const u32Eflags = u64Val;
+
+ if ( fLongModeGuest
+ || ( fUnrestrictedGuest
+ && !(u64GuestCr0 & X86_CR0_PE)))
+ {
+ HMVMX_CHECK_BREAK(!(u32Eflags & X86_EFL_VM), VMX_IGS_RFLAGS_VM_INVALID);
+ }
+
+ uint32_t u32EntryInfo;
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32EntryInfo);
+ AssertRC(rc);
+ if (VMX_ENTRY_INT_INFO_IS_EXT_INT(u32EntryInfo))
+ HMVMX_CHECK_BREAK(u32Eflags & X86_EFL_IF, VMX_IGS_RFLAGS_IF_INVALID);
+
+ /*
+ * 64-bit checks.
+ */
+ if (fLongModeGuest)
+ {
+ HMVMX_CHECK_BREAK(u64GuestCr0 & X86_CR0_PG, VMX_IGS_CR0_PG_LONGMODE);
+ HMVMX_CHECK_BREAK(u64GuestCr4 & X86_CR4_PAE, VMX_IGS_CR4_PAE_LONGMODE);
+ }
+
+ if ( !fLongModeGuest
+ && (u64GuestCr4 & X86_CR4_PCIDE))
+ HMVMX_ERROR_BREAK(VMX_IGS_CR4_PCIDE);
+
+ /** @todo CR3 field must be such that bits 63:52 and bits in the range
+ * 51:32 beyond the processor's physical-address width are 0. */
+
+ if ( (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
+ && (pCtx->dr[7] & X86_DR7_MBZ_MASK))
+ HMVMX_ERROR_BREAK(VMX_IGS_DR7_RESERVED);
+
+ rc = VMXReadVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, &u64Val);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_ESP_NOT_CANONICAL);
+
+ rc = VMXReadVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, &u64Val);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_EIP_NOT_CANONICAL);
+
+ /*
+ * PERF_GLOBAL MSR.
+ */
+ if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_PERF_MSR)
+ {
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL, &u64Val);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffff8fffffffc)),
+ VMX_IGS_PERF_GLOBAL_MSR_RESERVED); /* Bits 63:35, bits 31:2 MBZ. */
+ }
+
+ /*
+ * PAT MSR.
+ */
+ if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_PAT_MSR)
+ {
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PAT_FULL, &u64Val);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0x707070707070707)), VMX_IGS_PAT_MSR_RESERVED);
+ for (unsigned i = 0; i < 8; i++)
+ {
+ uint8_t u8Val = (u64Val & 0xff);
+ if ( u8Val != 0 /* UC */
+ && u8Val != 1 /* WC */
+ && u8Val != 4 /* WT */
+ && u8Val != 5 /* WP */
+ && u8Val != 6 /* WB */
+ && u8Val != 7 /* UC- */)
+ HMVMX_ERROR_BREAK(VMX_IGS_PAT_MSR_INVALID);
+ u64Val >>= 8;
+ }
+ }
+
+ /*
+ * EFER MSR.
+ */
+ if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_EFER_MSR)
+ {
+ Assert(pVM->hm.s.vmx.fSupportsVmcsEfer);
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_EFER_FULL, &u64Val);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffffffffff2fe)),
+ VMX_IGS_EFER_MSR_RESERVED); /* Bits 63:12, bit 9, bits 7:1 MBZ. */
+ HMVMX_CHECK_BREAK(RT_BOOL(u64Val & MSR_K6_EFER_LMA) == RT_BOOL( pVmcsInfo->u32EntryCtls
+ & VMX_ENTRY_CTLS_IA32E_MODE_GUEST),
+ VMX_IGS_EFER_LMA_GUEST_MODE_MISMATCH);
+ /** @todo r=ramshankar: Unrestricted check here is probably wrong, see
+ * iemVmxVmentryCheckGuestState(). */
+ HMVMX_CHECK_BREAK( fUnrestrictedGuest
+ || !(u64GuestCr0 & X86_CR0_PG)
+ || RT_BOOL(u64Val & MSR_K6_EFER_LMA) == RT_BOOL(u64Val & MSR_K6_EFER_LME),
+ VMX_IGS_EFER_LMA_LME_MISMATCH);
+ }
+
+ /*
+ * Segment registers.
+ */
+ HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
+ || !(pCtx->ldtr.Sel & X86_SEL_LDT), VMX_IGS_LDTR_TI_INVALID);
+ if (!(u32Eflags & X86_EFL_VM))
+ {
+ /* CS */
+ HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1Present, VMX_IGS_CS_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xf00), VMX_IGS_CS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xfffe0000), VMX_IGS_CS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK( (pCtx->cs.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->cs.u32Limit & 0xfff00000)
+ || (pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
+ /* CS cannot be loaded with NULL in protected mode. */
+ HMVMX_CHECK_BREAK(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_CS_ATTR_UNUSABLE);
+ HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1DescType, VMX_IGS_CS_ATTR_S_INVALID);
+ if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11)
+ HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_UNEQUAL);
+ else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15)
+ HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_MISMATCH);
+ else if (pVM->hm.s.vmx.fUnrestrictedGuest && pCtx->cs.Attr.n.u4Type == 3)
+ HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == 0, VMX_IGS_CS_ATTR_DPL_INVALID);
+ else
+ HMVMX_ERROR_BREAK(VMX_IGS_CS_ATTR_TYPE_INVALID);
+
+ /* SS */
+ HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
+ || (pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL), VMX_IGS_SS_CS_RPL_UNEQUAL);
+ HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL), VMX_IGS_SS_ATTR_DPL_RPL_UNEQUAL);
+ if ( !(pCtx->cr0 & X86_CR0_PE)
+ || pCtx->cs.Attr.n.u4Type == 3)
+ HMVMX_CHECK_BREAK(!pCtx->ss.Attr.n.u2Dpl, VMX_IGS_SS_ATTR_DPL_INVALID);
+
+ if (!(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7, VMX_IGS_SS_ATTR_TYPE_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u1Present, VMX_IGS_SS_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xf00), VMX_IGS_SS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xfffe0000), VMX_IGS_SS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK( (pCtx->ss.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->ss.u32Limit & 0xfff00000)
+ || (pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID);
+ }
+
+ /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxExportGuestSReg(). */
+ if (!(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_DS_ATTR_A_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u1Present, VMX_IGS_DS_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
+ || pCtx->ds.Attr.n.u4Type > 11
+ || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL);
+ HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xf00), VMX_IGS_DS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xfffe0000), VMX_IGS_DS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK( (pCtx->ds.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->ds.u32Limit & 0xfff00000)
+ || (pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_DS_ATTR_TYPE_INVALID);
+ }
+ if (!(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_ES_ATTR_A_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u1Present, VMX_IGS_ES_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
+ || pCtx->es.Attr.n.u4Type > 11
+ || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL);
+ HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xf00), VMX_IGS_ES_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xfffe0000), VMX_IGS_ES_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK( (pCtx->es.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->es.u32Limit & 0xfff00000)
+ || (pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_ES_ATTR_TYPE_INVALID);
+ }
+ if (!(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_FS_ATTR_A_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u1Present, VMX_IGS_FS_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
+ || pCtx->fs.Attr.n.u4Type > 11
+ || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL), VMX_IGS_FS_ATTR_DPL_RPL_UNEQUAL);
+ HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xf00), VMX_IGS_FS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xfffe0000), VMX_IGS_FS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK( (pCtx->fs.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->fs.u32Limit & 0xfff00000)
+ || (pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_FS_ATTR_TYPE_INVALID);
+ }
+ if (!(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_GS_ATTR_A_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u1Present, VMX_IGS_GS_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
+ || pCtx->gs.Attr.n.u4Type > 11
+ || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL), VMX_IGS_GS_ATTR_DPL_RPL_UNEQUAL);
+ HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xf00), VMX_IGS_GS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xfffe0000), VMX_IGS_GS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK( (pCtx->gs.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->gs.u32Limit & 0xfff00000)
+ || (pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_GS_ATTR_TYPE_INVALID);
+ }
+ /* 64-bit capable CPUs. */
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL);
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL);
+ HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
+ || X86_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL);
+ HMVMX_CHECK_BREAK(!RT_HI_U32(pCtx->cs.u64Base), VMX_IGS_LONGMODE_CS_BASE_INVALID);
+ HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ss.u64Base),
+ VMX_IGS_LONGMODE_SS_BASE_INVALID);
+ HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ds.u64Base),
+ VMX_IGS_LONGMODE_DS_BASE_INVALID);
+ HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->es.u64Base),
+ VMX_IGS_LONGMODE_ES_BASE_INVALID);
+ }
+ else
+ {
+ /* V86 mode checks. */
+ uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr;
+ if (pVmcsInfo->RealMode.fRealOnV86Active)
+ {
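+                /* 0xf3 = present, DPL=3, S=1, type 3 (read/write, accessed data segment); these are the
+                   segment attributes required of all segments while in virtual-8086 mode. */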
+ u32CSAttr = 0xf3; u32SSAttr = 0xf3;
+ u32DSAttr = 0xf3; u32ESAttr = 0xf3;
+ u32FSAttr = 0xf3; u32GSAttr = 0xf3;
+ }
+ else
+ {
+ u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u;
+ u32DSAttr = pCtx->ds.Attr.u; u32ESAttr = pCtx->es.Attr.u;
+ u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u;
+ }
+
+ /* CS */
+ HMVMX_CHECK_BREAK((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), VMX_IGS_V86_CS_BASE_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->cs.u32Limit == 0xffff, VMX_IGS_V86_CS_LIMIT_INVALID);
+ HMVMX_CHECK_BREAK(u32CSAttr == 0xf3, VMX_IGS_V86_CS_ATTR_INVALID);
+ /* SS */
+ HMVMX_CHECK_BREAK((pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4), VMX_IGS_V86_SS_BASE_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->ss.u32Limit == 0xffff, VMX_IGS_V86_SS_LIMIT_INVALID);
+ HMVMX_CHECK_BREAK(u32SSAttr == 0xf3, VMX_IGS_V86_SS_ATTR_INVALID);
+ /* DS */
+ HMVMX_CHECK_BREAK((pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4), VMX_IGS_V86_DS_BASE_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->ds.u32Limit == 0xffff, VMX_IGS_V86_DS_LIMIT_INVALID);
+ HMVMX_CHECK_BREAK(u32DSAttr == 0xf3, VMX_IGS_V86_DS_ATTR_INVALID);
+ /* ES */
+ HMVMX_CHECK_BREAK((pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4), VMX_IGS_V86_ES_BASE_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->es.u32Limit == 0xffff, VMX_IGS_V86_ES_LIMIT_INVALID);
+ HMVMX_CHECK_BREAK(u32ESAttr == 0xf3, VMX_IGS_V86_ES_ATTR_INVALID);
+ /* FS */
+ HMVMX_CHECK_BREAK((pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4), VMX_IGS_V86_FS_BASE_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->fs.u32Limit == 0xffff, VMX_IGS_V86_FS_LIMIT_INVALID);
+ HMVMX_CHECK_BREAK(u32FSAttr == 0xf3, VMX_IGS_V86_FS_ATTR_INVALID);
+ /* GS */
+ HMVMX_CHECK_BREAK((pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4), VMX_IGS_V86_GS_BASE_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->gs.u32Limit == 0xffff, VMX_IGS_V86_GS_LIMIT_INVALID);
+ HMVMX_CHECK_BREAK(u32GSAttr == 0xf3, VMX_IGS_V86_GS_ATTR_INVALID);
+ /* 64-bit capable CPUs. */
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL);
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL);
+ HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
+ || X86_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL);
+ HMVMX_CHECK_BREAK(!RT_HI_U32(pCtx->cs.u64Base), VMX_IGS_LONGMODE_CS_BASE_INVALID);
+ HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ss.u64Base),
+ VMX_IGS_LONGMODE_SS_BASE_INVALID);
+ HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ds.u64Base),
+ VMX_IGS_LONGMODE_DS_BASE_INVALID);
+ HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->es.u64Base),
+ VMX_IGS_LONGMODE_ES_BASE_INVALID);
+ }
+
+ /*
+ * TR.
+ */
+ HMVMX_CHECK_BREAK(!(pCtx->tr.Sel & X86_SEL_LDT), VMX_IGS_TR_TI_INVALID);
+ /* 64-bit capable CPUs. */
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->tr.u64Base), VMX_IGS_TR_BASE_NOT_CANONICAL);
+ if (fLongModeGuest)
+ HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u4Type == 11, /* 64-bit busy TSS. */
+ VMX_IGS_LONGMODE_TR_ATTR_TYPE_INVALID);
+ else
+ HMVMX_CHECK_BREAK( pCtx->tr.Attr.n.u4Type == 3 /* 16-bit busy TSS. */
+ || pCtx->tr.Attr.n.u4Type == 11, /* 32-bit busy TSS.*/
+ VMX_IGS_TR_ATTR_TYPE_INVALID);
+ HMVMX_CHECK_BREAK(!pCtx->tr.Attr.n.u1DescType, VMX_IGS_TR_ATTR_S_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u1Present, VMX_IGS_TR_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & 0xf00), VMX_IGS_TR_ATTR_RESERVED); /* Bits 11:8 MBZ. */
+ HMVMX_CHECK_BREAK( (pCtx->tr.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->tr.u32Limit & 0xfff00000)
+ || (pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_TR_ATTR_UNUSABLE);
+
+ /*
+ * GDTR and IDTR (64-bit capable checks).
+ */
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_GDTR_BASE, &u64Val);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_GDTR_BASE_NOT_CANONICAL);
+
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_IDTR_BASE, &u64Val);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_IDTR_BASE_NOT_CANONICAL);
+
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_GDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */
+
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_IDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */
+
+ /*
+ * Guest Non-Register State.
+ */
+ /* Activity State. */
+ uint32_t u32ActivityState;
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_ACTIVITY_STATE, &u32ActivityState);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK( !u32ActivityState
+ || (u32ActivityState & RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Misc, VMX_BF_MISC_ACTIVITY_STATES)),
+ VMX_IGS_ACTIVITY_STATE_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->ss.Attr.n.u2Dpl)
+ || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_HLT, VMX_IGS_ACTIVITY_STATE_HLT_INVALID);
+
+ if ( u32IntrState == VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS
+ || u32IntrState == VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
+ HMVMX_CHECK_BREAK(u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_ACTIVE, VMX_IGS_ACTIVITY_STATE_ACTIVE_INVALID);
+
+ /** @todo Activity state and injecting interrupts. Left as a todo since we
+         *        currently don't use any activity state other than ACTIVE. */
+
+ HMVMX_CHECK_BREAK( !(pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_ENTRY_TO_SMM)
+ || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_SIPI_WAIT, VMX_IGS_ACTIVITY_STATE_SIPI_WAIT_INVALID);
+
+ /* Guest interruptibility-state. */
+ HMVMX_CHECK_BREAK(!(u32IntrState & 0xffffffe0), VMX_IGS_INTERRUPTIBILITY_STATE_RESERVED);
+ HMVMX_CHECK_BREAK((u32IntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
+ != (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS),
+ VMX_IGS_INTERRUPTIBILITY_STATE_STI_MOVSS_INVALID);
+ HMVMX_CHECK_BREAK( (u32Eflags & X86_EFL_IF)
+ || !(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI),
+ VMX_IGS_INTERRUPTIBILITY_STATE_STI_EFL_INVALID);
+ if (VMX_ENTRY_INT_INFO_IS_EXT_INT(u32EntryInfo))
+ {
+ HMVMX_CHECK_BREAK( !(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
+ && !(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS),
+ VMX_IGS_INTERRUPTIBILITY_STATE_EXT_INT_INVALID);
+ }
+ else if (VMX_ENTRY_INT_INFO_IS_XCPT_NMI(u32EntryInfo))
+ {
+ HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS),
+ VMX_IGS_INTERRUPTIBILITY_STATE_MOVSS_INVALID);
+ HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI),
+ VMX_IGS_INTERRUPTIBILITY_STATE_STI_INVALID);
+ }
+ /** @todo Assumes the processor is not in SMM. */
+ HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI),
+ VMX_IGS_INTERRUPTIBILITY_STATE_SMI_INVALID);
+ HMVMX_CHECK_BREAK( !(pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_ENTRY_TO_SMM)
+ || (u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI),
+ VMX_IGS_INTERRUPTIBILITY_STATE_SMI_SMM_INVALID);
+ if ( (pVmcsInfo->u32PinCtls & VMX_PIN_CTLS_VIRT_NMI)
+ && VMX_ENTRY_INT_INFO_IS_XCPT_NMI(u32EntryInfo))
+ HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI), VMX_IGS_INTERRUPTIBILITY_STATE_NMI_INVALID);
+
+ /* Pending debug exceptions. */
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, &u64Val);
+ AssertRC(rc);
+ /* Bits 63:15, Bit 13, Bits 11:4 MBZ. */
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffffaff0)), VMX_IGS_LONGMODE_PENDING_DEBUG_RESERVED);
+ u32Val = u64Val; /* For pending debug exceptions checks below. */
+
+ if ( (u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
+ || (u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS)
+ || u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_HLT)
+ {
+ if ( (u32Eflags & X86_EFL_TF)
+ && !(u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */
+ {
+ /* Bit 14 is PendingDebug.BS. */
+ HMVMX_CHECK_BREAK(u32Val & RT_BIT(14), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_SET);
+ }
+ if ( !(u32Eflags & X86_EFL_TF)
+ || (u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */
+ {
+ /* Bit 14 is PendingDebug.BS. */
+ HMVMX_CHECK_BREAK(!(u32Val & RT_BIT(14)), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_CLEAR);
+ }
+ }
+
+ /* VMCS link pointer. */
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, &u64Val);
+ AssertRC(rc);
+ if (u64Val != UINT64_C(0xffffffffffffffff))
+ {
+ HMVMX_CHECK_BREAK(!(u64Val & 0xfff), VMX_IGS_VMCS_LINK_PTR_RESERVED);
+ /** @todo Bits beyond the processor's physical-address width MBZ. */
+ /** @todo SMM checks. */
+ Assert(pVmcsInfo->HCPhysShadowVmcs == u64Val);
+ Assert(pVmcsInfo->pvShadowVmcs);
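+            /* The first 32 bits of a VMCS region hold the VMCS revision identifier; bit 31 of that
+               dword distinguishes a shadow VMCS from an ordinary VMCS. */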
+ VMXVMCSREVID VmcsRevId;
+ VmcsRevId.u = *(uint32_t *)pVmcsInfo->pvShadowVmcs;
+ HMVMX_CHECK_BREAK(VmcsRevId.n.u31RevisionId == RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_ID),
+ VMX_IGS_VMCS_LINK_PTR_SHADOW_VMCS_ID_INVALID);
+ HMVMX_CHECK_BREAK(VmcsRevId.n.fIsShadowVmcs == (uint32_t)!!(pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING),
+ VMX_IGS_VMCS_LINK_PTR_NOT_SHADOW);
+ }
+
+ /** @todo Checks on Guest Page-Directory-Pointer-Table Entries when guest is
+ * not using nested paging? */
+ if ( pVM->hm.s.fNestedPaging
+ && !fLongModeGuest
+ && CPUMIsGuestInPAEModeEx(pCtx))
+ {
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &u64Val);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
+
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &u64Val);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
+
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &u64Val);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
+
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &u64Val);
+ AssertRC(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
+ }
+
+ /* Shouldn't happen but distinguish it from AssertRCBreak() errors. */
+ if (uError == VMX_IGS_ERROR)
+ uError = VMX_IGS_REASON_NOT_FOUND;
+ } while (0);
+
+ pVCpu->hm.s.u32HMError = uError;
+ pVCpu->hm.s.vmx.LastError.u32GuestIntrState = u32IntrState;
+ return uError;
+
+#undef HMVMX_ERROR_BREAK
+#undef HMVMX_CHECK_BREAK
+}
+
+
+/**
+ * Maps the APIC-access page for virtualizing APIC accesses.
+ *
+ * This can cause longjumps to R3 due to the acquisition of the PGM lock. Hence,
+ * this is not done as part of exporting guest state, see @bugref{8721}.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ uint64_t const u64MsrApicBase = APICGetBaseMsrNoCheck(pVCpu);
+
+ Assert(PDMHasApic(pVM));
+ Assert(u64MsrApicBase);
+
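+    /* The low bits of the APIC base MSR hold flags (BSP, x2APIC enable, global enable); masking with
+       PAGE_BASE_GC_MASK leaves the page-aligned guest-physical address of the APIC MMIO range. */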
+ RTGCPHYS const GCPhysApicBase = u64MsrApicBase & PAGE_BASE_GC_MASK;
+ Log4Func(("Mappping HC APIC-access page at %#RGp\n", GCPhysApicBase));
+
+ /* Unalias the existing mapping. */
+ int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
+ AssertRCReturn(rc, rc);
+
+ /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
+ Assert(pVM->hm.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
+ rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
+ AssertRCReturn(rc, rc);
+
+ /* Update the per-VCPU cache of the APIC base MSR. */
+ pVCpu->hm.s.vmx.u64GstMsrApicBase = u64MsrApicBase;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Worker function passed to RTMpOnSpecific() that is to be called on the target
+ * CPU.
+ *
+ * @param idCpu The ID for the CPU the function is called on.
+ * @param pvUser1 Null, not used.
+ * @param pvUser2 Null, not used.
+ */
+static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ RT_NOREF3(idCpu, pvUser1, pvUser2);
+ VMXDispatchHostNmi();
+}
+
+
+/**
+ * Dispatches an NMI on the host CPU that received it.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was
+ * executing when receiving the host NMI in VMX non-root
+ * operation.
+ */
+static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
+{
+ RTCPUID const idCpu = pVmcsInfo->idHostCpuExec;
+ Assert(idCpu != NIL_RTCPUID);
+
+ /*
+ * We don't want to delay dispatching the NMI any more than we have to. However,
+ * we have already chosen -not- to dispatch NMIs when interrupts were still disabled
+ * after executing guest or nested-guest code for the following reasons:
+ *
+     *   - We would need to perform VMREADs with interrupts disabled, which is orders of
+ * magnitude worse when we run as a nested hypervisor without VMCS shadowing
+ * supported by the host hypervisor.
+ *
+ * - It affects the common VM-exit scenario and keeps interrupts disabled for a
+ * longer period of time just for handling an edge case like host NMIs which do
+ * not occur nearly as frequently as other VM-exits.
+ *
+ * Let's cover the most likely scenario first. Check if we are on the target CPU
+ * and dispatch the NMI right away. This should be much faster than calling into
+ * RTMpOnSpecific() machinery.
+ */
+ bool fDispatched = false;
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+ if (idCpu == RTMpCpuId())
+ {
+ VMXDispatchHostNmi();
+ fDispatched = true;
+ }
+ ASMSetFlags(fEFlags);
+ if (fDispatched)
+ {
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * RTMpOnSpecific() waits until the worker function has run on the target CPU. So
+ * there should be no race or recursion even if we are unlucky enough to be preempted
+ * (to the target CPU) without dispatching the host NMI above.
+ */
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi);
+ return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */);
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Merges the guest with the nested-guest MSR bitmap in preparation of executing the
+ * nested-guest using hardware-assisted VMX.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcsInfoNstGst The nested-guest VMCS info. object.
+ * @param pVmcsInfoGst The guest VMCS info. object.
+ */
+static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst)
+{
+ uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE;
+ uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap;
+ Assert(pu64MsrBitmap);
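+    /* Note: an MSR bitmap is a single 4K page consisting of four 1K regions: read and write bitmaps
+       covering MSRs 00000000h-00001FFFh and C0000000h-C0001FFFh. */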
+
+ /*
+ * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any
+ * MSR that is intercepted by the guest is also intercepted while executing the
+ * nested-guest using hardware-assisted VMX.
+ *
+ * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a
+ * nested-guest VM-exit even if the outer guest is not intercepting some
+ * MSRs. We cannot assume the caller has initialized the nested-guest
+ * MSR bitmap in this case.
+ *
+ * The nested hypervisor may also switch whether it uses MSR bitmaps for
+     *       each of its VM-entries, hence initializing it once per-VM while setting
+ * up the nested-guest VMCS is not sufficient.
+ */
+ PCVMXVVMCS pVmcsNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.CTX_SUFF(pVmcs);
+ if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ {
+ uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)pVCpu->cpum.GstCtx.hwvirt.vmx.CTX_SUFF(pvMsrBitmap);
+ uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap;
+ Assert(pu64MsrBitmapNstGst);
+ Assert(pu64MsrBitmapGst);
+
+ uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t);
+ for (uint32_t i = 0; i < cFrags; i++)
+ pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i];
+ }
+ else
+ ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff));
+}
+
+
+/**
+ * Merges the guest VMCS in to the nested-guest VMCS controls in preparation of
+ * hardware-assisted VMX execution of the nested-guest.
+ *
+ * For a guest, we don't modify these controls once we set up the VMCS and hence
+ * this function is never called.
+ *
+ * For nested-guests, since the nested hypervisor provides these controls on every
+ * nested-guest VM-entry and could potentially change them every time, we need to
+ * merge them before every nested-guest VM-entry.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu)
+{
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCVMXVMCSINFO pVmcsInfoGst = &pVCpu->hm.s.vmx.VmcsInfo;
+ PCVMXVVMCS pVmcsNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.CTX_SUFF(pVmcs);
+ Assert(pVmcsNstGst);
+
+ /*
+ * Merge the controls with the requirements of the guest VMCS.
+ *
+ * We do not need to validate the nested-guest VMX features specified in the nested-guest
+ * VMCS with the features supported by the physical CPU as it's already done by the
+ * VMLAUNCH/VMRESUME instruction emulation.
+ *
+ * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are
+ * derived from the VMX features supported by the physical CPU.
+ */
+
+ /* Pin-based VM-execution controls. */
+ uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls;
+
+ /* Processor-based VM-execution controls. */
+ uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS)
+ | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT
+ | VMX_PROC_CTLS_NMI_WINDOW_EXIT
+ | VMX_PROC_CTLS_USE_TPR_SHADOW
+ | VMX_PROC_CTLS_MONITOR_TRAP_FLAG));
+
+ /* Secondary processor-based VM-execution controls. */
+ uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID)
+ | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS
+ | VMX_PROC_CTLS2_INVPCID
+ | VMX_PROC_CTLS2_VMCS_SHADOWING
+ | VMX_PROC_CTLS2_RDTSCP
+ | VMX_PROC_CTLS2_XSAVES_XRSTORS
+ | VMX_PROC_CTLS2_APIC_REG_VIRT
+ | VMX_PROC_CTLS2_VIRT_INT_DELIVERY
+ | VMX_PROC_CTLS2_VMFUNC));
+
+ /*
+ * VM-entry controls:
+     *    These controls contain state that depends on the nested-guest state (primarily
+ * EFER MSR) and is thus not constant between VMLAUNCH/VMRESUME and the nested-guest
+ * VM-exit. Although the nested hypervisor cannot change it, we need to in order to
+ * properly continue executing the nested-guest if the EFER MSR changes but does not
+     *    cause a nested-guest VM-exit.
+ *
+ * VM-exit controls:
+ * These controls specify the host state on return. We cannot use the controls from
+     *    the nested hypervisor state as-is since it would contain the guest state rather than
+ * the host state. Since the host state is subject to change (e.g. preemption, trips
+ * to ring-3, longjmp and rescheduling to a different host CPU) they are not constant
+ * through VMLAUNCH/VMRESUME and the nested-guest VM-exit.
+ *
+ * VM-entry MSR-load:
+ * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU
+ * context by the VMLAUNCH/VMRESUME instruction emulation.
+ *
+ * VM-exit MSR-store:
+ * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context
+ * back into the VM-exit MSR-store area.
+ *
+ * VM-exit MSR-load areas:
+ * This must contain the real host MSRs with hardware-assisted VMX execution. Hence, we
+ * can entirely ignore what the nested hypervisor wants to load here.
+ */
+
+ /*
+ * Exception bitmap.
+ *
+ * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap
+ * here (and avoid doing anything while exporting nested-guest state), but to keep the
+     * code more flexible should exception interception become more dynamic in the future,
+     * we do it as part of exporting the nested-guest state.
+ */
+ uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap;
+
+ /*
+ * CR0/CR4 guest/host mask.
+ *
+ * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must
+ * cause VM-exits, so we need to merge them here.
+ */
+ uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask;
+ uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask;
+
+ /*
+ * Page-fault error-code mask and match.
+ *
+ * Although we require unrestricted guest execution (and thereby nested-paging) for
+ * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't
+ * normally intercept #PFs, it might intercept them for debugging purposes.
+ *
+ * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters.
+ * If the outer guest is intercepting #PFs, we must intercept all #PFs.
+ */
+ uint32_t u32XcptPFMask;
+ uint32_t u32XcptPFMatch;
+ if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF)))
+ {
+ u32XcptPFMask = pVmcsNstGst->u32XcptPFMask;
+ u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch;
+ }
+ else
+ {
+ u32XcptPFMask = 0;
+ u32XcptPFMatch = 0;
+ }
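+
+    /*
+     * Editor's note (illustrative sketch, not part of the original source): per the Intel
+     * SDM description of the page-fault error-code mask and match fields, the filtering
+     * set up above works roughly as follows (uPfec is a hypothetical name for the #PF
+     * error code):
+     *
+     * @code
+     *     bool const fBitmapPf = RT_BOOL(u32XcptBitmap & RT_BIT(X86_XCPT_PF));
+     *     bool const fVmExit   = ((uPfec & u32XcptPFMask) == u32XcptPFMatch) == fBitmapPf;
+     * @endcode
+     *
+     * With mask and match both zero the comparison always holds, so the exception bitmap
+     * bit alone decides, which is why the "intercept all #PFs" case above zeroes both fields.
+     */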
+
+ /*
+ * Pause-Loop exiting.
+ */
+ uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap);
+ uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow);
+
+ /*
+ * Pending debug exceptions.
+ * Currently just copy whatever the nested-guest provides us.
+ */
+ uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u;
+
+ /*
+ * I/O Bitmap.
+ *
+ * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always
+ * intercept all I/O port accesses.
+ */
+ Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT);
+ Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS));
+
+ /*
+ * VMCS shadowing.
+ *
+ * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be
+ * enabled while executing the nested-guest.
+ */
+ Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING));
+
+ /*
+ * APIC-access page.
+ */
+ RTHCPHYS HCPhysApicAccess;
+ if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
+ {
+ Assert(pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
+ RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
+
+ /** @todo NSTVMX: This is not really correct but currently is required to make
+         *        things work. We need to re-enable the page handler when we fall back to
+ * IEM execution of the nested-guest! */
+ PGMHandlerPhysicalPageTempOff(pVM, GCPhysApicAccess, GCPhysApicAccess);
+
+ void *pvPage;
+ PGMPAGEMAPLOCK PgLockApicAccess;
+ int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess);
+ if (RT_SUCCESS(rc))
+ {
+ rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess);
+ AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc);
+
+ /** @todo Handle proper releasing of page-mapping lock later. */
+ PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess);
+ }
+ else
+ return rc;
+ }
+ else
+ HCPhysApicAccess = 0;
+
+ /*
+ * Virtual-APIC page and TPR threshold.
+ */
+ RTHCPHYS HCPhysVirtApic;
+ uint32_t u32TprThreshold;
+ if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
+ {
+ Assert(pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW);
+ RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u;
+
+ void *pvPage;
+ PGMPAGEMAPLOCK PgLockVirtApic;
+ int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic);
+ if (RT_SUCCESS(rc))
+ {
+ rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic);
+ AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc);
+
+ /** @todo Handle proper releasing of page-mapping lock later. */
+ PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic);
+ }
+ else
+ return rc;
+
+ u32TprThreshold = pVmcsNstGst->u32TprThreshold;
+ }
+ else
+ {
+ HCPhysVirtApic = 0;
+ u32TprThreshold = 0;
+
+ /*
+         * We must make sure CR8 reads/writes cause VM-exits when TPR shadowing is not
+         * used by the nested hypervisor. Preventing MMIO accesses to the physical APIC
+         * is taken care of by EPT/shadow paging.
+ */
+ if (pVM->hm.s.fAllow64BitGuests)
+ {
+ u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT
+ | VMX_PROC_CTLS_CR8_LOAD_EXIT;
+ }
+ }
+
+ /*
+ * Validate basic assumptions.
+ */
+ PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hm.s.vmx.VmcsInfoNstGst;
+ Assert(pVM->hm.s.vmx.fAllowUnrestricted);
+ Assert(pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
+ Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst);
+
+ /*
+ * Commit it to the nested-guest VMCS.
+ */
+ int rc = VINF_SUCCESS;
+ if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls)
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls);
+ if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls)
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls);
+ if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2)
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2);
+ if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap)
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
+ if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask)
+ rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask);
+ if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask)
+ rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask);
+ if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask)
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask);
+ if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch)
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch);
+ if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT)
+ && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
+ {
+ Assert(pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks);
+ }
+ if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
+ {
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
+ rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
+ }
+ if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
+ rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
+ rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts);
+ AssertRC(rc);
+
+ /*
+ * Update the nested-guest VMCS cache.
+ */
+ pVmcsInfoNstGst->u32PinCtls = u32PinCtls;
+ pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls;
+ pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2;
+ pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap;
+ pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask;
+ pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask;
+ pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask;
+ pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch;
+ pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic;
+
+ /*
+ * We need to flush the TLB if we are switching the APIC-access page address.
+ * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
+ */
+ if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
+ pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true;
+
+ /*
+ * MSR bitmap.
+ *
+ * The MSR bitmap address has already been initialized while setting up the nested-guest
+ * VMCS, here we need to merge the MSR bitmaps.
+ */
+ if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst);
+
+ return VINF_SUCCESS;
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+
+/**
+ * Does the preparations before executing guest code in VT-x.
+ *
+ * This may cause longjmps to ring-3 and may even result in rescheduling to the
+ * recompiler/IEM. We must be careful about the guest-state information we commit
+ * to the VMCS here, since this is done on the assumption that we will go on to
+ * execute the guest in VT-x mode.
+ *
+ * If we fall back to the recompiler/IEM after updating the VMCS and clearing
+ * the common-state (TRPM/forceflags), we must undo those changes so that the
+ * recompiler/IEM can (and should) use them when it resumes guest execution.
+ * Otherwise such operations must be done when we can no longer exit to ring-3.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
+ * have been disabled.
+ * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating
+ * pending events).
+ * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
+ * double-fault into the guest.
+ * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
+ * dispatched directly.
+ * @retval VINF_* scheduling changes, we have to go back to ring-3.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param fStepping Whether we are single-stepping the guest in the
+ * hypervisor debugger. Makes us ignore some of the reasons
+ * for returning to ring-3, and return VINF_EM_DBG_STEPPED
+ * if event dispatching took place.
+ */
+static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
+{
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+
+ Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping));
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ if (pVmxTransient->fIsNestedGuest)
+ {
+ RT_NOREF2(pVCpu, fStepping);
+ Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
+ return VINF_EM_RESCHEDULE_REM;
+ }
+#endif
+
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
+ PGMRZDynMapFlushAutoSet(pVCpu);
+#endif
+
+ /*
+ * Check and process force flag actions, some of which might require us to go back to ring-3.
+ */
+ VBOXSTRICTRC rcStrict = hmR0VmxCheckForceFlags(pVCpu, pVmxTransient, fStepping);
+ if (rcStrict == VINF_SUCCESS)
+ {
+ /* FFs don't get set all the time. */
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ if ( pVmxTransient->fIsNestedGuest
+ && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
+ return VINF_VMX_VMEXIT;
+ }
+#endif
+ }
+ else
+ return rcStrict;
+
+ /*
+ * Virtualize memory-mapped accesses to the physical APIC (may take locks).
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase
+ && (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
+ && PDMHasApic(pVM))
+ {
+ int rc = hmR0VmxMapHCApicAccessPage(pVCpu);
+ AssertRCReturn(rc, rc);
+ }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * Merge guest VMCS controls with the nested-guest VMCS controls.
+ *
+ * Even if we have not executed the guest prior to this (e.g. when resuming from a
+ * saved state), we should be okay with merging controls as we initialize the
+     * guest VMCS controls as part of the VM setup phase.
+ */
+ if ( pVmxTransient->fIsNestedGuest
+ && !pVCpu->hm.s.vmx.fMergedNstGstCtls)
+ {
+ int rc = hmR0VmxMergeVmcsNested(pVCpu);
+ AssertRCReturn(rc, rc);
+ pVCpu->hm.s.vmx.fMergedNstGstCtls = true;
+ }
+#endif
+
+ /*
+ * Evaluate events to be injected into the guest.
+ *
+ * Events in TRPM can be injected without inspecting the guest state.
+ * If any new events (interrupts/NMI) are pending currently, we try to set up the
+     * guest to cause a VM-exit the next time it is ready to receive the event.
+ *
+ * With nested-guests, evaluating pending events may cause VM-exits. Also, verify
+ * that the event in TRPM that we will inject using hardware-assisted VMX is -not-
+     * subject to interception. Otherwise, we should have checked and injected it
+ * manually elsewhere (IEM).
+ */
+ if (TRPMHasTrap(pVCpu))
+ {
+ Assert(!pVmxTransient->fIsNestedGuest || !CPUMIsGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx));
+ hmR0VmxTrpmTrapToPendingEvent(pVCpu);
+ }
+
+ uint32_t fIntrState;
+ rcStrict = hmR0VmxEvaluatePendingEvent(pVCpu, pVmxTransient, &fIntrState);
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+     * If something failed while evaluating pending events (unlikely), or if we were
+     * preparing to run a nested-guest but performed a nested-guest VM-exit instead, we should bail.
+ */
+ if (rcStrict != VINF_SUCCESS)
+ return rcStrict;
+ if ( pVmxTransient->fIsNestedGuest
+ && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
+ return VINF_VMX_VMEXIT;
+ }
+#else
+ Assert(rcStrict == VINF_SUCCESS);
+#endif
+
+ /*
+     * Event injection may take locks (currently the PGM lock for the real-on-v86 case) and thus
+ * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
+ * also result in triple-faulting the VM.
+ *
+ * With nested-guests, the above does not apply since unrestricted guest execution is a
+ * requirement. Regardless, we do this here to avoid duplicating code elsewhere.
+ */
+ rcStrict = hmR0VmxInjectPendingEvent(pVCpu, pVmxTransient, fIntrState, fStepping);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ { /* likely */ }
+ else
+ {
+ AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
+ ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+ return rcStrict;
+ }
+
+ /*
+ * A longjump might result in importing CR3 even for VM-exits that don't necessarily
+     * import CR3 themselves. We need to update them here, as even the
+     * hmR0VmxInjectPendingEvent() call above may lazily import guest-CPU state on
+     * demand, causing the force flags below to be set.
+ */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
+ {
+ Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
+ int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
+ AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
+ ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
+ }
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES))
+ {
+ PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
+ }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /* Paranoia. */
+ Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
+#endif
+
+ /*
+ * No longjmps to ring-3 from this point on!!!
+ * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
+ * This also disables flushing of the R0-logger instance (if any).
+ */
+ VMMRZCallRing3Disable(pVCpu);
+
+ /*
+ * Export the guest state bits.
+ *
+ * We cannot perform longjmps while loading the guest state because we do not preserve the
+ * host/guest state (although the VMCS will be preserved) across longjmps which can cause
+ * CPU migration.
+ *
+ * If we are injecting events to a real-on-v86 mode guest, we would have updated RIP and some segment
+ * registers. Hence, exporting of the guest state needs to be done -after- injection of events.
+ */
+ rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ { /* likely */ }
+ else
+ {
+ VMMRZCallRing3Enable(pVCpu);
+ return rcStrict;
+ }
+
+ /*
+ * We disable interrupts so that we don't miss any interrupts that would flag preemption
+ * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
+ * preemption disabled for a while. Since this is purely to aid the
+ * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and
+     * disable interrupts on NT.
+ *
+     * We need to check for force-flags that could've possibly been altered since we last
+ * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
+ * see @bugref{6398}).
+ *
+ * We also check a couple of other force-flags as a last opportunity to get the EMT back
+ * to ring-3 before executing guest code.
+ */
+ pVmxTransient->fEFlags = ASMIntDisableFlags();
+
+ if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
+ || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
+ {
+ if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+             * If we are executing a nested-guest, make sure that we intercept subsequent
+ * events. The one we are injecting might be part of VM-entry. This is mainly to keep
+ * the VM-exit instruction emulation happy.
+ */
+ if (pVmxTransient->fIsNestedGuest)
+ CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true);
+#endif
+
+ /*
+ * We've injected any pending events. This is really the point of no return (to ring-3).
+ *
+ * Note! The caller expects to continue with interrupts & longjmps disabled on successful
+ * returns from this function, so do -not- enable them here.
+ */
+ pVCpu->hm.s.Event.fPending = false;
+ return VINF_SUCCESS;
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
+ rcStrict = VINF_EM_RAW_INTERRUPT;
+ }
+ else
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
+ rcStrict = VINF_EM_RAW_TO_R3;
+ }
+
+ ASMSetFlags(pVmxTransient->fEFlags);
+ VMMRZCallRing3Enable(pVCpu);
+
+ return rcStrict;
+}
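+
+
+/*
+ * Editor's note (illustrative, not part of the original source): the contract of
+ * hmR0VmxPreRunGuest() is easiest to see from its callers further down (e.g.
+ * hmR0VmxRunGuestCodeNormal): on VINF_SUCCESS interrupts are left disabled and the
+ * caller must proceed straight to committing and running the guest, while any other
+ * status is returned with interrupts enabled and sends us back towards ring-3:
+ *
+ * @code
+ *     rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false);   // fStepping = false
+ *     if (rcStrict != VINF_SUCCESS)
+ *         break;                                           // back to ring-3
+ *     hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);   // interrupts are disabled here
+ *     int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
+ *     hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);    // re-enables interrupts
+ * @endcode
+ */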
+
+
+/**
+ * Final preparations before executing guest code using hardware-assisted VMX.
+ *
+ * We can no longer get preempted to a different host CPU and there are no returns
+ * to ring-3. We ignore any errors that may happen from this point (e.g. VMWRITE
+ * failures); this function is not intended to fail sans unrecoverable hardware
+ * errors.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks Called with preemption disabled.
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(VMMR0IsLogFlushDisabled(pVCpu));
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!pVCpu->hm.s.Event.fPending);
+
+ /*
+ * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
+ */
+ VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
+ RTCPUID const idCurrentCpu = pHostCpu->idCpu;
+
+ if (!CPUMIsGuestFPUStateActive(pVCpu))
+ {
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
+ if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT;
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
+ }
+
+ /*
+ * Re-export the host state bits as we may've been preempted (only happens when
+ * thread-context hooks are used or when the VM start function changes) or if
+ * the host CR0 is modified while loading the guest FPU state above.
+ *
+ * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we
+ * changed the switcher back to 32-bit, we *must* save the 32-bit host state here,
+ * see @bugref{8432}.
+ *
+ * This may also happen when switching to/from a nested-guest VMCS without leaving
+ * ring-0.
+ */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
+ {
+ hmR0VmxExportHostState(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState);
+ }
+ Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
+
+ /*
+ * Export the state shared between host and guest (FPU, debug, lazy MSRs).
+ */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
+ hmR0VmxExportSharedState(pVCpu, pVmxTransient);
+ AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
+
+ /*
+ * Store status of the shared guest/host debug state at the time of VM-entry.
+ */
+ pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
+ pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
+
+ /*
+ * Always cache the TPR-shadow if the virtual-APIC page exists, thereby skipping
+ * more than one conditional check. The post-run side of our code shall determine
+     * if it needs to sync the virtual APIC TPR with the TPR-shadow.
+ */
+ if (pVmcsInfo->pbVirtApic)
+ pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
+
+ /*
+ * Update the host MSRs values in the VM-exit MSR-load area.
+ */
+ if (!pVCpu->hm.s.vmx.fUpdatedHostAutoMsrs)
+ {
+ if (pVmcsInfo->cExitMsrLoad > 0)
+ hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo);
+ pVCpu->hm.s.vmx.fUpdatedHostAutoMsrs = true;
+ }
+
+ /*
+ * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the
+ * VMX-preemption timer based on the next virtual sync clock deadline.
+ */
+ if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer
+ || idCurrentCpu != pVCpu->hm.s.idLastCpu)
+ {
+ hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient);
+ pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true;
+ }
+
+ /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
+ bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT);
+ if (!fIsRdtscIntercepted)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
+
+ ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
+ hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */
+ Assert(idCurrentCpu == pVCpu->hm.s.idLastCpu);
+ pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */
+ pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */
+ pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */
+
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
+
+ TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution,
+ as we're about to start executing the guest. */
+
+ /*
+ * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP.
+ *
+ * This is done this late as updating the TSC offsetting/preemption timer above
+ * figures out if we can skip intercepting RDTSCP by calculating the number of
+ * host CPU ticks till the next virtual sync deadline (for the dynamic case).
+ */
+ if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
+ && !fIsRdtscIntercepted)
+ {
+ hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX);
+
+ /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true,
+ it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */
+ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu),
+ true /* fSetReadWrite */, true /* fUpdateHostMsr */);
+ AssertRC(rc);
+ Assert(!pVmxTransient->fRemoveTscAuxMsr);
+ pVmxTransient->fRemoveTscAuxMsr = true;
+ }
+
+#ifdef VBOX_STRICT
+ Assert(pVCpu->hm.s.vmx.fUpdatedHostAutoMsrs);
+ hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
+ hmR0VmxCheckHostEferMsr(pVCpu, pVmcsInfo);
+ AssertRC(hmR0VmxCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest));
+#endif
+
+#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
+ /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here.
+ * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs,
+ * see @bugref{9180#c54}. */
+ uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
+ if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
+ Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
+#endif
+}
+
+
+/**
+ * First C routine invoked after running guest code using hardware-assisted VMX.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
+ *
+ * @remarks Called with interrupts disabled, and returns with interrupts enabled!
+ *
+ * @remarks No-long-jump zone!!! This function will however re-enable longjmps
+ * unconditionally when it is safe to do so.
+ */
+static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
+{
+ uint64_t const uHostTsc = ASMReadTSC(); /** @todo We can do a lot better here, see @bugref{9180#c38}. */
+
+ ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
+ ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
+ pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
+ pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
+ pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
+ pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
+ {
+ uint64_t uGstTsc;
+ if (!pVmxTransient->fIsNestedGuest)
+ uGstTsc = uHostTsc + pVmcsInfo->u64TscOffset;
+ else
+ {
+ uint64_t const uNstGstTsc = uHostTsc + pVmcsInfo->u64TscOffset;
+ uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc);
+ }
+ TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */
+ }
+
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
+ TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu); /* Notify TM that the guest is no longer running. */
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
+
+ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */
+ pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
+#ifdef VBOX_STRICT
+ hmR0VmxCheckHostEferMsr(pVCpu, pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */
+#endif
+ Assert(!ASMIntAreEnabled());
+ ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+#ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT
+ /*
+ * Clean all the VMCS fields in the transient structure before reading
+ * anything from the VMCS.
+ */
+ pVmxTransient->uExitReason = 0;
+ pVmxTransient->uExitIntErrorCode = 0;
+ pVmxTransient->uExitQual = 0;
+ pVmxTransient->uGuestLinearAddr = 0;
+ pVmxTransient->uExitIntInfo = 0;
+ pVmxTransient->cbExitInstr = 0;
+ pVmxTransient->ExitInstrInfo.u = 0;
+ pVmxTransient->uEntryIntInfo = 0;
+ pVmxTransient->uEntryXcptErrorCode = 0;
+ pVmxTransient->cbEntryInstr = 0;
+ pVmxTransient->uIdtVectoringInfo = 0;
+ pVmxTransient->uIdtVectoringErrorCode = 0;
+#endif
+
+ /*
+ * Save the basic VM-exit reason and check if the VM-entry failed.
+ * See Intel spec. 24.9.1 "Basic VM-exit Information".
+ */
+ uint32_t uExitReason;
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
+ AssertRC(rc);
+ pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
+ pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
+
+ /*
+ * Log the VM-exit before logging anything else as otherwise it might be a
+ * tad confusing what happens before and after the world-switch.
+ */
+ HMVMX_LOG_EXIT(pVCpu, uExitReason);
+
+ /*
+ * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR
+ * bitmap permissions, if it was added before VM-entry.
+ */
+ if (pVmxTransient->fRemoveTscAuxMsr)
+ {
+ hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX);
+ pVmxTransient->fRemoveTscAuxMsr = false;
+ }
+
+ /*
+ * Check if VMLAUNCH/VMRESUME succeeded.
+ * If this failed, we cause a guru meditation and cease further execution.
+ *
+ * However, if we are executing a nested-guest we might fail if we use the
+     * fast path rather than fully emulating the VMLAUNCH/VMRESUME instruction in IEM.
+ */
+ if (RT_LIKELY(rcVMRun == VINF_SUCCESS))
+ {
+ /*
+ * Update the VM-exit history array here even if the VM-entry failed due to:
+ * - Invalid guest state.
+ * - MSR loading.
+ * - Machine-check event.
+ *
+ * In any of the above cases we will still have a "valid" VM-exit reason
+         * despite @a fVMEntryFailed being true.
+ *
+ * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
+ *
+ * Note! We don't have CS or RIP at this point. Will probably address that later
+ * by amending the history entry added here.
+ */
+ EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
+ UINT64_MAX, uHostTsc);
+
+ if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
+ {
+ VMMRZCallRing3Enable(pVCpu);
+
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
+
+#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
+ hmR0VmxReadAllRoFieldsVmcs(pVmxTransient);
+#endif
+
+ /*
+             * Always import the guest-interruptibility state as we need it while evaluating
+             * and injecting events on re-entry.
+ *
+ * We don't import CR0 (when unrestricted guest execution is unavailable) despite
+ * checking for real-mode while exporting the state because all bits that cause
+ * mode changes wrt CR0 are intercepted.
+ */
+ uint64_t const fImportMask = CPUMCTX_EXTRN_HM_VMX_INT_STATE
+#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
+ | HMVMX_CPUMCTX_EXTRN_ALL
+#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
+ | CPUMCTX_EXTRN_RFLAGS
+#endif
+ ;
+ rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, fImportMask);
+ AssertRC(rc);
+
+ /*
+ * Sync the TPR shadow with our APIC state.
+ */
+ if ( !pVmxTransient->fIsNestedGuest
+ && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
+ {
+ Assert(pVmcsInfo->pbVirtApic);
+ if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR])
+ {
+ rc = APICSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]);
+ AssertRC(rc);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ }
+ }
+
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+ return;
+ }
+ }
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ else if (pVmxTransient->fIsNestedGuest)
+ AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n"));
+#endif
+ else
+ Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
+
+ VMMRZCallRing3Enable(pVCpu);
+}
+
+
+/**
+ * Runs the guest code using hardware-assisted VMX the normal way.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pcLoops Pointer to the number of executed loops.
+ */
+static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
+{
+ uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops;
+ Assert(pcLoops);
+ Assert(*pcLoops <= cMaxResumeLoops);
+ Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /*
+ * Switch to the guest VMCS as we may have transitioned from executing the nested-guest
+ * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the
+ * guest VMCS while entering the VMX ring-0 session.
+ */
+ if (pVCpu->hm.s.vmx.fSwitchedToNstGstVmcs)
+ {
+ int rc = hmR0VmxSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ {
+ LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc));
+ return rc;
+ }
+ }
+#endif
+
+ VMXTRANSIENT VmxTransient;
+ RT_ZERO(VmxTransient);
+ VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+
+ /* Paranoia. */
+ Assert(VmxTransient.pVmcsInfo == &pVCpu->hm.s.vmx.VmcsInfo);
+
+ VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
+ for (;;)
+ {
+ Assert(!HMR0SuspendPending());
+ HMVMX_ASSERT_CPU_SAFE(pVCpu);
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+
+ /*
+         * Preparatory work for running guest code; this may force us to
+ * return to ring-3.
+ *
+ * Warning! This bugger disables interrupts on VINF_SUCCESS!
+ */
+ rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
+ if (rcStrict != VINF_SUCCESS)
+ break;
+
+ /* Interrupts are disabled at this point! */
+ hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
+ int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
+ hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
+ /* Interrupts are re-enabled at this point! */
+
+ /*
+ * Check for errors with running the VM (VMLAUNCH/VMRESUME).
+ */
+ if (RT_SUCCESS(rcRun))
+ { /* very likely */ }
+ else
+ {
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
+ hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
+ return rcRun;
+ }
+
+ /*
+ * Profile the VM-exit.
+ */
+ AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
+ HMVMX_START_EXIT_DISPATCH_PROF();
+
+ VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
+
+ /*
+ * Handle the VM-exit.
+ */
+#ifdef HMVMX_USE_FUNCTION_TABLE
+ rcStrict = g_apfnVMExitHandlers[VmxTransient.uExitReason](pVCpu, &VmxTransient);
+#else
+ rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient);
+#endif
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
+ if (rcStrict == VINF_SUCCESS)
+ {
+ if (++(*pcLoops) <= cMaxResumeLoops)
+ continue;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+ rcStrict = VINF_EM_RAW_INTERRUPT;
+ }
+ break;
+ }
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+ return rcStrict;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Runs the nested-guest code using hardware-assisted VMX.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pcLoops Pointer to the number of executed loops.
+ *
+ * @sa hmR0VmxRunGuestCodeNormal.
+ */
+static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
+{
+ uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops;
+ Assert(pcLoops);
+ Assert(*pcLoops <= cMaxResumeLoops);
+ Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
+
+ /*
+ * Switch to the nested-guest VMCS as we may have transitioned from executing the
+ * guest without leaving ring-0. Otherwise, if we came from ring-3 we would have
+ * loaded the nested-guest VMCS while entering the VMX ring-0 session.
+ */
+ if (!pVCpu->hm.s.vmx.fSwitchedToNstGstVmcs)
+ {
+ int rc = hmR0VmxSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else
+ {
+ LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rc));
+ return rc;
+ }
+ }
+
+ VMXTRANSIENT VmxTransient;
+ RT_ZERO(VmxTransient);
+ VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+ VmxTransient.fIsNestedGuest = true;
+
+ /* Paranoia. */
+ Assert(VmxTransient.pVmcsInfo == &pVCpu->hm.s.vmx.VmcsInfoNstGst);
+
+ VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
+ for (;;)
+ {
+ Assert(!HMR0SuspendPending());
+ HMVMX_ASSERT_CPU_SAFE(pVCpu);
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+
+ /*
+         * Preparatory work for running nested-guest code; this may force us to
+ * return to ring-3.
+ *
+ * Warning! This bugger disables interrupts on VINF_SUCCESS!
+ */
+ rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
+ if (rcStrict != VINF_SUCCESS)
+ break;
+
+ /* Interrupts are disabled at this point! */
+ hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
+ int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
+ hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
+ /* Interrupts are re-enabled at this point! */
+
+ /*
+ * Check for errors with running the VM (VMLAUNCH/VMRESUME).
+ */
+ if (RT_SUCCESS(rcRun))
+ { /* very likely */ }
+ else
+ {
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
+ hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
+ return rcRun;
+ }
+
+ /*
+ * Profile the VM-exit.
+ */
+ AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll);
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatNestedExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
+ HMVMX_START_EXIT_DISPATCH_PROF();
+
+ VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
+
+ /*
+ * Handle the VM-exit.
+ */
+ rcStrict = hmR0VmxHandleExitNested(pVCpu, &VmxTransient);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
+ if (rcStrict == VINF_SUCCESS)
+ {
+ if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
+ rcStrict = VINF_VMX_VMEXIT;
+ }
+ else
+ {
+ if (++(*pcLoops) <= cMaxResumeLoops)
+ continue;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+ rcStrict = VINF_EM_RAW_INTERRUPT;
+ }
+ }
+ else
+ Assert(rcStrict != VINF_VMX_VMEXIT);
+ break;
+ }
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+ return rcStrict;
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+
+/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
+ * probes.
+ *
+ * The following few functions and associated structure contain the bloat
+ * necessary for providing detailed debug events and dtrace probes as well as
+ * reliable host side single stepping. This works on the principle of
+ * "subclassing" the normal execution loop and workers. We replace the loop
+ * method completely and override selected helpers to add necessary adjustments
+ * to their core operation.
+ *
+ * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
+ * any performance for debug and analysis features.
+ *
+ * @{
+ */
+
+/**
+ * Transient per-VCPU debug state of the VMCS and related info that we save/restore in
+ * the debug run loop.
+ */
+typedef struct VMXRUNDBGSTATE
+{
+ /** The RIP we started executing at. This is for detecting that we stepped. */
+ uint64_t uRipStart;
+ /** The CS we started executing with. */
+ uint16_t uCsStart;
+
+ /** Whether we've actually modified the 1st execution control field. */
+ bool fModifiedProcCtls : 1;
+ /** Whether we've actually modified the 2nd execution control field. */
+ bool fModifiedProcCtls2 : 1;
+ /** Whether we've actually modified the exception bitmap. */
+ bool fModifiedXcptBitmap : 1;
+
+    /** We desire the CR0 guest/host mask to be cleared. */
+    bool fClearCr0Mask : 1;
+    /** We desire the CR4 guest/host mask to be cleared. */
+ bool fClearCr4Mask : 1;
+ /** Stuff we need in VMX_VMCS32_CTRL_PROC_EXEC. */
+ uint32_t fCpe1Extra;
+ /** Stuff we do not want in VMX_VMCS32_CTRL_PROC_EXEC. */
+ uint32_t fCpe1Unwanted;
+ /** Stuff we need in VMX_VMCS32_CTRL_PROC_EXEC2. */
+ uint32_t fCpe2Extra;
+ /** Extra stuff we need in VMX_VMCS32_CTRL_EXCEPTION_BITMAP. */
+ uint32_t bmXcptExtra;
+ /** The sequence number of the Dtrace provider settings the state was
+ * configured against. */
+ uint32_t uDtraceSettingsSeqNo;
+ /** VM-exits to check (one bit per VM-exit). */
+ uint32_t bmExitsToCheck[3];
+
+ /** The initial VMX_VMCS32_CTRL_PROC_EXEC value (helps with restore). */
+ uint32_t fProcCtlsInitial;
+ /** The initial VMX_VMCS32_CTRL_PROC_EXEC2 value (helps with restore). */
+ uint32_t fProcCtls2Initial;
+ /** The initial VMX_VMCS32_CTRL_EXCEPTION_BITMAP value (helps with restore). */
+ uint32_t bmXcptInitial;
+} VMXRUNDBGSTATE;
+AssertCompileMemberSize(VMXRUNDBGSTATE, bmExitsToCheck, (VMX_EXIT_MAX + 1 + 31) / 32 * 4);
+typedef VMXRUNDBGSTATE *PVMXRUNDBGSTATE;
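+
+/*
+ * Editor's note (illustrative): the AssertCompileMemberSize above checks that bmExitsToCheck
+ * provides one bit per basic VM-exit reason, rounded up to whole 32-bit words. Given the
+ * uint32_t bmExitsToCheck[3] member, VMX_EXIT_MAX must fall in the 64..95 range, making
+ * (VMX_EXIT_MAX + 1 + 31) / 32 equal to 3 words, i.e. 12 bytes.
+ */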
+
+
+/**
+ * Initializes the VMXRUNDBGSTATE structure.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the
+ * calling EMT.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param pDbgState The debug state to initialize.
+ */
+static void hmR0VmxRunDebugStateInit(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState)
+{
+ pDbgState->uRipStart = pVCpu->cpum.GstCtx.rip;
+ pDbgState->uCsStart = pVCpu->cpum.GstCtx.cs.Sel;
+
+ pDbgState->fModifiedProcCtls = false;
+ pDbgState->fModifiedProcCtls2 = false;
+ pDbgState->fModifiedXcptBitmap = false;
+ pDbgState->fClearCr0Mask = false;
+ pDbgState->fClearCr4Mask = false;
+ pDbgState->fCpe1Extra = 0;
+ pDbgState->fCpe1Unwanted = 0;
+ pDbgState->fCpe2Extra = 0;
+ pDbgState->bmXcptExtra = 0;
+ pDbgState->fProcCtlsInitial = pVmxTransient->pVmcsInfo->u32ProcCtls;
+ pDbgState->fProcCtls2Initial = pVmxTransient->pVmcsInfo->u32ProcCtls2;
+ pDbgState->bmXcptInitial = pVmxTransient->pVmcsInfo->u32XcptBitmap;
+}
+
+
+/**
+ * Updates the VMCS fields with changes requested by @a pDbgState.
+ *
+ * This is performed after hmR0VmxPreRunGuestDebugStateUpdate as well as
+ * immediately before executing guest code, i.e. when interrupts are disabled.
+ * We don't check status codes here as we cannot easily assert or return in the
+ * latter case.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param pDbgState The debug state.
+ */
+static void hmR0VmxPreRunGuestDebugStateApply(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState)
+{
+ /*
+ * Ensure desired flags in VMCS control fields are set.
+ * (Ignoring write failure here, as we're committed and it's just debug extras.)
+ *
+ * Note! We load the shadow CR0 & CR4 bits when we flag the clearing, so
+ * there should be no stale data in pCtx at this point.
+ */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if ( (pVmcsInfo->u32ProcCtls & pDbgState->fCpe1Extra) != pDbgState->fCpe1Extra
+ || (pVmcsInfo->u32ProcCtls & pDbgState->fCpe1Unwanted))
+ {
+ pVmcsInfo->u32ProcCtls |= pDbgState->fCpe1Extra;
+ pVmcsInfo->u32ProcCtls &= ~pDbgState->fCpe1Unwanted;
+ VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
+ Log6Func(("VMX_VMCS32_CTRL_PROC_EXEC: %#RX32\n", pVmcsInfo->u32ProcCtls));
+ pDbgState->fModifiedProcCtls = true;
+ }
+
+ if ((pVmcsInfo->u32ProcCtls2 & pDbgState->fCpe2Extra) != pDbgState->fCpe2Extra)
+ {
+ pVmcsInfo->u32ProcCtls2 |= pDbgState->fCpe2Extra;
+ VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, pVmcsInfo->u32ProcCtls2);
+ Log6Func(("VMX_VMCS32_CTRL_PROC_EXEC2: %#RX32\n", pVmcsInfo->u32ProcCtls2));
+ pDbgState->fModifiedProcCtls2 = true;
+ }
+
+ if ((pVmcsInfo->u32XcptBitmap & pDbgState->bmXcptExtra) != pDbgState->bmXcptExtra)
+ {
+ pVmcsInfo->u32XcptBitmap |= pDbgState->bmXcptExtra;
+ VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVmcsInfo->u32XcptBitmap);
+ Log6Func(("VMX_VMCS32_CTRL_EXCEPTION_BITMAP: %#RX32\n", pVmcsInfo->u32XcptBitmap));
+ pDbgState->fModifiedXcptBitmap = true;
+ }
+
+ if (pDbgState->fClearCr0Mask && pVmcsInfo->u64Cr0Mask != 0)
+ {
+ pVmcsInfo->u64Cr0Mask = 0;
+ VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, 0);
+ Log6Func(("VMX_VMCS_CTRL_CR0_MASK: 0\n"));
+ }
+
+ if (pDbgState->fClearCr4Mask && pVmcsInfo->u64Cr4Mask != 0)
+ {
+ pVmcsInfo->u64Cr4Mask = 0;
+ VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, 0);
+ Log6Func(("VMX_VMCS_CTRL_CR4_MASK: 0\n"));
+ }
+
+ NOREF(pVCpu);
+}
+
+
+/**
+ * Restores VMCS fields that were changed by hmR0VmxPreRunGuestDebugStateApply for
+ * re-entry next time around.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param pDbgState The debug state.
+ * @param rcStrict The return code from executing the guest using single
+ * stepping.
+ */
+static VBOXSTRICTRC hmR0VmxRunDebugStateRevert(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState,
+ VBOXSTRICTRC rcStrict)
+{
+ /*
+ * Restore VM-exit control settings as we may not reenter this function the
+ * next time around.
+ */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+
+    /* We reload the initial value and trigger what recalculations we can the
+       next time around.  From the looks of things, that's all that's required atm. */
+ if (pDbgState->fModifiedProcCtls)
+ {
+ if (!(pDbgState->fProcCtlsInitial & VMX_PROC_CTLS_MOV_DR_EXIT) && CPUMIsHyperDebugStateActive(pVCpu))
+ pDbgState->fProcCtlsInitial |= VMX_PROC_CTLS_MOV_DR_EXIT; /* Avoid assertion in hmR0VmxLeave */
+ int rc2 = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pDbgState->fProcCtlsInitial);
+ AssertRC(rc2);
+ pVmcsInfo->u32ProcCtls = pDbgState->fProcCtlsInitial;
+ }
+
+ /* We're currently the only ones messing with this one, so just restore the
+ cached value and reload the field. */
+ if ( pDbgState->fModifiedProcCtls2
+ && pVmcsInfo->u32ProcCtls2 != pDbgState->fProcCtls2Initial)
+ {
+ int rc2 = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, pDbgState->fProcCtls2Initial);
+ AssertRC(rc2);
+ pVmcsInfo->u32ProcCtls2 = pDbgState->fProcCtls2Initial;
+ }
+
+ /* If we've modified the exception bitmap, we restore it and trigger
+ reloading and partial recalculation the next time around. */
+ if (pDbgState->fModifiedXcptBitmap)
+ pVmcsInfo->u32XcptBitmap = pDbgState->bmXcptInitial;
+
+ return rcStrict;
+}
+
+
+/**
+ * Configures VM-exit controls for current DBGF and DTrace settings.
+ *
+ * This updates @a pDbgState and the VMCS execution control fields to reflect
+ * the necessary VM-exits demanded by DBGF and DTrace.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure. May update
+ * fUpdatedTscOffsettingAndPreemptTimer.
+ * @param pDbgState The debug state.
+ */
+static void hmR0VmxPreRunGuestDebugStateUpdate(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState)
+{
+ /*
+ * Take down the dtrace serial number so we can spot changes.
+ */
+ pDbgState->uDtraceSettingsSeqNo = VBOXVMM_GET_SETTINGS_SEQ_NO();
+ ASMCompilerBarrier();
+
+ /*
+ * We'll rebuild most of the middle block of data members (holding the
+ * current settings) as we go along here, so start by clearing it all.
+ */
+ pDbgState->bmXcptExtra = 0;
+ pDbgState->fCpe1Extra = 0;
+ pDbgState->fCpe1Unwanted = 0;
+ pDbgState->fCpe2Extra = 0;
+ for (unsigned i = 0; i < RT_ELEMENTS(pDbgState->bmExitsToCheck); i++)
+ pDbgState->bmExitsToCheck[i] = 0;
+
+ /*
+ * Software interrupts (INT XXh) - no idea how to trigger these...
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if ( DBGF_IS_EVENT_ENABLED(pVM, DBGFEVENT_INTERRUPT_SOFTWARE)
+ || VBOXVMM_INT_SOFTWARE_ENABLED())
+ {
+ ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_XCPT_OR_NMI);
+ }
+
+ /*
+ * INT3 breakpoints - triggered by #BP exceptions.
+ */
+ if (pVM->dbgf.ro.cEnabledInt3Breakpoints > 0)
+ pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BP);
+
+ /*
+ * Exception bitmap and XCPT events+probes.
+ */
+ for (int iXcpt = 0; iXcpt < (DBGFEVENT_XCPT_LAST - DBGFEVENT_XCPT_FIRST + 1); iXcpt++)
+ if (DBGF_IS_EVENT_ENABLED(pVM, (DBGFEVENTTYPE)(DBGFEVENT_XCPT_FIRST + iXcpt)))
+ pDbgState->bmXcptExtra |= RT_BIT_32(iXcpt);
+
+ if (VBOXVMM_XCPT_DE_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DE);
+ if (VBOXVMM_XCPT_DB_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DB);
+ if (VBOXVMM_XCPT_BP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BP);
+ if (VBOXVMM_XCPT_OF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_OF);
+ if (VBOXVMM_XCPT_BR_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BR);
+ if (VBOXVMM_XCPT_UD_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_UD);
+ if (VBOXVMM_XCPT_NM_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_NM);
+ if (VBOXVMM_XCPT_DF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DF);
+ if (VBOXVMM_XCPT_TS_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_TS);
+ if (VBOXVMM_XCPT_NP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_NP);
+ if (VBOXVMM_XCPT_SS_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_SS);
+ if (VBOXVMM_XCPT_GP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_GP);
+ if (VBOXVMM_XCPT_PF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_PF);
+ if (VBOXVMM_XCPT_MF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_MF);
+ if (VBOXVMM_XCPT_AC_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_AC);
+ if (VBOXVMM_XCPT_XF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_XF);
+ if (VBOXVMM_XCPT_VE_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_VE);
+ if (VBOXVMM_XCPT_SX_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_SX);
+
+ if (pDbgState->bmXcptExtra)
+ ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_XCPT_OR_NMI);
+
+ /*
+ * Process events and probes for VM-exits, making sure we get the wanted VM-exits.
+ *
+ * Note! This is the reverse of what hmR0VmxHandleExitDtraceEvents does.
+ * So, when adding/changing/removing please don't forget to update it.
+ *
+     * Some of the macros are picking up local variables to save horizontal space
+     * (being able to see it in a table is the lesser evil here).
+ */
+#define IS_EITHER_ENABLED(a_pVM, a_EventSubName) \
+ ( DBGF_IS_EVENT_ENABLED(a_pVM, RT_CONCAT(DBGFEVENT_, a_EventSubName)) \
+ || RT_CONCAT3(VBOXVMM_, a_EventSubName, _ENABLED)() )
+#define SET_ONLY_XBM_IF_EITHER_EN(a_EventSubName, a_uExit) \
+ if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
+ { AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
+ ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
+ } else do { } while (0)
+#define SET_CPE1_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fCtrlProcExec) \
+ if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
+ { \
+ (pDbgState)->fCpe1Extra |= (a_fCtrlProcExec); \
+ AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
+ ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
+ } else do { } while (0)
+#define SET_CPEU_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fUnwantedCtrlProcExec) \
+ if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
+ { \
+ (pDbgState)->fCpe1Unwanted |= (a_fUnwantedCtrlProcExec); \
+ AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
+ ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
+ } else do { } while (0)
+#define SET_CPE2_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fCtrlProcExec2) \
+ if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
+ { \
+ (pDbgState)->fCpe2Extra |= (a_fCtrlProcExec2); \
+ AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
+ ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
+ } else do { } while (0)
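+
+    /*
+     * Editor's note (illustrative, not part of the original source): as an example of what
+     * one entry in the table below expands to, SET_CPE1_XBM_IF_EITHER_EN(INSTR_HALT,
+     * VMX_EXIT_HLT, VMX_PROC_CTLS_HLT_EXIT) becomes, roughly (the AssertCompile omitted):
+     *
+     * @code
+     *     if (   DBGF_IS_EVENT_ENABLED(pVM, DBGFEVENT_INSTR_HALT)
+     *         || VBOXVMM_INSTR_HALT_ENABLED())
+     *     {
+     *         pDbgState->fCpe1Extra |= VMX_PROC_CTLS_HLT_EXIT;
+     *         ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_HLT);
+     *     }
+     * @endcode
+     *
+     * i.e. request the HLT-exiting execution control and mark the HLT VM-exit as one whose
+     * events/probes must be checked.
+     */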
+
+ SET_ONLY_XBM_IF_EITHER_EN(EXIT_TASK_SWITCH, VMX_EXIT_TASK_SWITCH); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_EPT_VIOLATION, VMX_EXIT_EPT_VIOLATION); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_EPT_MISCONFIG, VMX_EXIT_EPT_MISCONFIG); /* unconditional (unless #VE) */
+ SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_VAPIC_ACCESS, VMX_EXIT_APIC_ACCESS); /* feature dependent, nothing to enable here */
+ SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_VAPIC_WRITE, VMX_EXIT_APIC_WRITE); /* feature dependent, nothing to enable here */
+
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_CPUID, VMX_EXIT_CPUID); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_CPUID, VMX_EXIT_CPUID);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_GETSEC, VMX_EXIT_GETSEC); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_GETSEC, VMX_EXIT_GETSEC);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_HALT, VMX_EXIT_HLT, VMX_PROC_CTLS_HLT_EXIT); /* paranoia */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_HALT, VMX_EXIT_HLT);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_INVD, VMX_EXIT_INVD); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_INVD, VMX_EXIT_INVD);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_INVLPG, VMX_EXIT_INVLPG, VMX_PROC_CTLS_INVLPG_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_INVLPG, VMX_EXIT_INVLPG);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDPMC, VMX_EXIT_RDPMC, VMX_PROC_CTLS_RDPMC_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDPMC, VMX_EXIT_RDPMC);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDTSC, VMX_EXIT_RDTSC, VMX_PROC_CTLS_RDTSC_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDTSC, VMX_EXIT_RDTSC);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_RSM, VMX_EXIT_RSM); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RSM, VMX_EXIT_RSM);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMM_CALL, VMX_EXIT_VMCALL); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMM_CALL, VMX_EXIT_VMCALL);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMCLEAR, VMX_EXIT_VMCLEAR); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMCLEAR, VMX_EXIT_VMCLEAR);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMLAUNCH, VMX_EXIT_VMLAUNCH); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMLAUNCH, VMX_EXIT_VMLAUNCH);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMPTRLD, VMX_EXIT_VMPTRLD); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMPTRLD, VMX_EXIT_VMPTRLD);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMPTRST, VMX_EXIT_VMPTRST); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMPTRST, VMX_EXIT_VMPTRST);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMREAD, VMX_EXIT_VMREAD); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMREAD, VMX_EXIT_VMREAD);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMRESUME, VMX_EXIT_VMRESUME); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMRESUME, VMX_EXIT_VMRESUME);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMWRITE, VMX_EXIT_VMWRITE); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMWRITE, VMX_EXIT_VMWRITE);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMXOFF, VMX_EXIT_VMXOFF); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMXOFF, VMX_EXIT_VMXOFF);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMXON, VMX_EXIT_VMXON); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMXON, VMX_EXIT_VMXON);
+
+ if ( IS_EITHER_ENABLED(pVM, INSTR_CRX_READ)
+ || IS_EITHER_ENABLED(pVM, INSTR_CRX_WRITE))
+ {
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR4
+ | CPUMCTX_EXTRN_APIC_TPR);
+ AssertRC(rc);
+
+#if 0 /** @todo fix me */
+ pDbgState->fClearCr0Mask = true;
+ pDbgState->fClearCr4Mask = true;
+#endif
+ if (IS_EITHER_ENABLED(pVM, INSTR_CRX_READ))
+ pDbgState->fCpe1Extra |= VMX_PROC_CTLS_CR3_STORE_EXIT | VMX_PROC_CTLS_CR8_STORE_EXIT;
+ if (IS_EITHER_ENABLED(pVM, INSTR_CRX_WRITE))
+ pDbgState->fCpe1Extra |= VMX_PROC_CTLS_CR3_LOAD_EXIT | VMX_PROC_CTLS_CR8_LOAD_EXIT;
+ pDbgState->fCpe1Unwanted |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* risky? */
+ /* Note! We currently don't use VMX_VMCS32_CTRL_CR3_TARGET_COUNT. It would
+ require clearing here and in the loop if we start using it. */
+ ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_MOV_CRX);
+ }
+ else
+ {
+ if (pDbgState->fClearCr0Mask)
+ {
+ pDbgState->fClearCr0Mask = false;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR0);
+ }
+ if (pDbgState->fClearCr4Mask)
+ {
+ pDbgState->fClearCr4Mask = false;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR4);
+ }
+ }
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_CRX_READ, VMX_EXIT_MOV_CRX);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_CRX_WRITE, VMX_EXIT_MOV_CRX);
+
+ if ( IS_EITHER_ENABLED(pVM, INSTR_DRX_READ)
+ || IS_EITHER_ENABLED(pVM, INSTR_DRX_WRITE))
+ {
+ /** @todo later, need to fix handler as it assumes this won't usually happen. */
+ ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_MOV_DRX);
+ }
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_DRX_READ, VMX_EXIT_MOV_DRX);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_DRX_WRITE, VMX_EXIT_MOV_DRX);
+
+ SET_CPEU_XBM_IF_EITHER_EN(INSTR_RDMSR, VMX_EXIT_RDMSR, VMX_PROC_CTLS_USE_MSR_BITMAPS); /* risky clearing this? */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDMSR, VMX_EXIT_RDMSR);
+ SET_CPEU_XBM_IF_EITHER_EN(INSTR_WRMSR, VMX_EXIT_WRMSR, VMX_PROC_CTLS_USE_MSR_BITMAPS);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_WRMSR, VMX_EXIT_WRMSR);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_MWAIT, VMX_EXIT_MWAIT, VMX_PROC_CTLS_MWAIT_EXIT); /* paranoia */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_MWAIT, VMX_EXIT_MWAIT);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_MONITOR, VMX_EXIT_MONITOR, VMX_PROC_CTLS_MONITOR_EXIT); /* paranoia */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_MONITOR, VMX_EXIT_MONITOR);
+#if 0 /** @todo too slow, fix handler. */
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_PAUSE, VMX_EXIT_PAUSE, VMX_PROC_CTLS_PAUSE_EXIT);
+#endif
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_PAUSE, VMX_EXIT_PAUSE);
+
+ if ( IS_EITHER_ENABLED(pVM, INSTR_SGDT)
+ || IS_EITHER_ENABLED(pVM, INSTR_SIDT)
+ || IS_EITHER_ENABLED(pVM, INSTR_LGDT)
+ || IS_EITHER_ENABLED(pVM, INSTR_LIDT))
+ {
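+        /* Note: the descriptor-table exiting control also covers the LDTR/TR
+           instructions handled below; SGDT/SIDT/LGDT/LIDT themselves land on
+           VMX_EXIT_GDTR_IDTR_ACCESS. */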
+ pDbgState->fCpe2Extra |= VMX_PROC_CTLS2_DESC_TABLE_EXIT;
+ ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_GDTR_IDTR_ACCESS);
+ }
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_SGDT, VMX_EXIT_GDTR_IDTR_ACCESS);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_SIDT, VMX_EXIT_GDTR_IDTR_ACCESS);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_LGDT, VMX_EXIT_GDTR_IDTR_ACCESS);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_LIDT, VMX_EXIT_GDTR_IDTR_ACCESS);
+
+ if ( IS_EITHER_ENABLED(pVM, INSTR_SLDT)
+ || IS_EITHER_ENABLED(pVM, INSTR_STR)
+ || IS_EITHER_ENABLED(pVM, INSTR_LLDT)
+ || IS_EITHER_ENABLED(pVM, INSTR_LTR))
+ {
+ pDbgState->fCpe2Extra |= VMX_PROC_CTLS2_DESC_TABLE_EXIT;
+ ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_LDTR_TR_ACCESS);
+ }
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_SLDT, VMX_EXIT_LDTR_TR_ACCESS);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_STR, VMX_EXIT_LDTR_TR_ACCESS);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_LLDT, VMX_EXIT_LDTR_TR_ACCESS);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_LTR, VMX_EXIT_LDTR_TR_ACCESS);
+
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_INVEPT, VMX_EXIT_INVEPT); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVEPT, VMX_EXIT_INVEPT);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDTSCP, VMX_EXIT_RDTSCP, VMX_PROC_CTLS_RDTSC_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDTSCP, VMX_EXIT_RDTSCP);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_INVVPID, VMX_EXIT_INVVPID); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVVPID, VMX_EXIT_INVVPID);
+ SET_CPE2_XBM_IF_EITHER_EN(INSTR_WBINVD, VMX_EXIT_WBINVD, VMX_PROC_CTLS2_WBINVD_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_WBINVD, VMX_EXIT_WBINVD);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_XSETBV, VMX_EXIT_XSETBV); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_XSETBV, VMX_EXIT_XSETBV);
+ SET_CPE2_XBM_IF_EITHER_EN(INSTR_RDRAND, VMX_EXIT_RDRAND, VMX_PROC_CTLS2_RDRAND_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDRAND, VMX_EXIT_RDRAND);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_VMX_INVPCID, VMX_EXIT_INVPCID, VMX_PROC_CTLS_INVLPG_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVPCID, VMX_EXIT_INVPCID);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMFUNC, VMX_EXIT_VMFUNC); /* unconditional for the current setup */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMFUNC, VMX_EXIT_VMFUNC);
+ SET_CPE2_XBM_IF_EITHER_EN(INSTR_RDSEED, VMX_EXIT_RDSEED, VMX_PROC_CTLS2_RDSEED_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDSEED, VMX_EXIT_RDSEED);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_XSAVES, VMX_EXIT_XSAVES); /* unconditional (enabled by host, guest cfg) */
+    SET_ONLY_XBM_IF_EITHER_EN( EXIT_XSAVES, VMX_EXIT_XSAVES);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_XRSTORS, VMX_EXIT_XRSTORS); /* unconditional (enabled by host, guest cfg) */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_XRSTORS, VMX_EXIT_XRSTORS);
+
+#undef IS_EITHER_ENABLED
+#undef SET_ONLY_XBM_IF_EITHER_EN
+#undef SET_CPE1_XBM_IF_EITHER_EN
+#undef SET_CPEU_XBM_IF_EITHER_EN
+#undef SET_CPE2_XBM_IF_EITHER_EN
+
+    /*
+     * Sanitize the control settings: clamp the extra execution controls to what
+     * the CPU allows, and drop "unwanted" bits the CPU forces to one anyway.
+     */
+ pDbgState->fCpe2Extra &= pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1;
+ if (pDbgState->fCpe2Extra)
+ pDbgState->fCpe1Extra |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
+ pDbgState->fCpe1Extra &= pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1;
+ pDbgState->fCpe1Unwanted &= ~pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed0;
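+    /* If the RDTSC-exiting requirement has flipped, the TSC offsetting and
+       preemption timer setup must be redone before the next VM-entry. */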
+ if (pVCpu->hm.s.fDebugWantRdTscExit != RT_BOOL(pDbgState->fCpe1Extra & VMX_PROC_CTLS_RDTSC_EXIT))
+ {
+ pVCpu->hm.s.fDebugWantRdTscExit ^= true;
+ pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = false;
+ }
+
+ Log6(("HM: debug state: cpe1=%#RX32 cpeu=%#RX32 cpe2=%#RX32%s%s\n",
+ pDbgState->fCpe1Extra, pDbgState->fCpe1Unwanted, pDbgState->fCpe2Extra,
+ pDbgState->fClearCr0Mask ? " clr-cr0" : "",
+ pDbgState->fClearCr4Mask ? " clr-cr4" : ""));
+}
+
+
+/**
+ * Fires off DBGF events and dtrace probes for a VM-exit, when it's
+ * appropriate.
+ *
+ * The caller has already checked the VM-exit against the
+ * VMXRUNDBGSTATE::bmExitsToCheck bitmap and has filtered out NMIs, so neither
+ * needs to be done here.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param uExitReason The VM-exit reason.
+ *
+ * @remarks The name of this function is displayed by dtrace, so keep it short
+ *          and to the point. No longer than 33 chars, please.
+ */
+static VBOXSTRICTRC hmR0VmxHandleExitDtraceEvents(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uExitReason)
+{
+ /*
+ * Translate the event into a DBGF event (enmEvent + uEventArg) and at the
+ * same time check whether any corresponding Dtrace event is enabled (fDtrace).
+ *
+ * Note! This is the reverse operation of what hmR0VmxPreRunGuestDebugStateUpdate
+ * does. Must add/change/remove both places. Same ordering, please.
+ *
+ * Added/removed events must also be reflected in the next section
+ * where we dispatch dtrace events.
+ */
+ bool fDtrace1 = false;
+ bool fDtrace2 = false;
+ DBGFEVENTTYPE enmEvent1 = DBGFEVENT_END;
+ DBGFEVENTTYPE enmEvent2 = DBGFEVENT_END;
+ uint32_t uEventArg = 0;
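+    /* SET_EXIT maps the VM-exit to a DBGF exit event and queries the matching
+       dtrace exit probe; SET_BOTH does the same for the instruction/exit pair. */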
+#define SET_EXIT(a_EventSubName) \
+ do { \
+ enmEvent2 = RT_CONCAT(DBGFEVENT_EXIT_, a_EventSubName); \
+ fDtrace2 = RT_CONCAT3(VBOXVMM_EXIT_, a_EventSubName, _ENABLED)(); \
+ } while (0)
+#define SET_BOTH(a_EventSubName) \
+ do { \
+ enmEvent1 = RT_CONCAT(DBGFEVENT_INSTR_, a_EventSubName); \
+ enmEvent2 = RT_CONCAT(DBGFEVENT_EXIT_, a_EventSubName); \
+ fDtrace1 = RT_CONCAT3(VBOXVMM_INSTR_, a_EventSubName, _ENABLED)(); \
+ fDtrace2 = RT_CONCAT3(VBOXVMM_EXIT_, a_EventSubName, _ENABLED)(); \
+ } while (0)
+ switch (uExitReason)
+ {
+ case VMX_EXIT_MTF:
+ return hmR0VmxExitMtf(pVCpu, pVmxTransient);
+
+ case VMX_EXIT_XCPT_OR_NMI:
+ {
+ uint8_t const idxVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo);
+ switch (VMX_EXIT_INT_INFO_TYPE(pVmxTransient->uExitIntInfo))
+ {
+ case VMX_EXIT_INT_INFO_TYPE_HW_XCPT:
+ case VMX_EXIT_INT_INFO_TYPE_SW_XCPT:
+ case VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT:
+ if (idxVector <= (unsigned)(DBGFEVENT_XCPT_LAST - DBGFEVENT_XCPT_FIRST))
+ {
+ if (VMX_EXIT_INT_INFO_IS_ERROR_CODE_VALID(pVmxTransient->uExitIntInfo))
+ {
+ hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
+ uEventArg = pVmxTransient->uExitIntErrorCode;
+ }
+ enmEvent1 = (DBGFEVENTTYPE)(DBGFEVENT_XCPT_FIRST + idxVector);
+ switch (enmEvent1)
+ {
+ case DBGFEVENT_XCPT_DE: fDtrace1 = VBOXVMM_XCPT_DE_ENABLED(); break;
+ case DBGFEVENT_XCPT_DB: fDtrace1 = VBOXVMM_XCPT_DB_ENABLED(); break;
+ case DBGFEVENT_XCPT_BP: fDtrace1 = VBOXVMM_XCPT_BP_ENABLED(); break;
+ case DBGFEVENT_XCPT_OF: fDtrace1 = VBOXVMM_XCPT_OF_ENABLED(); break;
+ case DBGFEVENT_XCPT_BR: fDtrace1 = VBOXVMM_XCPT_BR_ENABLED(); break;
+ case DBGFEVENT_XCPT_UD: fDtrace1 = VBOXVMM_XCPT_UD_ENABLED(); break;
+ case DBGFEVENT_XCPT_NM: fDtrace1 = VBOXVMM_XCPT_NM_ENABLED(); break;
+ case DBGFEVENT_XCPT_DF: fDtrace1 = VBOXVMM_XCPT_DF_ENABLED(); break;
+ case DBGFEVENT_XCPT_TS: fDtrace1 = VBOXVMM_XCPT_TS_ENABLED(); break;
+ case DBGFEVENT_XCPT_NP: fDtrace1 = VBOXVMM_XCPT_NP_ENABLED(); break;
+ case DBGFEVENT_XCPT_SS: fDtrace1 = VBOXVMM_XCPT_SS_ENABLED(); break;
+ case DBGFEVENT_XCPT_GP: fDtrace1 = VBOXVMM_XCPT_GP_ENABLED(); break;
+ case DBGFEVENT_XCPT_PF: fDtrace1 = VBOXVMM_XCPT_PF_ENABLED(); break;
+ case DBGFEVENT_XCPT_MF: fDtrace1 = VBOXVMM_XCPT_MF_ENABLED(); break;
+ case DBGFEVENT_XCPT_AC: fDtrace1 = VBOXVMM_XCPT_AC_ENABLED(); break;
+ case DBGFEVENT_XCPT_XF: fDtrace1 = VBOXVMM_XCPT_XF_ENABLED(); break;
+ case DBGFEVENT_XCPT_VE: fDtrace1 = VBOXVMM_XCPT_VE_ENABLED(); break;
+ case DBGFEVENT_XCPT_SX: fDtrace1 = VBOXVMM_XCPT_SX_ENABLED(); break;
+ default: break;
+ }
+ }
+ else
+ AssertFailed();
+ break;
+
+ case VMX_EXIT_INT_INFO_TYPE_SW_INT:
+ uEventArg = idxVector;
+ enmEvent1 = DBGFEVENT_INTERRUPT_SOFTWARE;
+ fDtrace1 = VBOXVMM_INT_SOFTWARE_ENABLED();
+ break;
+ }
+ break;
+ }
+
+ case VMX_EXIT_TRIPLE_FAULT:
+ enmEvent1 = DBGFEVENT_TRIPLE_FAULT;
+ //fDtrace1 = VBOXVMM_EXIT_TRIPLE_FAULT_ENABLED();
+ break;
+ case VMX_EXIT_TASK_SWITCH: SET_EXIT(TASK_SWITCH); break;
+ case VMX_EXIT_EPT_VIOLATION: SET_EXIT(VMX_EPT_VIOLATION); break;
+ case VMX_EXIT_EPT_MISCONFIG: SET_EXIT(VMX_EPT_MISCONFIG); break;
+ case VMX_EXIT_APIC_ACCESS: SET_EXIT(VMX_VAPIC_ACCESS); break;
+ case VMX_EXIT_APIC_WRITE: SET_EXIT(VMX_VAPIC_WRITE); break;
+
+ /* Instruction specific VM-exits: */
+ case VMX_EXIT_CPUID: SET_BOTH(CPUID); break;
+ case VMX_EXIT_GETSEC: SET_BOTH(GETSEC); break;
+ case VMX_EXIT_HLT: SET_BOTH(HALT); break;
+ case VMX_EXIT_INVD: SET_BOTH(INVD); break;
+ case VMX_EXIT_INVLPG: SET_BOTH(INVLPG); break;
+ case VMX_EXIT_RDPMC: SET_BOTH(RDPMC); break;
+ case VMX_EXIT_RDTSC: SET_BOTH(RDTSC); break;
+ case VMX_EXIT_RSM: SET_BOTH(RSM); break;
+ case VMX_EXIT_VMCALL: SET_BOTH(VMM_CALL); break;
+ case VMX_EXIT_VMCLEAR: SET_BOTH(VMX_VMCLEAR); break;
+ case VMX_EXIT_VMLAUNCH: SET_BOTH(VMX_VMLAUNCH); break;
+ case VMX_EXIT_VMPTRLD: SET_BOTH(VMX_VMPTRLD); break;
+ case VMX_EXIT_VMPTRST: SET_BOTH(VMX_VMPTRST); break;
+ case VMX_EXIT_VMREAD: SET_BOTH(VMX_VMREAD); break;
+ case VMX_EXIT_VMRESUME: SET_BOTH(VMX_VMRESUME); break;
+ case VMX_EXIT_VMWRITE: SET_BOTH(VMX_VMWRITE); break;
+ case VMX_EXIT_VMXOFF: SET_BOTH(VMX_VMXOFF); break;
+ case VMX_EXIT_VMXON: SET_BOTH(VMX_VMXON); break;
+ case VMX_EXIT_MOV_CRX:
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ if (VMX_EXIT_QUAL_CRX_ACCESS(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_CRX_ACCESS_READ)
+ SET_BOTH(CRX_READ);
+ else
+ SET_BOTH(CRX_WRITE);
+ uEventArg = VMX_EXIT_QUAL_CRX_REGISTER(pVmxTransient->uExitQual);
+ break;
+ case VMX_EXIT_MOV_DRX:
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ if ( VMX_EXIT_QUAL_DRX_DIRECTION(pVmxTransient->uExitQual)
+ == VMX_EXIT_QUAL_DRX_DIRECTION_READ)
+ SET_BOTH(DRX_READ);
+ else
+ SET_BOTH(DRX_WRITE);
+ uEventArg = VMX_EXIT_QUAL_DRX_REGISTER(pVmxTransient->uExitQual);
+ break;
+ case VMX_EXIT_RDMSR: SET_BOTH(RDMSR); break;
+ case VMX_EXIT_WRMSR: SET_BOTH(WRMSR); break;
+ case VMX_EXIT_MWAIT: SET_BOTH(MWAIT); break;
+ case VMX_EXIT_MONITOR: SET_BOTH(MONITOR); break;
+ case VMX_EXIT_PAUSE: SET_BOTH(PAUSE); break;
+ case VMX_EXIT_GDTR_IDTR_ACCESS:
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ switch (RT_BF_GET(pVmxTransient->ExitInstrInfo.u, VMX_BF_XDTR_INSINFO_INSTR_ID))
+ {
+ case VMX_XDTR_INSINFO_II_SGDT: SET_BOTH(SGDT); break;
+ case VMX_XDTR_INSINFO_II_SIDT: SET_BOTH(SIDT); break;
+ case VMX_XDTR_INSINFO_II_LGDT: SET_BOTH(LGDT); break;
+ case VMX_XDTR_INSINFO_II_LIDT: SET_BOTH(LIDT); break;
+ }
+ break;
+
+ case VMX_EXIT_LDTR_TR_ACCESS:
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ switch (RT_BF_GET(pVmxTransient->ExitInstrInfo.u, VMX_BF_YYTR_INSINFO_INSTR_ID))
+ {
+ case VMX_YYTR_INSINFO_II_SLDT: SET_BOTH(SLDT); break;
+ case VMX_YYTR_INSINFO_II_STR: SET_BOTH(STR); break;
+ case VMX_YYTR_INSINFO_II_LLDT: SET_BOTH(LLDT); break;
+ case VMX_YYTR_INSINFO_II_LTR: SET_BOTH(LTR); break;
+ }
+ break;
+
+ case VMX_EXIT_INVEPT: SET_BOTH(VMX_INVEPT); break;
+ case VMX_EXIT_RDTSCP: SET_BOTH(RDTSCP); break;
+ case VMX_EXIT_INVVPID: SET_BOTH(VMX_INVVPID); break;
+ case VMX_EXIT_WBINVD: SET_BOTH(WBINVD); break;
+ case VMX_EXIT_XSETBV: SET_BOTH(XSETBV); break;
+ case VMX_EXIT_RDRAND: SET_BOTH(RDRAND); break;
+ case VMX_EXIT_INVPCID: SET_BOTH(VMX_INVPCID); break;
+ case VMX_EXIT_VMFUNC: SET_BOTH(VMX_VMFUNC); break;
+ case VMX_EXIT_RDSEED: SET_BOTH(RDSEED); break;
+ case VMX_EXIT_XSAVES: SET_BOTH(XSAVES); break;
+ case VMX_EXIT_XRSTORS: SET_BOTH(XRSTORS); break;
+
+ /* Events that aren't relevant at this point. */
+ case VMX_EXIT_EXT_INT:
+ case VMX_EXIT_INT_WINDOW:
+ case VMX_EXIT_NMI_WINDOW:
+ case VMX_EXIT_TPR_BELOW_THRESHOLD:
+ case VMX_EXIT_PREEMPT_TIMER:
+ case VMX_EXIT_IO_INSTR:
+ break;
+
+ /* Errors and unexpected events. */
+ case VMX_EXIT_INIT_SIGNAL:
+ case VMX_EXIT_SIPI:
+ case VMX_EXIT_IO_SMI:
+ case VMX_EXIT_SMI:
+ case VMX_EXIT_ERR_INVALID_GUEST_STATE:
+ case VMX_EXIT_ERR_MSR_LOAD:
+ case VMX_EXIT_ERR_MACHINE_CHECK:
+ case VMX_EXIT_PML_FULL:
+ case VMX_EXIT_VIRTUALIZED_EOI:
+ break;
+
+ default:
+ AssertMsgFailed(("Unexpected VM-exit=%#x\n", uExitReason));
+ break;
+ }
+#undef SET_BOTH
+#undef SET_EXIT
+
+    /*
+     * Dtrace tracepoints go first. We do them all here at once so we don't
+     * have to repeat the guest-state saving and related code a few dozen times.
+     * The downside is that we've got to repeat the switch, though this time
+     * we use enmEvent since the probes are a subset of what DBGF does.
+     */
+ if (fDtrace1 || fDtrace2)
+ {
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ switch (enmEvent1)
+ {
+ /** @todo consider which extra parameters would be helpful for each probe. */
+ case DBGFEVENT_END: break;
+ case DBGFEVENT_XCPT_DE: VBOXVMM_XCPT_DE(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_DB: VBOXVMM_XCPT_DB(pVCpu, pCtx, pCtx->dr[6]); break;
+ case DBGFEVENT_XCPT_BP: VBOXVMM_XCPT_BP(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_OF: VBOXVMM_XCPT_OF(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_BR: VBOXVMM_XCPT_BR(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_UD: VBOXVMM_XCPT_UD(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_NM: VBOXVMM_XCPT_NM(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_DF: VBOXVMM_XCPT_DF(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_TS: VBOXVMM_XCPT_TS(pVCpu, pCtx, uEventArg); break;
+ case DBGFEVENT_XCPT_NP: VBOXVMM_XCPT_NP(pVCpu, pCtx, uEventArg); break;
+ case DBGFEVENT_XCPT_SS: VBOXVMM_XCPT_SS(pVCpu, pCtx, uEventArg); break;
+ case DBGFEVENT_XCPT_GP: VBOXVMM_XCPT_GP(pVCpu, pCtx, uEventArg); break;
+ case DBGFEVENT_XCPT_PF: VBOXVMM_XCPT_PF(pVCpu, pCtx, uEventArg, pCtx->cr2); break;
+ case DBGFEVENT_XCPT_MF: VBOXVMM_XCPT_MF(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_AC: VBOXVMM_XCPT_AC(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_XF: VBOXVMM_XCPT_XF(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_VE: VBOXVMM_XCPT_VE(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_SX: VBOXVMM_XCPT_SX(pVCpu, pCtx, uEventArg); break;
+ case DBGFEVENT_INTERRUPT_SOFTWARE: VBOXVMM_INT_SOFTWARE(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_INSTR_CPUID: VBOXVMM_INSTR_CPUID(pVCpu, pCtx, pCtx->eax, pCtx->ecx); break;
+ case DBGFEVENT_INSTR_GETSEC: VBOXVMM_INSTR_GETSEC(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_HALT: VBOXVMM_INSTR_HALT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_INVD: VBOXVMM_INSTR_INVD(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_INVLPG: VBOXVMM_INSTR_INVLPG(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_RDPMC: VBOXVMM_INSTR_RDPMC(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_RDTSC: VBOXVMM_INSTR_RDTSC(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_RSM: VBOXVMM_INSTR_RSM(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_CRX_READ: VBOXVMM_INSTR_CRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_INSTR_CRX_WRITE: VBOXVMM_INSTR_CRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_INSTR_DRX_READ: VBOXVMM_INSTR_DRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_INSTR_DRX_WRITE: VBOXVMM_INSTR_DRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_INSTR_RDMSR: VBOXVMM_INSTR_RDMSR(pVCpu, pCtx, pCtx->ecx); break;
+ case DBGFEVENT_INSTR_WRMSR: VBOXVMM_INSTR_WRMSR(pVCpu, pCtx, pCtx->ecx,
+ RT_MAKE_U64(pCtx->eax, pCtx->edx)); break;
+ case DBGFEVENT_INSTR_MWAIT: VBOXVMM_INSTR_MWAIT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_MONITOR: VBOXVMM_INSTR_MONITOR(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_PAUSE: VBOXVMM_INSTR_PAUSE(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_SGDT: VBOXVMM_INSTR_SGDT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_SIDT: VBOXVMM_INSTR_SIDT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_LGDT: VBOXVMM_INSTR_LGDT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_LIDT: VBOXVMM_INSTR_LIDT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_SLDT: VBOXVMM_INSTR_SLDT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_STR: VBOXVMM_INSTR_STR(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_LLDT: VBOXVMM_INSTR_LLDT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_LTR: VBOXVMM_INSTR_LTR(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_RDTSCP: VBOXVMM_INSTR_RDTSCP(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_WBINVD: VBOXVMM_INSTR_WBINVD(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_XSETBV: VBOXVMM_INSTR_XSETBV(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_RDRAND: VBOXVMM_INSTR_RDRAND(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_RDSEED: VBOXVMM_INSTR_RDSEED(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_XSAVES: VBOXVMM_INSTR_XSAVES(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_XRSTORS: VBOXVMM_INSTR_XRSTORS(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMM_CALL: VBOXVMM_INSTR_VMM_CALL(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMCLEAR: VBOXVMM_INSTR_VMX_VMCLEAR(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMLAUNCH: VBOXVMM_INSTR_VMX_VMLAUNCH(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMPTRLD: VBOXVMM_INSTR_VMX_VMPTRLD(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMPTRST: VBOXVMM_INSTR_VMX_VMPTRST(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMREAD: VBOXVMM_INSTR_VMX_VMREAD(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMRESUME: VBOXVMM_INSTR_VMX_VMRESUME(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMWRITE: VBOXVMM_INSTR_VMX_VMWRITE(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMXOFF: VBOXVMM_INSTR_VMX_VMXOFF(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMXON: VBOXVMM_INSTR_VMX_VMXON(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_INVEPT: VBOXVMM_INSTR_VMX_INVEPT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_INVVPID: VBOXVMM_INSTR_VMX_INVVPID(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_INVPCID: VBOXVMM_INSTR_VMX_INVPCID(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMFUNC: VBOXVMM_INSTR_VMX_VMFUNC(pVCpu, pCtx); break;
+ default: AssertMsgFailed(("enmEvent1=%d uExitReason=%d\n", enmEvent1, uExitReason)); break;
+ }
+ switch (enmEvent2)
+ {
+ /** @todo consider which extra parameters would be helpful for each probe. */
+ case DBGFEVENT_END: break;
+ case DBGFEVENT_EXIT_TASK_SWITCH: VBOXVMM_EXIT_TASK_SWITCH(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_CPUID: VBOXVMM_EXIT_CPUID(pVCpu, pCtx, pCtx->eax, pCtx->ecx); break;
+ case DBGFEVENT_EXIT_GETSEC: VBOXVMM_EXIT_GETSEC(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_HALT: VBOXVMM_EXIT_HALT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_INVD: VBOXVMM_EXIT_INVD(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_INVLPG: VBOXVMM_EXIT_INVLPG(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_RDPMC: VBOXVMM_EXIT_RDPMC(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_RDTSC: VBOXVMM_EXIT_RDTSC(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_RSM: VBOXVMM_EXIT_RSM(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_CRX_READ: VBOXVMM_EXIT_CRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_EXIT_CRX_WRITE: VBOXVMM_EXIT_CRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_EXIT_DRX_READ: VBOXVMM_EXIT_DRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_EXIT_DRX_WRITE: VBOXVMM_EXIT_DRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_EXIT_RDMSR: VBOXVMM_EXIT_RDMSR(pVCpu, pCtx, pCtx->ecx); break;
+ case DBGFEVENT_EXIT_WRMSR: VBOXVMM_EXIT_WRMSR(pVCpu, pCtx, pCtx->ecx,
+ RT_MAKE_U64(pCtx->eax, pCtx->edx)); break;
+ case DBGFEVENT_EXIT_MWAIT: VBOXVMM_EXIT_MWAIT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_MONITOR: VBOXVMM_EXIT_MONITOR(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_PAUSE: VBOXVMM_EXIT_PAUSE(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_SGDT: VBOXVMM_EXIT_SGDT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_SIDT: VBOXVMM_EXIT_SIDT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_LGDT: VBOXVMM_EXIT_LGDT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_LIDT: VBOXVMM_EXIT_LIDT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_SLDT: VBOXVMM_EXIT_SLDT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_STR: VBOXVMM_EXIT_STR(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_LLDT: VBOXVMM_EXIT_LLDT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_LTR: VBOXVMM_EXIT_LTR(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_RDTSCP: VBOXVMM_EXIT_RDTSCP(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_WBINVD: VBOXVMM_EXIT_WBINVD(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_XSETBV: VBOXVMM_EXIT_XSETBV(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_RDRAND: VBOXVMM_EXIT_RDRAND(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_RDSEED: VBOXVMM_EXIT_RDSEED(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_XSAVES: VBOXVMM_EXIT_XSAVES(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_XRSTORS: VBOXVMM_EXIT_XRSTORS(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMM_CALL: VBOXVMM_EXIT_VMM_CALL(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMCLEAR: VBOXVMM_EXIT_VMX_VMCLEAR(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMLAUNCH: VBOXVMM_EXIT_VMX_VMLAUNCH(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMPTRLD: VBOXVMM_EXIT_VMX_VMPTRLD(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMPTRST: VBOXVMM_EXIT_VMX_VMPTRST(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMREAD: VBOXVMM_EXIT_VMX_VMREAD(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMRESUME: VBOXVMM_EXIT_VMX_VMRESUME(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMWRITE: VBOXVMM_EXIT_VMX_VMWRITE(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMXOFF: VBOXVMM_EXIT_VMX_VMXOFF(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMXON: VBOXVMM_EXIT_VMX_VMXON(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_INVEPT: VBOXVMM_EXIT_VMX_INVEPT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_INVVPID: VBOXVMM_EXIT_VMX_INVVPID(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_INVPCID: VBOXVMM_EXIT_VMX_INVPCID(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMFUNC: VBOXVMM_EXIT_VMX_VMFUNC(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_EPT_MISCONFIG: VBOXVMM_EXIT_VMX_EPT_MISCONFIG(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_EPT_VIOLATION: VBOXVMM_EXIT_VMX_EPT_VIOLATION(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VAPIC_ACCESS: VBOXVMM_EXIT_VMX_VAPIC_ACCESS(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VAPIC_WRITE: VBOXVMM_EXIT_VMX_VAPIC_WRITE(pVCpu, pCtx); break;
+ default: AssertMsgFailed(("enmEvent2=%d uExitReason=%d\n", enmEvent2, uExitReason)); break;
+ }
+ }
+
+    /*
+     * Fire off the DBGF event, if enabled (our check here is just a quick one,
+     * the DBGF call will do a full check).
+     *
+     * Note! DBGF sets DBGFEVENT_INTERRUPT_SOFTWARE in the bitmap.
+     * Note! If we have two events, we prioritize the first, i.e. the instruction
+     *       one, in order to avoid event nesting.
+     */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if ( enmEvent1 != DBGFEVENT_END
+ && DBGF_IS_EVENT_ENABLED(pVM, enmEvent1))
+ {
+ hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ VBOXSTRICTRC rcStrict = DBGFEventGenericWithArgs(pVM, pVCpu, enmEvent1, DBGFEVENTCTX_HM, 1, uEventArg);
+ if (rcStrict != VINF_SUCCESS)
+ return rcStrict;
+ }
+ else if ( enmEvent2 != DBGFEVENT_END
+ && DBGF_IS_EVENT_ENABLED(pVM, enmEvent2))
+ {
+ hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ VBOXSTRICTRC rcStrict = DBGFEventGenericWithArgs(pVM, pVCpu, enmEvent2, DBGFEVENTCTX_HM, 1, uEventArg);
+ if (rcStrict != VINF_SUCCESS)
+ return rcStrict;
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Single-stepping VM-exit filtering.
+ *
+ * This preprocesses the VM-exit and decides whether we've gotten far enough to
+ * return VINF_EM_DBG_STEPPED already. If not, normal VM-exit handling is
+ * performed.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param pVmxTransient The VMX-transient structure.
+ * @param pDbgState The debug state.
+ */
+DECLINLINE(VBOXSTRICTRC) hmR0VmxRunDebugHandleExit(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState)
+{
+ /*
+ * Expensive (saves context) generic dtrace VM-exit probe.
+ */
+ uint32_t const uExitReason = pVmxTransient->uExitReason;
+ if (!VBOXVMM_R0_HMVMX_VMEXIT_ENABLED())
+ { /* more likely */ }
+ else
+ {
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRC(rc);
+ VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, &pVCpu->cpum.GstCtx, pVmxTransient->uExitReason, pVmxTransient->uExitQual);
+ }
+
+ /*
+ * Check for host NMI, just to get that out of the way.
+ */
+ if (uExitReason != VMX_EXIT_XCPT_OR_NMI)
+ { /* normally likely */ }
+ else
+ {
+ hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
+ uint32_t const uIntType = VMX_EXIT_INT_INFO_TYPE(pVmxTransient->uExitIntInfo);
+ if (uIntType == VMX_EXIT_INT_INFO_TYPE_NMI)
+ return hmR0VmxExitHostNmi(pVCpu, pVmxTransient->pVmcsInfo);
+ }
+
+ /*
+ * Check for single stepping event if we're stepping.
+ */
+ if (pVCpu->hm.s.fSingleInstruction)
+ {
+ switch (uExitReason)
+ {
+ case VMX_EXIT_MTF:
+ return hmR0VmxExitMtf(pVCpu, pVmxTransient);
+
+ /* Various events: */
+ case VMX_EXIT_XCPT_OR_NMI:
+ case VMX_EXIT_EXT_INT:
+ case VMX_EXIT_TRIPLE_FAULT:
+ case VMX_EXIT_INT_WINDOW:
+ case VMX_EXIT_NMI_WINDOW:
+ case VMX_EXIT_TASK_SWITCH:
+ case VMX_EXIT_TPR_BELOW_THRESHOLD:
+ case VMX_EXIT_APIC_ACCESS:
+ case VMX_EXIT_EPT_VIOLATION:
+ case VMX_EXIT_EPT_MISCONFIG:
+ case VMX_EXIT_PREEMPT_TIMER:
+
+ /* Instruction specific VM-exits: */
+ case VMX_EXIT_CPUID:
+ case VMX_EXIT_GETSEC:
+ case VMX_EXIT_HLT:
+ case VMX_EXIT_INVD:
+ case VMX_EXIT_INVLPG:
+ case VMX_EXIT_RDPMC:
+ case VMX_EXIT_RDTSC:
+ case VMX_EXIT_RSM:
+ case VMX_EXIT_VMCALL:
+ case VMX_EXIT_VMCLEAR:
+ case VMX_EXIT_VMLAUNCH:
+ case VMX_EXIT_VMPTRLD:
+ case VMX_EXIT_VMPTRST:
+ case VMX_EXIT_VMREAD:
+ case VMX_EXIT_VMRESUME:
+ case VMX_EXIT_VMWRITE:
+ case VMX_EXIT_VMXOFF:
+ case VMX_EXIT_VMXON:
+ case VMX_EXIT_MOV_CRX:
+ case VMX_EXIT_MOV_DRX:
+ case VMX_EXIT_IO_INSTR:
+ case VMX_EXIT_RDMSR:
+ case VMX_EXIT_WRMSR:
+ case VMX_EXIT_MWAIT:
+ case VMX_EXIT_MONITOR:
+ case VMX_EXIT_PAUSE:
+ case VMX_EXIT_GDTR_IDTR_ACCESS:
+ case VMX_EXIT_LDTR_TR_ACCESS:
+ case VMX_EXIT_INVEPT:
+ case VMX_EXIT_RDTSCP:
+ case VMX_EXIT_INVVPID:
+ case VMX_EXIT_WBINVD:
+ case VMX_EXIT_XSETBV:
+ case VMX_EXIT_RDRAND:
+ case VMX_EXIT_INVPCID:
+ case VMX_EXIT_VMFUNC:
+ case VMX_EXIT_RDSEED:
+ case VMX_EXIT_XSAVES:
+ case VMX_EXIT_XRSTORS:
+ {
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ AssertRCReturn(rc, rc);
+ if ( pVCpu->cpum.GstCtx.rip != pDbgState->uRipStart
+ || pVCpu->cpum.GstCtx.cs.Sel != pDbgState->uCsStart)
+ return VINF_EM_DBG_STEPPED;
+ break;
+ }
+
+ /* Errors and unexpected events: */
+ case VMX_EXIT_INIT_SIGNAL:
+ case VMX_EXIT_SIPI:
+ case VMX_EXIT_IO_SMI:
+ case VMX_EXIT_SMI:
+ case VMX_EXIT_ERR_INVALID_GUEST_STATE:
+ case VMX_EXIT_ERR_MSR_LOAD:
+ case VMX_EXIT_ERR_MACHINE_CHECK:
+ case VMX_EXIT_PML_FULL:
+ case VMX_EXIT_VIRTUALIZED_EOI:
+ case VMX_EXIT_APIC_WRITE: /* Some talk about this being fault like, so I guess we must process it? */
+ break;
+
+ default:
+ AssertMsgFailed(("Unexpected VM-exit=%#x\n", uExitReason));
+ break;
+ }
+ }
+
+ /*
+ * Check for debugger event breakpoints and dtrace probes.
+ */
+ if ( uExitReason < RT_ELEMENTS(pDbgState->bmExitsToCheck) * 32U
+ && ASMBitTest(pDbgState->bmExitsToCheck, uExitReason) )
+ {
+ VBOXSTRICTRC rcStrict = hmR0VmxHandleExitDtraceEvents(pVCpu, pVmxTransient, uExitReason);
+ if (rcStrict != VINF_SUCCESS)
+ return rcStrict;
+ }
+
+ /*
+ * Normal processing.
+ */
+#ifdef HMVMX_USE_FUNCTION_TABLE
+ return g_apfnVMExitHandlers[uExitReason](pVCpu, pVmxTransient);
+#else
+ return hmR0VmxHandleExit(pVCpu, pVmxTransient, uExitReason);
+#endif
+}
+
+
+/**
+ * Single steps guest code using hardware-assisted VMX.
+ *
+ * This is -not- the same as the guest single-stepping itself (say using EFLAGS.TF)
+ * but single-stepping through the hypervisor debugger.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pcLoops Pointer to the number of executed loops.
+ *
+ * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
+ */
+static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops)
+{
+ uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops;
+ Assert(pcLoops);
+ Assert(*pcLoops <= cMaxResumeLoops);
+
+ VMXTRANSIENT VmxTransient;
+ RT_ZERO(VmxTransient);
+ VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
+
+ /* Set HMCPU indicators. */
+ bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
+ pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
+ pVCpu->hm.s.fDebugWantRdTscExit = false;
+ pVCpu->hm.s.fUsingDebugLoop = true;
+
+ /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
+ VMXRUNDBGSTATE DbgState;
+ hmR0VmxRunDebugStateInit(pVCpu, &VmxTransient, &DbgState);
+ hmR0VmxPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
+
+ /*
+ * The loop.
+ */
+ VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
+ for (;;)
+ {
+ Assert(!HMR0SuspendPending());
+ HMVMX_ASSERT_CPU_SAFE(pVCpu);
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+ bool fStepping = pVCpu->hm.s.fSingleInstruction;
+
+ /* Set up VM-execution controls the next two can respond to. */
+ hmR0VmxPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
+
+ /*
+ * Preparatory work for running guest code, this may force us to
+ * return to ring-3.
+ *
+ * Warning! This bugger disables interrupts on VINF_SUCCESS!
+ */
+ rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
+ if (rcStrict != VINF_SUCCESS)
+ break;
+
+ /* Interrupts are disabled at this point! */
+ hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
+
+        /* Re-apply the debug-state execution controls in case the two calls above changed them. */
+ hmR0VmxPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
+
+ /*
+ * Finally execute the guest.
+ */
+ int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
+
+ hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
+ /* Interrupts are re-enabled at this point! */
+
+ /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
+ if (RT_SUCCESS(rcRun))
+ { /* very likely */ }
+ else
+ {
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
+ hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
+ return rcRun;
+ }
+
+ /* Profile the VM-exit. */
+ AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
+ HMVMX_START_EXIT_DISPATCH_PROF();
+
+ VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
+
+        /*
+         * Handle the VM-exit - we quit earlier on certain VM-exits, see hmR0VmxRunDebugHandleExit().
+         */
+ rcStrict = hmR0VmxRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
+ if (rcStrict != VINF_SUCCESS)
+ break;
+ if (++(*pcLoops) > cMaxResumeLoops)
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+ rcStrict = VINF_EM_RAW_INTERRUPT;
+ break;
+ }
+
+        /*
+         * Stepping: Did the RIP change? If so, consider it a single step.
+         * Otherwise, make sure one of the TFs gets set.
+         */
+ if (fStepping)
+ {
+ int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ AssertRC(rc);
+ if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
+ || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
+ {
+ rcStrict = VINF_EM_DBG_STEPPED;
+ break;
+ }
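+            /* Still on the same instruction: flag DR7 as changed so the debug
+               and stepping state is re-exported before the next VM-entry. */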
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
+ }
+
+ /*
+ * Update when dtrace settings changes (DBGF kicks us, so no need to check).
+ */
+ if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo)
+ hmR0VmxPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
+ }
+
+ /*
+ * Clear the X86_EFL_TF if necessary.
+ */
+ if (pVCpu->hm.s.fClearTrapFlag)
+ {
+ int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
+ AssertRC(rc);
+ pVCpu->hm.s.fClearTrapFlag = false;
+ pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
+ }
+    /** @todo there seem to be issues with the resume flag when the monitor trap
+     *        flag is pending without being used. Seen early in BIOS init when
+     *        accessing the APIC page in protected mode. */
+
+ /*
+ * Restore VM-exit control settings as we may not re-enter this function the
+ * next time around.
+ */
+ rcStrict = hmR0VmxRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict);
+
+ /* Restore HMCPU indicators. */
+ pVCpu->hm.s.fUsingDebugLoop = false;
+ pVCpu->hm.s.fDebugWantRdTscExit = false;
+ pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+ return rcStrict;
+}
+
+
+/** @} */
+
+
+/**
+ * Checks if any expensive dtrace probes are enabled and we should go to the
+ * debug loop.
+ *
+ * @returns true if we should use debug loop, false if not.
+ */
+static bool hmR0VmxAnyExpensiveProbesEnabled(void)
+{
+    /* It's probably faster to OR the raw 32-bit counter variables together.
+       Since the variables are in an array and the probes are next to one
+       another (more or less), we have good locality. So, better to read
+       eight or nine cache lines every time and have only one conditional, than
+       128+ conditionals, right? */
+ return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
+ | VBOXVMM_XCPT_DE_ENABLED_RAW()
+ | VBOXVMM_XCPT_DB_ENABLED_RAW()
+ | VBOXVMM_XCPT_BP_ENABLED_RAW()
+ | VBOXVMM_XCPT_OF_ENABLED_RAW()
+ | VBOXVMM_XCPT_BR_ENABLED_RAW()
+ | VBOXVMM_XCPT_UD_ENABLED_RAW()
+ | VBOXVMM_XCPT_NM_ENABLED_RAW()
+ | VBOXVMM_XCPT_DF_ENABLED_RAW()
+ | VBOXVMM_XCPT_TS_ENABLED_RAW()
+ | VBOXVMM_XCPT_NP_ENABLED_RAW()
+ | VBOXVMM_XCPT_SS_ENABLED_RAW()
+ | VBOXVMM_XCPT_GP_ENABLED_RAW()
+ | VBOXVMM_XCPT_PF_ENABLED_RAW()
+ | VBOXVMM_XCPT_MF_ENABLED_RAW()
+ | VBOXVMM_XCPT_AC_ENABLED_RAW()
+ | VBOXVMM_XCPT_XF_ENABLED_RAW()
+ | VBOXVMM_XCPT_VE_ENABLED_RAW()
+ | VBOXVMM_XCPT_SX_ENABLED_RAW()
+ | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
+ | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
+ ) != 0
+ || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
+ | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
+ | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
+ | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
+ | VBOXVMM_INSTR_INVD_ENABLED_RAW()
+ | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
+ | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
+ | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
+ | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
+ | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
+ | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
+ | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
+ | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
+ | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
+ | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_STR_ENABLED_RAW()
+ | VBOXVMM_INSTR_LTR_ENABLED_RAW()
+ | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
+ | VBOXVMM_INSTR_RSM_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
+ | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
+ | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
+ ) != 0
+ || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
+ | VBOXVMM_EXIT_HALT_ENABLED_RAW()
+ | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
+ | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
+ | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
+ | VBOXVMM_EXIT_INVD_ENABLED_RAW()
+ | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
+ | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
+ | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
+ | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
+ | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
+ | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
+ | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
+ | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
+ | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
+ | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_STR_ENABLED_RAW()
+ | VBOXVMM_EXIT_LTR_ENABLED_RAW()
+ | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
+ | VBOXVMM_EXIT_RSM_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
+ | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
+ | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
+ ) != 0;
+}
+
+
+/**
+ * Runs the guest using hardware-assisted VMX.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
+{
+ AssertPtr(pVCpu);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ VBOXSTRICTRC rcStrict;
+ uint32_t cLoops = 0;
+ for (;;)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
+#else
+ NOREF(pCtx);
+ bool const fInNestedGuestMode = false;
+#endif
+ if (!fInNestedGuestMode)
+ {
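+            /* Use the normal loop unless the debug loop was requested, the debugger
+               is stepping, INT3 breakpoints are armed, or expensive dtrace probes
+               are enabled. */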
+ if ( !pVCpu->hm.s.fUseDebugLoop
+ && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
+ && !DBGFIsStepping(pVCpu)
+ && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
+ rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
+ else
+ rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
+ }
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ else
+ rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);
+
+ if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
+ {
+ Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
+ continue;
+ }
+ if (rcStrict == VINF_VMX_VMEXIT)
+ {
+ Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
+ continue;
+ }
+#endif
+ break;
+ }
+
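+    /* Map interpreter fallbacks to instruction emulation and a guest reset to a
+       triple fault before going back to ring-3. */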
+ int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
+ switch (rcLoop)
+ {
+ case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
+ case VINF_EM_RESET: rcStrict = VINF_EM_TRIPLE_FAULT; break;
+ }
+
+ int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
+ if (RT_FAILURE(rc2))
+ {
+ pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
+ rcStrict = rc2;
+ }
+ Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
+ Assert(!VMMRZCallRing3IsNotificationSet(pVCpu));
+ return rcStrict;
+}
+
+
+#ifndef HMVMX_USE_FUNCTION_TABLE
+/**
+ * Handles a guest VM-exit from hardware-assisted VMX execution.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ */
+DECLINLINE(VBOXSTRICTRC) hmR0VmxHandleExit(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+#ifdef DEBUG_ramshankar
+# define VMEXIT_CALL_RET(a_fSave, a_CallExpr) \
+ do { \
+ if (a_fSave != 0) \
+ hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL); \
+ VBOXSTRICTRC rcStrict = a_CallExpr; \
+ if (a_fSave != 0) \
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); \
+ return rcStrict; \
+ } while (0)
+#else
+# define VMEXIT_CALL_RET(a_fSave, a_CallExpr) return a_CallExpr
+#endif
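+    /* Note: with DEBUG_ramshankar the macro above imports the full guest state
+       before calling the handler and marks everything as changed afterwards;
+       otherwise it is a plain tail call to the handler. */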
+ uint32_t const uExitReason = pVmxTransient->uExitReason;
+ switch (uExitReason)
+ {
+ case VMX_EXIT_EPT_MISCONFIG: VMEXIT_CALL_RET(0, hmR0VmxExitEptMisconfig(pVCpu, pVmxTransient));
+ case VMX_EXIT_EPT_VIOLATION: VMEXIT_CALL_RET(0, hmR0VmxExitEptViolation(pVCpu, pVmxTransient));
+ case VMX_EXIT_IO_INSTR: VMEXIT_CALL_RET(0, hmR0VmxExitIoInstr(pVCpu, pVmxTransient));
+ case VMX_EXIT_CPUID: VMEXIT_CALL_RET(0, hmR0VmxExitCpuid(pVCpu, pVmxTransient));
+ case VMX_EXIT_RDTSC: VMEXIT_CALL_RET(0, hmR0VmxExitRdtsc(pVCpu, pVmxTransient));
+ case VMX_EXIT_RDTSCP: VMEXIT_CALL_RET(0, hmR0VmxExitRdtscp(pVCpu, pVmxTransient));
+ case VMX_EXIT_APIC_ACCESS: VMEXIT_CALL_RET(0, hmR0VmxExitApicAccess(pVCpu, pVmxTransient));
+ case VMX_EXIT_XCPT_OR_NMI: VMEXIT_CALL_RET(0, hmR0VmxExitXcptOrNmi(pVCpu, pVmxTransient));
+ case VMX_EXIT_MOV_CRX: VMEXIT_CALL_RET(0, hmR0VmxExitMovCRx(pVCpu, pVmxTransient));
+ case VMX_EXIT_EXT_INT: VMEXIT_CALL_RET(0, hmR0VmxExitExtInt(pVCpu, pVmxTransient));
+ case VMX_EXIT_INT_WINDOW: VMEXIT_CALL_RET(0, hmR0VmxExitIntWindow(pVCpu, pVmxTransient));
+ case VMX_EXIT_TPR_BELOW_THRESHOLD: VMEXIT_CALL_RET(0, hmR0VmxExitTprBelowThreshold(pVCpu, pVmxTransient));
+ case VMX_EXIT_MWAIT: VMEXIT_CALL_RET(0, hmR0VmxExitMwait(pVCpu, pVmxTransient));
+ case VMX_EXIT_MONITOR: VMEXIT_CALL_RET(0, hmR0VmxExitMonitor(pVCpu, pVmxTransient));
+ case VMX_EXIT_TASK_SWITCH: VMEXIT_CALL_RET(0, hmR0VmxExitTaskSwitch(pVCpu, pVmxTransient));
+ case VMX_EXIT_PREEMPT_TIMER: VMEXIT_CALL_RET(0, hmR0VmxExitPreemptTimer(pVCpu, pVmxTransient));
+ case VMX_EXIT_RDMSR: VMEXIT_CALL_RET(0, hmR0VmxExitRdmsr(pVCpu, pVmxTransient));
+ case VMX_EXIT_WRMSR: VMEXIT_CALL_RET(0, hmR0VmxExitWrmsr(pVCpu, pVmxTransient));
+ case VMX_EXIT_VMCALL: VMEXIT_CALL_RET(0, hmR0VmxExitVmcall(pVCpu, pVmxTransient));
+ case VMX_EXIT_MOV_DRX: VMEXIT_CALL_RET(0, hmR0VmxExitMovDRx(pVCpu, pVmxTransient));
+ case VMX_EXIT_HLT: VMEXIT_CALL_RET(0, hmR0VmxExitHlt(pVCpu, pVmxTransient));
+ case VMX_EXIT_INVD: VMEXIT_CALL_RET(0, hmR0VmxExitInvd(pVCpu, pVmxTransient));
+ case VMX_EXIT_INVLPG: VMEXIT_CALL_RET(0, hmR0VmxExitInvlpg(pVCpu, pVmxTransient));
+ case VMX_EXIT_MTF: VMEXIT_CALL_RET(0, hmR0VmxExitMtf(pVCpu, pVmxTransient));
+ case VMX_EXIT_PAUSE: VMEXIT_CALL_RET(0, hmR0VmxExitPause(pVCpu, pVmxTransient));
+ case VMX_EXIT_WBINVD: VMEXIT_CALL_RET(0, hmR0VmxExitWbinvd(pVCpu, pVmxTransient));
+ case VMX_EXIT_XSETBV: VMEXIT_CALL_RET(0, hmR0VmxExitXsetbv(pVCpu, pVmxTransient));
+ case VMX_EXIT_INVPCID: VMEXIT_CALL_RET(0, hmR0VmxExitInvpcid(pVCpu, pVmxTransient));
+ case VMX_EXIT_GETSEC: VMEXIT_CALL_RET(0, hmR0VmxExitGetsec(pVCpu, pVmxTransient));
+ case VMX_EXIT_RDPMC: VMEXIT_CALL_RET(0, hmR0VmxExitRdpmc(pVCpu, pVmxTransient));
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ case VMX_EXIT_VMCLEAR: VMEXIT_CALL_RET(0, hmR0VmxExitVmclear(pVCpu, pVmxTransient));
+ case VMX_EXIT_VMLAUNCH: VMEXIT_CALL_RET(0, hmR0VmxExitVmlaunch(pVCpu, pVmxTransient));
+ case VMX_EXIT_VMPTRLD: VMEXIT_CALL_RET(0, hmR0VmxExitVmptrld(pVCpu, pVmxTransient));
+ case VMX_EXIT_VMPTRST: VMEXIT_CALL_RET(0, hmR0VmxExitVmptrst(pVCpu, pVmxTransient));
+ case VMX_EXIT_VMREAD: VMEXIT_CALL_RET(0, hmR0VmxExitVmread(pVCpu, pVmxTransient));
+        case VMX_EXIT_VMRESUME:                VMEXIT_CALL_RET(0, hmR0VmxExitVmresume(pVCpu, pVmxTransient));
+        case VMX_EXIT_VMWRITE:                 VMEXIT_CALL_RET(0, hmR0VmxExitVmwrite(pVCpu, pVmxTransient));
+ case VMX_EXIT_VMXOFF: VMEXIT_CALL_RET(0, hmR0VmxExitVmxoff(pVCpu, pVmxTransient));
+ case VMX_EXIT_VMXON: VMEXIT_CALL_RET(0, hmR0VmxExitVmxon(pVCpu, pVmxTransient));
+ case VMX_EXIT_INVVPID: VMEXIT_CALL_RET(0, hmR0VmxExitInvvpid(pVCpu, pVmxTransient));
+ case VMX_EXIT_INVEPT: VMEXIT_CALL_RET(0, hmR0VmxExitSetPendingXcptUD(pVCpu, pVmxTransient));
+#else
+ case VMX_EXIT_VMCLEAR:
+ case VMX_EXIT_VMLAUNCH:
+ case VMX_EXIT_VMPTRLD:
+ case VMX_EXIT_VMPTRST:
+ case VMX_EXIT_VMREAD:
+ case VMX_EXIT_VMRESUME:
+ case VMX_EXIT_VMWRITE:
+ case VMX_EXIT_VMXOFF:
+ case VMX_EXIT_VMXON:
+ case VMX_EXIT_INVVPID:
+ case VMX_EXIT_INVEPT:
+ return hmR0VmxExitSetPendingXcptUD(pVCpu, pVmxTransient);
+#endif
+
+ case VMX_EXIT_TRIPLE_FAULT: return hmR0VmxExitTripleFault(pVCpu, pVmxTransient);
+ case VMX_EXIT_NMI_WINDOW: return hmR0VmxExitNmiWindow(pVCpu, pVmxTransient);
+ case VMX_EXIT_ERR_INVALID_GUEST_STATE: return hmR0VmxExitErrInvalidGuestState(pVCpu, pVmxTransient);
+
+ case VMX_EXIT_INIT_SIGNAL:
+ case VMX_EXIT_SIPI:
+ case VMX_EXIT_IO_SMI:
+ case VMX_EXIT_SMI:
+ case VMX_EXIT_ERR_MSR_LOAD:
+ case VMX_EXIT_ERR_MACHINE_CHECK:
+ case VMX_EXIT_PML_FULL:
+ case VMX_EXIT_VIRTUALIZED_EOI:
+ case VMX_EXIT_GDTR_IDTR_ACCESS:
+ case VMX_EXIT_LDTR_TR_ACCESS:
+ case VMX_EXIT_APIC_WRITE:
+ case VMX_EXIT_RDRAND:
+ case VMX_EXIT_RSM:
+ case VMX_EXIT_VMFUNC:
+ case VMX_EXIT_ENCLS:
+ case VMX_EXIT_RDSEED:
+ case VMX_EXIT_XSAVES:
+ case VMX_EXIT_XRSTORS:
+ case VMX_EXIT_UMWAIT:
+ case VMX_EXIT_TPAUSE:
+ default:
+ return hmR0VmxExitErrUnexpected(pVCpu, pVmxTransient);
+ }
+#undef VMEXIT_CALL_RET
+}
+#endif /* !HMVMX_USE_FUNCTION_TABLE */
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Handles a nested-guest VM-exit from hardware-assisted VMX execution.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ */
+DECLINLINE(VBOXSTRICTRC) hmR0VmxHandleExitNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ uint32_t const uExitReason = pVmxTransient->uExitReason;
+ switch (uExitReason)
+ {
+ case VMX_EXIT_EPT_MISCONFIG: return hmR0VmxExitEptMisconfig(pVCpu, pVmxTransient);
+ case VMX_EXIT_EPT_VIOLATION: return hmR0VmxExitEptViolation(pVCpu, pVmxTransient);
+ case VMX_EXIT_XCPT_OR_NMI: return hmR0VmxExitXcptOrNmiNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_IO_INSTR: return hmR0VmxExitIoInstrNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_HLT: return hmR0VmxExitHltNested(pVCpu, pVmxTransient);
+
+ /*
+ * We shouldn't direct host physical interrupts to the nested-guest.
+ */
+ case VMX_EXIT_EXT_INT:
+ return hmR0VmxExitExtInt(pVCpu, pVmxTransient);
+
+ /*
+         * Instructions that cause VM-exits unconditionally or whose exit condition
+         * is taken solely from the nested hypervisor (meaning if the VM-exit
+ * happens, it's guaranteed to be a nested-guest VM-exit).
+ *
+ * - Provides VM-exit instruction length ONLY.
+ */
+ case VMX_EXIT_CPUID: /* Unconditional. */
+ case VMX_EXIT_VMCALL:
+ case VMX_EXIT_GETSEC:
+ case VMX_EXIT_INVD:
+ case VMX_EXIT_XSETBV:
+ case VMX_EXIT_VMLAUNCH:
+ case VMX_EXIT_VMRESUME:
+ case VMX_EXIT_VMXOFF:
+ case VMX_EXIT_ENCLS: /* Condition specified solely by nested hypervisor. */
+ case VMX_EXIT_VMFUNC:
+ return hmR0VmxExitInstrNested(pVCpu, pVmxTransient);
+
+ /*
+         * Instructions that cause VM-exits unconditionally or whose exit condition
+         * is taken solely from the nested hypervisor (meaning if the VM-exit
+ * happens, it's guaranteed to be a nested-guest VM-exit).
+ *
+ * - Provides VM-exit instruction length.
+ * - Provides VM-exit information.
+ * - Optionally provides Exit qualification.
+ *
+ * Since Exit qualification is 0 for all VM-exits where it is not
+ * applicable, reading and passing it to the guest should produce
+ * defined behavior.
+ *
+ * See Intel spec. 27.2.1 "Basic VM-Exit Information".
+ */
+ case VMX_EXIT_INVEPT: /* Unconditional. */
+ case VMX_EXIT_INVVPID:
+ case VMX_EXIT_VMCLEAR:
+ case VMX_EXIT_VMPTRLD:
+ case VMX_EXIT_VMPTRST:
+ case VMX_EXIT_VMXON:
+ case VMX_EXIT_GDTR_IDTR_ACCESS: /* Condition specified solely by nested hypervisor. */
+ case VMX_EXIT_LDTR_TR_ACCESS:
+ case VMX_EXIT_RDRAND:
+ case VMX_EXIT_RDSEED:
+ case VMX_EXIT_XSAVES:
+ case VMX_EXIT_XRSTORS:
+ case VMX_EXIT_UMWAIT:
+ case VMX_EXIT_TPAUSE:
+ return hmR0VmxExitInstrWithInfoNested(pVCpu, pVmxTransient);
+
+ case VMX_EXIT_RDTSC: return hmR0VmxExitRdtscNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_RDTSCP: return hmR0VmxExitRdtscpNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_RDMSR: return hmR0VmxExitRdmsrNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_WRMSR: return hmR0VmxExitWrmsrNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_INVLPG: return hmR0VmxExitInvlpgNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_INVPCID: return hmR0VmxExitInvpcidNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_TASK_SWITCH: return hmR0VmxExitTaskSwitchNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_WBINVD: return hmR0VmxExitWbinvdNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_MTF: return hmR0VmxExitMtfNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_APIC_ACCESS: return hmR0VmxExitApicAccessNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_APIC_WRITE: return hmR0VmxExitApicWriteNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_VIRTUALIZED_EOI: return hmR0VmxExitVirtEoiNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_MOV_CRX: return hmR0VmxExitMovCRxNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_INT_WINDOW: return hmR0VmxExitIntWindowNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_NMI_WINDOW: return hmR0VmxExitNmiWindowNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_TPR_BELOW_THRESHOLD: return hmR0VmxExitTprBelowThresholdNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_MWAIT: return hmR0VmxExitMwaitNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_MONITOR: return hmR0VmxExitMonitorNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_PAUSE: return hmR0VmxExitPauseNested(pVCpu, pVmxTransient);
+
+ case VMX_EXIT_PREEMPT_TIMER:
+ {
+ /** @todo NSTVMX: Preempt timer. */
+ return hmR0VmxExitPreemptTimer(pVCpu, pVmxTransient);
+ }
+
+ case VMX_EXIT_MOV_DRX: return hmR0VmxExitMovDRxNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_RDPMC: return hmR0VmxExitRdpmcNested(pVCpu, pVmxTransient);
+
+ case VMX_EXIT_VMREAD:
+ case VMX_EXIT_VMWRITE: return hmR0VmxExitVmreadVmwriteNested(pVCpu, pVmxTransient);
+
+ case VMX_EXIT_TRIPLE_FAULT: return hmR0VmxExitTripleFaultNested(pVCpu, pVmxTransient);
+ case VMX_EXIT_ERR_INVALID_GUEST_STATE: return hmR0VmxExitErrInvalidGuestStateNested(pVCpu, pVmxTransient);
+
+ case VMX_EXIT_INIT_SIGNAL:
+ case VMX_EXIT_SIPI:
+ case VMX_EXIT_IO_SMI:
+ case VMX_EXIT_SMI:
+ case VMX_EXIT_ERR_MSR_LOAD:
+ case VMX_EXIT_ERR_MACHINE_CHECK:
+ case VMX_EXIT_PML_FULL:
+ case VMX_EXIT_RSM:
+ default:
+ return hmR0VmxExitErrUnexpected(pVCpu, pVmxTransient);
+ }
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+
+/** @name VM-exit helpers.
+ * @{
+ */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= VM-exit helpers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+
+/** Macro for VM-exits called unexpectedly. */
+#define HMVMX_UNEXPECTED_EXIT_RET(a_pVCpu, a_HmError) \
+ do { \
+ (a_pVCpu)->hm.s.u32HMError = (a_HmError); \
+ return VERR_VMX_UNEXPECTED_EXIT; \
+ } while (0)
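+/* The macro records the supplied error value (typically the exit reason) in
+   u32HMError so ring-3 can diagnose the VERR_VMX_UNEXPECTED_EXIT failure. */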
+
+#ifdef VBOX_STRICT
+/* Is there some generic IPRT define for this that is not in Runtime/internal/\*? */
+# define HMVMX_ASSERT_PREEMPT_CPUID_VAR() \
+ RTCPUID const idAssertCpu = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId()
+
+# define HMVMX_ASSERT_PREEMPT_CPUID() \
+ do { \
+ RTCPUID const idAssertCpuNow = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId(); \
+ AssertMsg(idAssertCpu == idAssertCpuNow, ("VMX %#x, %#x\n", idAssertCpu, idAssertCpuNow)); \
+ } while (0)
+
+# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \
+ do { \
+ AssertPtr((a_pVCpu)); \
+ AssertPtr((a_pVmxTransient)); \
+ Assert((a_pVmxTransient)->fVMEntryFailed == false); \
+ Assert((a_pVmxTransient)->pVmcsInfo); \
+ Assert(ASMIntAreEnabled()); \
+ HMVMX_ASSERT_PREEMPT_SAFE(a_pVCpu); \
+ HMVMX_ASSERT_PREEMPT_CPUID_VAR(); \
+ Log4Func(("vcpu[%RU32]\n", (a_pVCpu)->idCpu)); \
+ HMVMX_ASSERT_PREEMPT_SAFE(a_pVCpu); \
+ if (VMMR0IsLogFlushDisabled((a_pVCpu))) \
+ HMVMX_ASSERT_PREEMPT_CPUID(); \
+ HMVMX_STOP_EXIT_DISPATCH_PROF(); \
+ } while (0)
+
+# define HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \
+ do { \
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient); \
+ Assert((a_pVmxTransient)->fIsNestedGuest); \
+ } while (0)
+
+# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \
+ do { \
+ Log4Func(("\n")); \
+ } while (0)
+#else
+# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \
+ do { \
+ HMVMX_STOP_EXIT_DISPATCH_PROF(); \
+ NOREF((a_pVCpu)); NOREF((a_pVmxTransient)); \
+ } while (0)
+
+# define HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \
+ do { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient); } while (0)
+
+# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) do { } while (0)
+#endif
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/** Macro that performs the necessary privilege checks and handles intercepted
+ *  VM-exits for a guest that attempted to execute a VMX instruction. */
+# define HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(a_pVCpu, a_uExitReason) \
+ do \
+ { \
+ VBOXSTRICTRC rcStrictTmp = hmR0VmxCheckExitDueToVmxInstr((a_pVCpu), (a_uExitReason)); \
+ if (rcStrictTmp == VINF_SUCCESS) \
+ { /* likely */ } \
+ else if (rcStrictTmp == VINF_HM_PENDING_XCPT) \
+ { \
+ Assert((a_pVCpu)->hm.s.Event.fPending); \
+ Log4Func(("Privilege checks failed -> %#x\n", VMX_ENTRY_INT_INFO_VECTOR((a_pVCpu)->hm.s.Event.u64IntInfo))); \
+ return VINF_SUCCESS; \
+ } \
+ else \
+ { \
+ int rcTmp = VBOXSTRICTRC_VAL(rcStrictTmp); \
+ AssertMsgFailedReturn(("Unexpected failure. rc=%Rrc", rcTmp), rcTmp); \
+ } \
+ } while (0)
+
+/** Macro that decodes a memory operand for a VM-exit caused by an instruction. */
+# define HMVMX_DECODE_MEM_OPERAND(a_pVCpu, a_uExitInstrInfo, a_uExitQual, a_enmMemAccess, a_pGCPtrEffAddr) \
+ do \
+ { \
+ VBOXSTRICTRC rcStrictTmp = hmR0VmxDecodeMemOperand((a_pVCpu), (a_uExitInstrInfo), (a_uExitQual), (a_enmMemAccess), \
+ (a_pGCPtrEffAddr)); \
+ if (rcStrictTmp == VINF_SUCCESS) \
+ { /* likely */ } \
+ else if (rcStrictTmp == VINF_HM_PENDING_XCPT) \
+ { \
+ uint8_t const uXcptTmp = VMX_ENTRY_INT_INFO_VECTOR((a_pVCpu)->hm.s.Event.u64IntInfo); \
+ Log4Func(("Memory operand decoding failed, raising xcpt %#x\n", uXcptTmp)); \
+ NOREF(uXcptTmp); \
+ return VINF_SUCCESS; \
+ } \
+ else \
+ { \
+ Log4Func(("hmR0VmxDecodeMemOperand failed. rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrictTmp))); \
+ return rcStrictTmp; \
+ } \
+ } while (0)
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+
+/**
+ * Advances the guest RIP by the specified number of bytes.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param cbInstr Number of bytes to advance the RIP by.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+DECLINLINE(void) hmR0VmxAdvanceGuestRipBy(PVMCPUCC pVCpu, uint32_t cbInstr)
+{
+ /* Advance the RIP. */
+ pVCpu->cpum.GstCtx.rip += cbInstr;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP);
+
+ /* Update interrupt inhibition. */
+ if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
+ && pVCpu->cpum.GstCtx.rip != EMGetInhibitInterruptsPC(pVCpu))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+}
+
+
+/**
+ * Advances the guest RIP after reading it from the VMCS.
+ *
+ * @returns VBox status code, no informational status codes.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxAdvanceGuestRip(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
+ AssertRCReturn(rc, rc);
+
+ hmR0VmxAdvanceGuestRipBy(pVCpu, pVmxTransient->cbExitInstr);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Handles a condition that occurred while delivering an event through the guest or
+ * nested-guest IDT.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_SUCCESS if we should continue handling the VM-exit.
+ * @retval VINF_HM_DOUBLE_FAULT if a \#DF condition was detected and we ought
+ *          to continue execution of the guest which will deliver the \#DF.
+ * @retval VINF_EM_RESET if we detected a triple-fault condition.
+ * @retval VERR_EM_GUEST_CPU_HANG if we detected a guest CPU hang.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient The VMX-transient structure.
+ *
+ * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
+ * Additionally, HMVMX_READ_EXIT_QUALIFICATION is required if the VM-exit
+ * is due to an EPT violation, PML full or SPP-related event.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static VBOXSTRICTRC hmR0VmxCheckExitDueToEventDelivery(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ Assert(!pVCpu->hm.s.Event.fPending);
+ HMVMX_ASSERT_READ(pVmxTransient, HMVMX_READ_XCPT_INFO);
+ if ( pVmxTransient->uExitReason == VMX_EXIT_EPT_VIOLATION
+ || pVmxTransient->uExitReason == VMX_EXIT_PML_FULL
+ || pVmxTransient->uExitReason == VMX_EXIT_SPP_EVENT)
+ HMVMX_ASSERT_READ(pVmxTransient, HMVMX_READ_EXIT_QUALIFICATION);
+
+ VBOXSTRICTRC rcStrict = VINF_SUCCESS;
+ PCVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ uint32_t const uIdtVectorInfo = pVmxTransient->uIdtVectoringInfo;
+ uint32_t const uExitIntInfo = pVmxTransient->uExitIntInfo;
+ if (VMX_IDT_VECTORING_INFO_IS_VALID(uIdtVectorInfo))
+ {
+ uint32_t const uIdtVector = VMX_IDT_VECTORING_INFO_VECTOR(uIdtVectorInfo);
+ uint32_t const uIdtVectorType = VMX_IDT_VECTORING_INFO_TYPE(uIdtVectorInfo);
+
+ /*
+ * If the event was a software interrupt (generated with INT n) or a software exception
+ * (generated by INT3/INTO) or a privileged software exception (generated by INT1), we
+ * can handle the VM-exit and continue guest execution which will re-execute the
+ * instruction rather than re-injecting the exception, as that can cause premature
+ * trips to ring-3 before injection and involve TRPM which currently has no way of
+ * storing that these exceptions were caused by these instructions (ICEBP's #DB poses
+ * the problem).
+ */
+ IEMXCPTRAISE enmRaise;
+ IEMXCPTRAISEINFO fRaiseInfo;
+ if ( uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT
+ || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT
+ || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT)
+ {
+ enmRaise = IEMXCPTRAISE_REEXEC_INSTR;
+ fRaiseInfo = IEMXCPTRAISEINFO_NONE;
+ }
+ else if (VMX_EXIT_INT_INFO_IS_VALID(uExitIntInfo))
+ {
+ uint32_t const uExitVectorType = VMX_EXIT_INT_INFO_TYPE(uExitIntInfo);
+ uint8_t const uExitVector = VMX_EXIT_INT_INFO_VECTOR(uExitIntInfo);
+ Assert(uExitVectorType == VMX_EXIT_INT_INFO_TYPE_HW_XCPT);
+
+ uint32_t const fIdtVectorFlags = hmR0VmxGetIemXcptFlags(uIdtVector, uIdtVectorType);
+ uint32_t const fExitVectorFlags = hmR0VmxGetIemXcptFlags(uExitVector, uExitVectorType);
+
+ enmRaise = IEMEvaluateRecursiveXcpt(pVCpu, fIdtVectorFlags, uIdtVector, fExitVectorFlags, uExitVector, &fRaiseInfo);
+
+ /* Determine a vectoring #PF condition, see comment in hmR0VmxExitXcptPF(). */
+ if (fRaiseInfo & (IEMXCPTRAISEINFO_EXT_INT_PF | IEMXCPTRAISEINFO_NMI_PF))
+ {
+ pVmxTransient->fVectoringPF = true;
+ enmRaise = IEMXCPTRAISE_PREV_EVENT;
+ }
+ }
+ else
+ {
+ /*
+ * If an exception or hardware interrupt delivery caused an EPT violation/misconfig or APIC access
+ * VM-exit, then the VM-exit interruption-information will not be valid and we end up here.
+ * It is sufficient to reflect the original event to the guest after handling the VM-exit.
+ */
+ Assert( uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
+ || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_NMI
+ || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_EXT_INT);
+ enmRaise = IEMXCPTRAISE_PREV_EVENT;
+ fRaiseInfo = IEMXCPTRAISEINFO_NONE;
+ }
+
+ /*
+ * On CPUs that support Virtual NMIs, if this VM-exit (be it an exception or EPT violation/misconfig
+ * etc.) occurred while delivering the NMI, we need to clear the block-by-NMI field in the guest
+ * interruptibility-state before re-delivering the NMI after handling the VM-exit. Otherwise the
+ * subsequent VM-entry would fail, see @bugref{7445}.
+ *
+ * See Intel spec. 30.7.1.2 "Resuming Guest Software after Handling an Exception".
+ */
+ if ( uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_NMI
+ && enmRaise == IEMXCPTRAISE_PREV_EVENT
+ && (pVmcsInfo->u32PinCtls & VMX_PIN_CTLS_VIRT_NMI)
+ && CPUMIsGuestNmiBlocking(pVCpu))
+ {
+ CPUMSetGuestNmiBlocking(pVCpu, false);
+ }
+
+ switch (enmRaise)
+ {
+ case IEMXCPTRAISE_CURRENT_XCPT:
+ {
+ Log4Func(("IDT: Pending secondary Xcpt: idtinfo=%#RX64 exitinfo=%#RX64\n", uIdtVectorInfo, uExitIntInfo));
+ Assert(rcStrict == VINF_SUCCESS);
+ break;
+ }
+
+ case IEMXCPTRAISE_PREV_EVENT:
+ {
+ uint32_t u32ErrCode;
+ if (VMX_IDT_VECTORING_INFO_IS_ERROR_CODE_VALID(uIdtVectorInfo))
+ u32ErrCode = pVmxTransient->uIdtVectoringErrorCode;
+ else
+ u32ErrCode = 0;
+
+ /* If uExitVector is #PF, CR2 value will be updated from the VMCS if it's a guest #PF, see hmR0VmxExitXcptPF(). */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectReflect);
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_IDT_INFO(uIdtVectorInfo), 0 /* cbInstr */,
+ u32ErrCode, pVCpu->cpum.GstCtx.cr2);
+
+ Log4Func(("IDT: Pending vectoring event %#RX64 Err=%#RX32\n", pVCpu->hm.s.Event.u64IntInfo,
+ pVCpu->hm.s.Event.u32ErrCode));
+ Assert(rcStrict == VINF_SUCCESS);
+ break;
+ }
+
+ case IEMXCPTRAISE_REEXEC_INSTR:
+ Assert(rcStrict == VINF_SUCCESS);
+ break;
+
+ case IEMXCPTRAISE_DOUBLE_FAULT:
+ {
+ /*
+             * Determine a vectoring double #PF condition. It is used later, when PGM evaluates
+             * the second #PF as a guest #PF (and not a shadow #PF) and it needs to be converted into a #DF.
+ */
+ if (fRaiseInfo & IEMXCPTRAISEINFO_PF_PF)
+ {
+ pVmxTransient->fVectoringDoublePF = true;
+ Log4Func(("IDT: Vectoring double #PF %#RX64 cr2=%#RX64\n", pVCpu->hm.s.Event.u64IntInfo,
+ pVCpu->cpum.GstCtx.cr2));
+ rcStrict = VINF_SUCCESS;
+ }
+ else
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectConvertDF);
+ hmR0VmxSetPendingXcptDF(pVCpu);
+ Log4Func(("IDT: Pending vectoring #DF %#RX64 uIdtVector=%#x uExitVector=%#x\n", pVCpu->hm.s.Event.u64IntInfo,
+ uIdtVector, VMX_EXIT_INT_INFO_VECTOR(uExitIntInfo)));
+ rcStrict = VINF_HM_DOUBLE_FAULT;
+ }
+ break;
+ }
+
+ case IEMXCPTRAISE_TRIPLE_FAULT:
+ {
+ Log4Func(("IDT: Pending vectoring triple-fault uIdt=%#x uExit=%#x\n", uIdtVector,
+ VMX_EXIT_INT_INFO_VECTOR(uExitIntInfo)));
+ rcStrict = VINF_EM_RESET;
+ break;
+ }
+
+ case IEMXCPTRAISE_CPU_HANG:
+ {
+ Log4Func(("IDT: Bad guest! Entering CPU hang. fRaiseInfo=%#x\n", fRaiseInfo));
+ rcStrict = VERR_EM_GUEST_CPU_HANG;
+ break;
+ }
+
+ default:
+ {
+ AssertMsgFailed(("IDT: vcpu[%RU32] Unexpected/invalid value! enmRaise=%#x\n", pVCpu->idCpu, enmRaise));
+ rcStrict = VERR_VMX_IPE_2;
+ break;
+ }
+ }
+ }
+ else if ( (pVmcsInfo->u32PinCtls & VMX_PIN_CTLS_VIRT_NMI)
+ && !CPUMIsGuestNmiBlocking(pVCpu))
+ {
+ if ( VMX_EXIT_INT_INFO_IS_VALID(uExitIntInfo)
+ && VMX_EXIT_INT_INFO_VECTOR(uExitIntInfo) != X86_XCPT_DF
+ && VMX_EXIT_INT_INFO_IS_NMI_UNBLOCK_IRET(uExitIntInfo))
+ {
+ /*
+ * Execution of IRET caused a fault when NMI blocking was in effect (i.e we're in
+ * the guest or nested-guest NMI handler). We need to set the block-by-NMI field so
+ * that virtual NMIs remain blocked until the IRET execution is completed.
+ *
+ * See Intel spec. 31.7.1.2 "Resuming Guest Software After Handling An Exception".
+ */
+ CPUMSetGuestNmiBlocking(pVCpu, true);
+ Log4Func(("Set NMI blocking. uExitReason=%u\n", pVmxTransient->uExitReason));
+ }
+ else if ( pVmxTransient->uExitReason == VMX_EXIT_EPT_VIOLATION
+ || pVmxTransient->uExitReason == VMX_EXIT_PML_FULL
+ || pVmxTransient->uExitReason == VMX_EXIT_SPP_EVENT)
+ {
+ /*
+ * Execution of IRET caused an EPT violation, page-modification log-full event or
+ * SPP-related event VM-exit when NMI blocking was in effect (i.e. we're in the
+ * guest or nested-guest NMI handler). We need to set the block-by-NMI field so
+ * that virtual NMIs remain blocked until the IRET execution is completed.
+ *
+ * See Intel spec. 27.2.3 "Information about NMI unblocking due to IRET"
+ */
+ if (VMX_EXIT_QUAL_EPT_IS_NMI_UNBLOCK_IRET(pVmxTransient->uExitQual))
+ {
+ CPUMSetGuestNmiBlocking(pVCpu, true);
+ Log4Func(("Set NMI blocking. uExitReason=%u\n", pVmxTransient->uExitReason));
+ }
+ }
+ }
+
+ Assert( rcStrict == VINF_SUCCESS || rcStrict == VINF_HM_DOUBLE_FAULT
+ || rcStrict == VINF_EM_RESET || rcStrict == VERR_EM_GUEST_CPU_HANG);
+ return rcStrict;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Performs the relevant VMX instruction checks for VM-exits that occurred due to the
+ * guest attempting to execute a VMX instruction.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_SUCCESS if we should continue handling the VM-exit.
+ * @retval VINF_HM_PENDING_XCPT if an exception was raised.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uExitReason The VM-exit reason.
+ *
+ * @todo NSTVMX: Document other error codes when VM-exit is implemented.
+ * @remarks No-long-jump zone!!!
+ */
+static VBOXSTRICTRC hmR0VmxCheckExitDueToVmxInstr(PVMCPUCC pVCpu, uint32_t uExitReason)
+{
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_SS
+ | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_EFER);
+
+ /*
+ * The physical CPU would have already checked the CPU mode/code segment.
+ * We shall just assert here for paranoia.
+ * See Intel spec. 25.1.1 "Relative Priority of Faults and VM Exits".
+ */
+ Assert(!CPUMIsGuestInRealOrV86ModeEx(&pVCpu->cpum.GstCtx));
+ Assert( !CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx)
+ || CPUMIsGuestIn64BitCodeEx(&pVCpu->cpum.GstCtx));
+
+ if (uExitReason == VMX_EXIT_VMXON)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR4);
+
+ /*
+ * We check CR4.VMXE because it is required to be always set while in VMX operation
+ * by physical CPUs and our CR4 read-shadow is only consulted when executing specific
+ * instructions (CLTS, LMSW, MOV CR, and SMSW) and thus doesn't affect CPU operation
+ * otherwise (i.e. physical CPU won't automatically #UD if Cr4Shadow.VMXE is 0).
+ */
+ if (!CPUMIsGuestVmxEnabled(&pVCpu->cpum.GstCtx))
+ {
+ Log4Func(("CR4.VMXE is not set -> #UD\n"));
+ hmR0VmxSetPendingXcptUD(pVCpu);
+ return VINF_HM_PENDING_XCPT;
+ }
+ }
+ else if (!CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx))
+ {
+ /*
+ * The guest has not entered VMX operation but attempted to execute a VMX instruction
+ * (other than VMXON), we need to raise a #UD.
+ */
+ Log4Func(("Not in VMX root mode -> #UD\n"));
+ hmR0VmxSetPendingXcptUD(pVCpu);
+ return VINF_HM_PENDING_XCPT;
+ }
+
+ /* All other checks (including VM-exit intercepts) are handled by IEM instruction emulation. */
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Decodes the memory operand of an instruction that caused a VM-exit.
+ *
+ * The Exit qualification field provides the displacement field for memory
+ * operand instructions, if any.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_SUCCESS if the operand was successfully decoded.
+ * @retval VINF_HM_PENDING_XCPT if an exception was raised while decoding the
+ * operand.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uExitInstrInfo The VM-exit instruction information field.
+ * @param   GCPtrDisp       The instruction displacement field, if any. For
+ *                          RIP-relative addressing pass RIP + displacement here.
+ * @param   enmMemAccess    The memory operand's access type (read or write).
+ * @param pGCPtrMem Where to store the effective destination memory address.
+ *
+ * @remarks Warning! This function ASSUMES the instruction cannot be used in real or
+ * virtual-8086 mode hence skips those checks while verifying if the
+ * segment is valid.
+ */
+static VBOXSTRICTRC hmR0VmxDecodeMemOperand(PVMCPUCC pVCpu, uint32_t uExitInstrInfo, RTGCPTR GCPtrDisp, VMXMEMACCESS enmMemAccess,
+ PRTGCPTR pGCPtrMem)
+{
+ Assert(pGCPtrMem);
+ Assert(!CPUMIsGuestInRealOrV86Mode(pVCpu));
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_EFER
+ | CPUMCTX_EXTRN_CR0);
+
+ static uint64_t const s_auAddrSizeMasks[] = { UINT64_C(0xffff), UINT64_C(0xffffffff), UINT64_C(0xffffffffffffffff) };
+ static uint64_t const s_auAccessSizeMasks[] = { sizeof(uint16_t), sizeof(uint32_t), sizeof(uint64_t) };
+ AssertCompile(RT_ELEMENTS(s_auAccessSizeMasks) == RT_ELEMENTS(s_auAddrSizeMasks));
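+    /* Both tables are indexed by the address-size field of the VM-exit instruction
+       information: 0 = 16-bit, 1 = 32-bit, 2 = 64-bit addressing. */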
+
+ VMXEXITINSTRINFO ExitInstrInfo;
+ ExitInstrInfo.u = uExitInstrInfo;
+ uint8_t const uAddrSize = ExitInstrInfo.All.u3AddrSize;
+ uint8_t const iSegReg = ExitInstrInfo.All.iSegReg;
+ bool const fIdxRegValid = !ExitInstrInfo.All.fIdxRegInvalid;
+ uint8_t const iIdxReg = ExitInstrInfo.All.iIdxReg;
+ uint8_t const uScale = ExitInstrInfo.All.u2Scaling;
+ bool const fBaseRegValid = !ExitInstrInfo.All.fBaseRegInvalid;
+ uint8_t const iBaseReg = ExitInstrInfo.All.iBaseReg;
+ bool const fIsMemOperand = !ExitInstrInfo.All.fIsRegOperand;
+ bool const fIsLongMode = CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx);
+
+ /*
+ * Validate instruction information.
+     * This shouldn't happen on real hardware but is useful while testing our nested hardware-virtualization code.
+ */
+ AssertLogRelMsgReturn(uAddrSize < RT_ELEMENTS(s_auAddrSizeMasks),
+ ("Invalid address size. ExitInstrInfo=%#RX32\n", ExitInstrInfo.u), VERR_VMX_IPE_1);
+ AssertLogRelMsgReturn(iSegReg < X86_SREG_COUNT,
+ ("Invalid segment register. ExitInstrInfo=%#RX32\n", ExitInstrInfo.u), VERR_VMX_IPE_2);
+ AssertLogRelMsgReturn(fIsMemOperand,
+ ("Expected memory operand. ExitInstrInfo=%#RX32\n", ExitInstrInfo.u), VERR_VMX_IPE_3);
+
+ /*
+ * Compute the complete effective address.
+ *
+ * See AMD instruction spec. 1.4.2 "SIB Byte Format"
+ * See AMD spec. 4.5.2 "Segment Registers".
+ */
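+    /* Illustrative example: for an instruction like VMPTRLD [rbx+rsi*2+0x10], the Exit
+       qualification supplies the displacement (0x10) and the sum below is
+       rbx + (rsi << 1) + 0x10, truncated to the address size; the segment base is added
+       only outside long mode or for FS/GS. */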
+ RTGCPTR GCPtrMem = GCPtrDisp;
+ if (fBaseRegValid)
+ GCPtrMem += pVCpu->cpum.GstCtx.aGRegs[iBaseReg].u64;
+ if (fIdxRegValid)
+ GCPtrMem += pVCpu->cpum.GstCtx.aGRegs[iIdxReg].u64 << uScale;
+
+ RTGCPTR const GCPtrOff = GCPtrMem;
+ if ( !fIsLongMode
+ || iSegReg >= X86_SREG_FS)
+ GCPtrMem += pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
+ GCPtrMem &= s_auAddrSizeMasks[uAddrSize];
+
+ /*
+ * Validate effective address.
+ * See AMD spec. 4.5.3 "Segment Registers in 64-Bit Mode".
+ */
+ uint8_t const cbAccess = s_auAccessSizeMasks[uAddrSize];
+ Assert(cbAccess > 0);
+ if (fIsLongMode)
+ {
+ if (X86_IS_CANONICAL(GCPtrMem))
+ {
+ *pGCPtrMem = GCPtrMem;
+ return VINF_SUCCESS;
+ }
+
+ /** @todo r=ramshankar: We should probably raise \#SS or \#GP. See AMD spec. 4.12.2
+ * "Data Limit Checks in 64-bit Mode". */
+ Log4Func(("Long mode effective address is not canonical GCPtrMem=%#RX64\n", GCPtrMem));
+ hmR0VmxSetPendingXcptGP(pVCpu, 0);
+ return VINF_HM_PENDING_XCPT;
+ }
+
+ /*
+ * This is a watered down version of iemMemApplySegment().
+ * Parts that are not applicable for VMX instructions like real-or-v8086 mode
+ * and segment CPL/DPL checks are skipped.
+ */
+ RTGCPTR32 const GCPtrFirst32 = (RTGCPTR32)GCPtrOff;
+ RTGCPTR32 const GCPtrLast32 = GCPtrFirst32 + cbAccess - 1;
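+    /* Illustrative example: with a 32-bit address size (cbAccess = 4), an access at offset
+       0xfffc in a segment with limit 0xffff covers 0xfffc..0xffff and passes the limit
+       checks below, whereas one at 0xfffe would exceed the limit and raise #GP (or #SS). */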
+ PCCPUMSELREG pSel = &pVCpu->cpum.GstCtx.aSRegs[iSegReg];
+
+ /* Check if the segment is present and usable. */
+ if ( pSel->Attr.n.u1Present
+ && !pSel->Attr.n.u1Unusable)
+ {
+ Assert(pSel->Attr.n.u1DescType);
+ if (!(pSel->Attr.n.u4Type & X86_SEL_TYPE_CODE))
+ {
+ /* Check permissions for the data segment. */
+ if ( enmMemAccess == VMXMEMACCESS_WRITE
+ && !(pSel->Attr.n.u4Type & X86_SEL_TYPE_WRITE))
+ {
+ Log4Func(("Data segment access invalid. iSegReg=%#x Attr=%#RX32\n", iSegReg, pSel->Attr.u));
+ hmR0VmxSetPendingXcptGP(pVCpu, iSegReg);
+ return VINF_HM_PENDING_XCPT;
+ }
+
+ /* Check limits if it's a normal data segment. */
+ if (!(pSel->Attr.n.u4Type & X86_SEL_TYPE_DOWN))
+ {
+ if ( GCPtrFirst32 > pSel->u32Limit
+ || GCPtrLast32 > pSel->u32Limit)
+ {
+ Log4Func(("Data segment limit exceeded. "
+ "iSegReg=%#x GCPtrFirst32=%#RX32 GCPtrLast32=%#RX32 u32Limit=%#RX32\n", iSegReg, GCPtrFirst32,
+ GCPtrLast32, pSel->u32Limit));
+ if (iSegReg == X86_SREG_SS)
+ hmR0VmxSetPendingXcptSS(pVCpu, 0);
+ else
+ hmR0VmxSetPendingXcptGP(pVCpu, 0);
+ return VINF_HM_PENDING_XCPT;
+ }
+ }
+ else
+ {
+ /* Check limits if it's an expand-down data segment.
+ Note! The upper boundary is defined by the B bit, not the G bit! */
+ if ( GCPtrFirst32 < pSel->u32Limit + UINT32_C(1)
+ || GCPtrLast32 > (pSel->Attr.n.u1DefBig ? UINT32_MAX : UINT32_C(0xffff)))
+ {
+ Log4Func(("Expand-down data segment limit exceeded. "
+ "iSegReg=%#x GCPtrFirst32=%#RX32 GCPtrLast32=%#RX32 u32Limit=%#RX32\n", iSegReg, GCPtrFirst32,
+ GCPtrLast32, pSel->u32Limit));
+ if (iSegReg == X86_SREG_SS)
+ hmR0VmxSetPendingXcptSS(pVCpu, 0);
+ else
+ hmR0VmxSetPendingXcptGP(pVCpu, 0);
+ return VINF_HM_PENDING_XCPT;
+ }
+ }
+ }
+ else
+ {
+ /* Check permissions for the code segment. */
+ if ( enmMemAccess == VMXMEMACCESS_WRITE
+ || ( enmMemAccess == VMXMEMACCESS_READ
+ && !(pSel->Attr.n.u4Type & X86_SEL_TYPE_READ)))
+ {
+ Log4Func(("Code segment access invalid. Attr=%#RX32\n", pSel->Attr.u));
+ Assert(!CPUMIsGuestInRealOrV86ModeEx(&pVCpu->cpum.GstCtx));
+ hmR0VmxSetPendingXcptGP(pVCpu, 0);
+ return VINF_HM_PENDING_XCPT;
+ }
+
+ /* Check limits for the code segment (normal/expand-down not applicable for code segments). */
+ if ( GCPtrFirst32 > pSel->u32Limit
+ || GCPtrLast32 > pSel->u32Limit)
+ {
+ Log4Func(("Code segment limit exceeded. GCPtrFirst32=%#RX32 GCPtrLast32=%#RX32 u32Limit=%#RX32\n",
+ GCPtrFirst32, GCPtrLast32, pSel->u32Limit));
+ if (iSegReg == X86_SREG_SS)
+ hmR0VmxSetPendingXcptSS(pVCpu, 0);
+ else
+ hmR0VmxSetPendingXcptGP(pVCpu, 0);
+ return VINF_HM_PENDING_XCPT;
+ }
+ }
+ }
+ else
+ {
+ Log4Func(("Not present or unusable segment. iSegReg=%#x Attr=%#RX32\n", iSegReg, pSel->Attr.u));
+ hmR0VmxSetPendingXcptGP(pVCpu, 0);
+ return VINF_HM_PENDING_XCPT;
+ }
+
+ *pGCPtrMem = GCPtrMem;
+ return VINF_SUCCESS;
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+
+/**
+ * VM-exit helper for LMSW.
+ */
+static VBOXSTRICTRC hmR0VmxExitLmsw(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint8_t cbInstr, uint16_t uMsw, RTGCPTR GCPtrEffDst)
+{
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedLmsw(pVCpu, cbInstr, uMsw, GCPtrEffDst);
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_IEM_RAISED_XCPT, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR0);
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitLmsw);
+ Log4Func(("rcStrict=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit helper for CLTS.
+ */
+static VBOXSTRICTRC hmR0VmxExitClts(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint8_t cbInstr)
+{
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedClts(pVCpu, cbInstr);
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_IEM_RAISED_XCPT, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR0);
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitClts);
+ Log4Func(("rcStrict=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit helper for MOV from CRx (CRx read).
+ */
+static VBOXSTRICTRC hmR0VmxExitMovFromCrX(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint8_t cbInstr, uint8_t iGReg, uint8_t iCrReg)
+{
+ Assert(iCrReg < 16);
+ Assert(iGReg < RT_ELEMENTS(pVCpu->cpum.GstCtx.aGRegs));
+
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedMovCRxRead(pVCpu, cbInstr, iGReg, iCrReg);
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_IEM_RAISED_XCPT, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ if (iGReg == X86_GREG_xSP)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_RSP);
+ else
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+#ifdef VBOX_WITH_STATISTICS
+ switch (iCrReg)
+ {
+ case 0: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR0Read); break;
+ case 2: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR2Read); break;
+ case 3: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR3Read); break;
+ case 4: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR4Read); break;
+ case 8: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR8Read); break;
+ }
+#endif
+ Log4Func(("CR%d Read access rcStrict=%Rrc\n", iCrReg, VBOXSTRICTRC_VAL(rcStrict)));
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit helper for MOV to CRx (CRx write).
+ */
+static VBOXSTRICTRC hmR0VmxExitMovToCrX(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint8_t cbInstr, uint8_t iGReg, uint8_t iCrReg)
+{
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedMovCRxWrite(pVCpu, cbInstr, iCrReg, iGReg);
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_IEM_RAISED_XCPT
+ || rcStrict == VINF_PGM_SYNC_CR3, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ switch (iCrReg)
+ {
+ case 0:
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR0
+ | HM_CHANGED_GUEST_EFER_MSR | HM_CHANGED_VMX_ENTRY_EXIT_CTLS);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR0Write);
+ Log4Func(("CR0 write. rcStrict=%Rrc CR0=%#RX64\n", VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cr0));
+ break;
+
+ case 2:
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR2Write);
+            /* Nothing to do here; CR2 is not part of the VMCS. */
+ break;
+
+ case 3:
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR3);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR3Write);
+ Log4Func(("CR3 write. rcStrict=%Rrc CR3=%#RX64\n", VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cr3));
+ break;
+
+ case 4:
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR4);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR4Write);
+ Log4Func(("CR4 write. rc=%Rrc CR4=%#RX64 fLoadSaveGuestXcr0=%u\n", VBOXSTRICTRC_VAL(rcStrict),
+ pVCpu->cpum.GstCtx.cr4, pVCpu->hm.s.fLoadSaveGuestXcr0));
+ break;
+
+ case 8:
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged,
+ HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_APIC_TPR);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR8Write);
+ break;
+
+ default:
+ AssertMsgFailed(("Invalid CRx register %#x\n", iCrReg));
+ break;
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit exception handler for \#PF (Page-fault exception).
+ *
+ * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
+ */
+static VBOXSTRICTRC hmR0VmxExitXcptPF(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+
+ if (!pVM->hm.s.fNestedPaging)
+ { /* likely */ }
+ else
+ {
+#if !defined(HMVMX_ALWAYS_TRAP_ALL_XCPTS) && !defined(HMVMX_ALWAYS_TRAP_PF)
+ Assert(pVmxTransient->fIsNestedGuest || pVCpu->hm.s.fUsingDebugLoop);
+#endif
+ pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */
+ if (!pVmxTransient->fVectoringDoublePF)
+ {
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), 0 /* cbInstr */,
+ pVmxTransient->uExitIntErrorCode, pVmxTransient->uExitQual);
+ }
+ else
+ {
+ /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
+ Assert(!pVmxTransient->fIsNestedGuest);
+ hmR0VmxSetPendingXcptDF(pVCpu);
+ Log4Func(("Pending #DF due to vectoring #PF w/ NestedPaging\n"));
+ }
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
+ return VINF_SUCCESS;
+ }
+
+ Assert(!pVmxTransient->fIsNestedGuest);
+
+    /* If it's a vectoring #PF, emulate injecting the original event as PGMTrap0eHandler() is incapable
+ of differentiating between instruction emulation and event injection that caused a #PF. See @bugref{6607}. */
+ if (pVmxTransient->fVectoringPF)
+ {
+ Assert(pVCpu->hm.s.Event.fPending);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+
+ Log4Func(("#PF: cs:rip=%#04x:%#RX64 err_code=%#RX32 exit_qual=%#RX64 cr3=%#RX64\n", pCtx->cs.Sel, pCtx->rip,
+ pVmxTransient->uExitIntErrorCode, pVmxTransient->uExitQual, pCtx->cr3));
+
+ TRPMAssertXcptPF(pVCpu, pVmxTransient->uExitQual, (RTGCUINT)pVmxTransient->uExitIntErrorCode);
+ rc = PGMTrap0eHandler(pVCpu, pVmxTransient->uExitIntErrorCode, CPUMCTX2CORE(pCtx), (RTGCPTR)pVmxTransient->uExitQual);
+
+ Log4Func(("#PF: rc=%Rrc\n", rc));
+ if (rc == VINF_SUCCESS)
+ {
+ /*
+         * This is typically a shadow page table sync or an MMIO instruction. But we may have
+ * emulated something like LTR or a far jump. Any part of the CPU context may have changed.
+ */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ TRPMResetTrap(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
+ return rc;
+ }
+
+ if (rc == VINF_EM_RAW_GUEST_TRAP)
+ {
+ if (!pVmxTransient->fVectoringDoublePF)
+ {
+ /* It's a guest page fault and needs to be reflected to the guest. */
+ uint32_t const uGstErrorCode = TRPMGetErrorCode(pVCpu);
+ TRPMResetTrap(pVCpu);
+ pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory #PF. */
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), 0 /* cbInstr */,
+ uGstErrorCode, pVmxTransient->uExitQual);
+ }
+ else
+ {
+ /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
+ TRPMResetTrap(pVCpu);
+ pVCpu->hm.s.Event.fPending = false; /* Clear pending #PF to replace it with #DF. */
+ hmR0VmxSetPendingXcptDF(pVCpu);
+ Log4Func(("#PF: Pending #DF due to vectoring #PF\n"));
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
+ return VINF_SUCCESS;
+ }
+
+ TRPMResetTrap(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM);
+ return rc;
+}
+
+
+/**
+ * VM-exit exception handler for \#MF (Math Fault: floating point exception).
+ *
+ * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
+ */
+static VBOXSTRICTRC hmR0VmxExitXcptMF(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
+
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CR0);
+ AssertRCReturn(rc, rc);
+
+ if (!(pVCpu->cpum.GstCtx.cr0 & X86_CR0_NE))
+ {
+ /* Convert a #MF into a FERR -> IRQ 13. See @bugref{6117}. */
+ rc = PDMIsaSetIrq(pVCpu->CTX_SUFF(pVM), 13, 1, 0 /* uTagSrc */);
+
+ /** @todo r=ramshankar: The Intel spec. does -not- specify that this VM-exit
+         *        provides VM-exit instruction length. If this causes problems later,
+ * disassemble the instruction like it's done on AMD-V. */
+ int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+ AssertRCReturn(rc2, rc2);
+ return rc;
+ }
+
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), pVmxTransient->cbExitInstr,
+ pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit exception handler for \#BP (Breakpoint exception).
+ *
+ * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
+ */
+static VBOXSTRICTRC hmR0VmxExitXcptBP(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP);
+
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+
+ if (!pVmxTransient->fIsNestedGuest)
+ rc = DBGFRZTrap03Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(&pVCpu->cpum.GstCtx));
+ else
+ rc = VINF_EM_RAW_GUEST_TRAP;
+
+ if (rc == VINF_EM_RAW_GUEST_TRAP)
+ {
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
+ pVmxTransient->cbExitInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
+ rc = VINF_SUCCESS;
+ }
+
+ Assert(rc == VINF_SUCCESS || rc == VINF_EM_DBG_BREAKPOINT);
+ return rc;
+}
+
+
+/**
+ * VM-exit exception handler for \#AC (Alignment-check exception).
+ *
+ * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
+ */
+static VBOXSTRICTRC hmR0VmxExitXcptAC(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestAC);
+
+ /* Re-inject it. We'll detect any nesting before getting here. */
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
+ pVmxTransient->cbExitInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit exception handler for \#DB (Debug exception).
+ *
+ * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
+ */
+static VBOXSTRICTRC hmR0VmxExitXcptDB(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB);
+
+ /*
+ * Get the DR6-like values from the Exit qualification and pass it to DBGF for processing.
+ */
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+
+    /* See Intel spec. Table 27-1. "Exit Qualifications for debug exceptions" for the format. */
+ uint64_t const uDR6 = X86_DR6_INIT_VAL
+ | (pVmxTransient->uExitQual & ( X86_DR6_B0 | X86_DR6_B1 | X86_DR6_B2 | X86_DR6_B3
+ | X86_DR6_BD | X86_DR6_BS));
+
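+    /* Illustrative example: if only breakpoint 1 triggered, bit 1 (B1) is set in the Exit
+       qualification and uDR6 becomes X86_DR6_INIT_VAL | X86_DR6_B1 (0xffff0ff2), which is
+       what gets merged into the guest's DR6 below when the #DB turns out to be for the guest. */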
+ int rc;
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (!pVmxTransient->fIsNestedGuest)
+ {
+ rc = DBGFRZTrap01Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx), uDR6, pVCpu->hm.s.fSingleInstruction);
+
+ /*
+ * Prevents stepping twice over the same instruction when the guest is stepping using
+ * EFLAGS.TF and the hypervisor debugger is stepping using MTF.
+ * Testcase: DOSQEMM, break (using "ba x 1") at cs:rip 0x70:0x774 and step (using "t").
+ */
+ if ( rc == VINF_EM_DBG_STEPPED
+ && (pVmxTransient->pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MONITOR_TRAP_FLAG))
+ {
+ Assert(pVCpu->hm.s.fSingleInstruction);
+ rc = VINF_EM_RAW_GUEST_TRAP;
+ }
+ }
+ else
+ rc = VINF_EM_RAW_GUEST_TRAP;
+ Log6Func(("rc=%Rrc\n", rc));
+ if (rc == VINF_EM_RAW_GUEST_TRAP)
+ {
+ /*
+ * The exception was for the guest. Update DR6, DR7.GD and
+ * IA32_DEBUGCTL.LBR before forwarding it.
+ * See Intel spec. 27.1 "Architectural State before a VM-Exit".
+ */
+ VMMRZCallRing3Disable(pVCpu);
+ HM_DISABLE_PREEMPT(pVCpu);
+
+ pCtx->dr[6] &= ~X86_DR6_B_MASK;
+ pCtx->dr[6] |= uDR6;
+ if (CPUMIsGuestDebugStateActive(pVCpu))
+ ASMSetDR6(pCtx->dr[6]);
+
+ HM_RESTORE_PREEMPT();
+ VMMRZCallRing3Enable(pVCpu);
+
+ rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_DR7);
+ AssertRCReturn(rc, rc);
+
+ /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
+ pCtx->dr[7] &= ~(uint64_t)X86_DR7_GD;
+
+ /* Paranoia. */
+ pCtx->dr[7] &= ~(uint64_t)X86_DR7_RAZ_MASK;
+ pCtx->dr[7] |= X86_DR7_RA1_MASK;
+
+ rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
+ AssertRC(rc);
+
+ /*
+ * Raise #DB in the guest.
+ *
+ * It is important to reflect exactly what the VM-exit gave us (preserving the
+ * interruption-type) rather than use hmR0VmxSetPendingXcptDB() as the #DB could've
+ * been raised while executing ICEBP (INT1) and not the regular #DB. Thus it may
+ * trigger different handling in the CPU (like skipping DPL checks), see @bugref{6398}.
+ *
+     * Intel re-documented ICEBP/INT1 in May 2018; it was previously documented as part of
+     * the Intel 386. See Intel spec. 24.8.3 "VM-Entry Controls for Event Injection".
+ */
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
+ pVmxTransient->cbExitInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+ }
+
+ /*
+     * Not a guest trap, so it must be a hypervisor-related debug event.
+ * Update DR6 in case someone is interested in it.
+ */
+ AssertMsg(rc == VINF_EM_DBG_STEPPED || rc == VINF_EM_DBG_BREAKPOINT, ("%Rrc\n", rc));
+ AssertReturn(pVmxTransient->fWasHyperDebugStateActive, VERR_HM_IPE_5);
+ CPUMSetHyperDR6(pVCpu, uDR6);
+
+ return rc;
+}
+
+
+/**
+ * Hacks its way around the lovely mesa driver's backdoor accesses.
+ *
+ * @sa hmR0SvmHandleMesaDrvGp.
+ */
+static int hmR0VmxHandleMesaDrvGp(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PCPUMCTX pCtx)
+{
+ LogFunc(("cs:rip=%#04x:%#RX64 rcx=%#RX64 rbx=%#RX64\n", pCtx->cs.Sel, pCtx->rip, pCtx->rcx, pCtx->rbx));
+ RT_NOREF(pCtx);
+
+ /* For now we'll just skip the instruction. */
+ return hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Checks if the \#GP'ing instruction is the mesa driver doing its lovely
+ * backdoor logging w/o checking what it is running inside.
+ *
+ * This recognizes an "IN EAX,DX" instruction executed in flat ring-3, with the
+ * backdoor port and magic numbers loaded in registers.
+ *
+ * @returns true if it is, false if it isn't.
+ * @sa hmR0SvmIsMesaDrvGp.
+ */
+DECLINLINE(bool) hmR0VmxIsMesaDrvGp(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PCPUMCTX pCtx)
+{
+ /* 0xed: IN eAX,dx */
+ uint8_t abInstr[1];
+ if (pVmxTransient->cbExitInstr != sizeof(abInstr))
+ return false;
+
+ /* Check that it is #GP(0). */
+ if (pVmxTransient->uExitIntErrorCode != 0)
+ return false;
+
+ /* Check magic and port. */
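+    /* These are the VMware backdoor values the Mesa SVGA driver uses for logging: the magic
+       0x564d5868 ('VMXh') in EAX and the backdoor I/O port 0x5658 ('VX') in DX. */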
+ Assert(!(pCtx->fExtrn & (CPUMCTX_EXTRN_RAX | CPUMCTX_EXTRN_RDX | CPUMCTX_EXTRN_RCX)));
+ /*Log(("hmR0VmxIsMesaDrvGp: rax=%RX64 rdx=%RX64\n", pCtx->rax, pCtx->rdx));*/
+ if (pCtx->rax != UINT32_C(0x564d5868))
+ return false;
+ if (pCtx->dx != UINT32_C(0x5658))
+ return false;
+
+ /* Flat ring-3 CS. */
+ AssertCompile(HMVMX_CPUMCTX_EXTRN_ALL & CPUMCTX_EXTRN_CS);
+ Assert(!(pCtx->fExtrn & CPUMCTX_EXTRN_CS));
+ /*Log(("hmR0VmxIsMesaDrvGp: cs.Attr.n.u2Dpl=%d base=%Rx64\n", pCtx->cs.Attr.n.u2Dpl, pCtx->cs.u64Base));*/
+ if (pCtx->cs.Attr.n.u2Dpl != 3)
+ return false;
+ if (pCtx->cs.u64Base != 0)
+ return false;
+
+ /* Check opcode. */
+ AssertCompile(HMVMX_CPUMCTX_EXTRN_ALL & CPUMCTX_EXTRN_RIP);
+ Assert(!(pCtx->fExtrn & CPUMCTX_EXTRN_RIP));
+ int rc = PGMPhysSimpleReadGCPtr(pVCpu, abInstr, pCtx->rip, sizeof(abInstr));
+ /*Log(("hmR0VmxIsMesaDrvGp: PGMPhysSimpleReadGCPtr -> %Rrc %#x\n", rc, abInstr[0]));*/
+ if (RT_FAILURE(rc))
+ return false;
+ if (abInstr[0] != 0xed)
+ return false;
+
+ return true;
+}
+
+
+/**
+ * VM-exit exception handler for \#GP (General-protection exception).
+ *
+ * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
+ */
+static VBOXSTRICTRC hmR0VmxExitXcptGP(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if (pVmcsInfo->RealMode.fRealOnV86Active)
+ { /* likely */ }
+ else
+ {
+#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
+ Assert(pVCpu->hm.s.fUsingDebugLoop || pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv || pVmxTransient->fIsNestedGuest);
+#endif
+ /*
+ * If the guest is not in real-mode or we have unrestricted guest execution support, or if we are
+ * executing a nested-guest, reflect #GP to the guest or nested-guest.
+ */
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+ Log4Func(("Gst: cs:rip=%#04x:%#RX64 ErrorCode=%#x cr0=%#RX64 cpl=%u tr=%#04x\n", pCtx->cs.Sel, pCtx->rip,
+ pVmxTransient->uExitIntErrorCode, pCtx->cr0, CPUMGetGuestCPL(pVCpu), pCtx->tr.Sel));
+
+ if ( pVmxTransient->fIsNestedGuest
+ || !pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv
+ || !hmR0VmxIsMesaDrvGp(pVCpu, pVmxTransient, pCtx))
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
+ pVmxTransient->cbExitInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
+ else
+ rc = hmR0VmxHandleMesaDrvGp(pVCpu, pVmxTransient, pCtx);
+ return rc;
+ }
+
+ Assert(CPUMIsGuestInRealModeEx(pCtx));
+ Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest);
+ Assert(!pVmxTransient->fIsNestedGuest);
+
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecOne(pVCpu);
+ if (rcStrict == VINF_SUCCESS)
+ {
+ if (!CPUMIsGuestInRealModeEx(pCtx))
+ {
+ /*
+ * The guest is no longer in real-mode, check if we can continue executing the
+ * guest using hardware-assisted VMX. Otherwise, fall back to emulation.
+ */
+ pVmcsInfo->RealMode.fRealOnV86Active = false;
+ if (HMCanExecuteVmxGuest(pVCpu->pVMR0, pVCpu, pCtx))
+ {
+ Log4Func(("Mode changed but guest still suitable for executing using hardware-assisted VMX\n"));
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ }
+ else
+ {
+ Log4Func(("Mode changed -> VINF_EM_RESCHEDULE\n"));
+ rcStrict = VINF_EM_RESCHEDULE;
+ }
+ }
+ else
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ return VBOXSTRICTRC_VAL(rcStrict);
+}
+
+
+/**
+ * VM-exit exception handler wrapper for all other exceptions that are not handled
+ * by a specific handler.
+ *
+ * This simply re-injects the exception back into the VM without any special
+ * processing.
+ *
+ * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
+ */
+static VBOXSTRICTRC hmR0VmxExitXcptOthers(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
+ PCVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ AssertMsg(pVCpu->hm.s.fUsingDebugLoop || pVmcsInfo->RealMode.fRealOnV86Active || pVmxTransient->fIsNestedGuest,
+ ("uVector=%#x u32XcptBitmap=%#X32\n",
+ VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo), pVmcsInfo->u32XcptBitmap));
+ NOREF(pVmcsInfo);
+#endif
+
+ /*
+ * Re-inject the exception into the guest. This cannot be a double-fault condition which
+ * would have been handled while checking exits due to event delivery.
+ */
+ uint8_t const uVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo);
+
+#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ AssertRCReturn(rc, rc);
+ Log4Func(("Reinjecting Xcpt. uVector=%#x cs:rip=%#04x:%#RX64\n", uVector, pCtx->cs.Sel, pCtx->rip));
+#endif
+
+#ifdef VBOX_WITH_STATISTICS
+ switch (uVector)
+ {
+ case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE); break;
+ case X86_XCPT_DB: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB); break;
+ case X86_XCPT_BP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP); break;
+ case X86_XCPT_OF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestOF); break;
+ case X86_XCPT_BR: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBR); break;
+ case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD); break;
+ case X86_XCPT_NM: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestOF); break;
+ case X86_XCPT_DF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDF); break;
+ case X86_XCPT_TS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestTS); break;
+ case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP); break;
+ case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS); break;
+ case X86_XCPT_GP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP); break;
+ case X86_XCPT_PF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF); break;
+ case X86_XCPT_MF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF); break;
+ case X86_XCPT_AC: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestAC); break;
+ case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXF); break;
+ default:
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXcpUnk);
+ break;
+ }
+#endif
+
+ /* We should never call this function for a page-fault, we'd need to pass on the fault address below otherwise. */
+ Assert(!VMX_EXIT_INT_INFO_IS_XCPT_PF(pVmxTransient->uExitIntInfo));
+ NOREF(uVector);
+
+ /* Re-inject the original exception into the guest. */
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
+ pVmxTransient->cbExitInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit exception handler for all exceptions (except NMIs!).
+ *
+ * @remarks This may be called for both guests and nested-guests. Take care to not
+ * make assumptions and avoid doing anything that is not relevant when
+ * executing a nested-guest (e.g., Mesa driver hacks).
+ */
+static VBOXSTRICTRC hmR0VmxExitXcpt(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_ASSERT_READ(pVmxTransient, HMVMX_READ_XCPT_INFO);
+
+ /*
+ * If this VM-exit occurred while delivering an event through the guest IDT, take
+ * action based on the return code and additional hints (e.g. for page-faults)
+ * that will be updated in the VMX transient structure.
+ */
+ VBOXSTRICTRC rcStrict = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient);
+ if (rcStrict == VINF_SUCCESS)
+ {
+ /*
+ * If an exception caused a VM-exit due to delivery of an event, the original
+ * event may have to be re-injected into the guest. We shall reinject it and
+ * continue guest execution. However, page-fault is a complicated case and
+ * needs additional processing done in hmR0VmxExitXcptPF().
+ */
+ Assert(VMX_EXIT_INT_INFO_IS_VALID(pVmxTransient->uExitIntInfo));
+ uint8_t const uVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo);
+ if ( !pVCpu->hm.s.Event.fPending
+ || uVector == X86_XCPT_PF)
+ {
+ switch (uVector)
+ {
+ case X86_XCPT_PF: return hmR0VmxExitXcptPF(pVCpu, pVmxTransient);
+ case X86_XCPT_GP: return hmR0VmxExitXcptGP(pVCpu, pVmxTransient);
+ case X86_XCPT_MF: return hmR0VmxExitXcptMF(pVCpu, pVmxTransient);
+ case X86_XCPT_DB: return hmR0VmxExitXcptDB(pVCpu, pVmxTransient);
+ case X86_XCPT_BP: return hmR0VmxExitXcptBP(pVCpu, pVmxTransient);
+ case X86_XCPT_AC: return hmR0VmxExitXcptAC(pVCpu, pVmxTransient);
+ default:
+ return hmR0VmxExitXcptOthers(pVCpu, pVmxTransient);
+ }
+ }
+ /* else: inject pending event before resuming guest execution. */
+ }
+ else if (rcStrict == VINF_HM_DOUBLE_FAULT)
+ {
+ Assert(pVCpu->hm.s.Event.fPending);
+ rcStrict = VINF_SUCCESS;
+ }
+
+ return rcStrict;
+}
+/** @} */
+
+
+/** @name VM-exit handlers.
+ * @{
+ */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- VM-exit handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+
+/**
+ * VM-exit handler for external interrupts (VMX_EXIT_EXT_INT).
+ */
+HMVMX_EXIT_DECL hmR0VmxExitExtInt(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitExtInt);
+ /* Windows hosts (32-bit and 64-bit) have DPC latency issues. See @bugref{6853}. */
+ if (VMMR0ThreadCtxHookIsEnabled(pVCpu))
+ return VINF_SUCCESS;
+ return VINF_EM_RAW_INTERRUPT;
+}
+
+
+/**
+ * VM-exit handler for exceptions or NMIs (VMX_EXIT_XCPT_OR_NMI). Conditional
+ * VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitXcptOrNmi(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitXcptNmi, y3);
+
+ hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
+
+ uint32_t const uExitIntType = VMX_EXIT_INT_INFO_TYPE(pVmxTransient->uExitIntInfo);
+ uint8_t const uVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo);
+ Assert(VMX_EXIT_INT_INFO_IS_VALID(pVmxTransient->uExitIntInfo));
+
+ PCVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ Assert( !(pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_ACK_EXT_INT)
+ && uExitIntType != VMX_EXIT_INT_INFO_TYPE_EXT_INT);
+ NOREF(pVmcsInfo);
+
+ VBOXSTRICTRC rcStrict;
+ switch (uExitIntType)
+ {
+ /*
+ * Host physical NMIs:
+ * This cannot be a guest NMI as the only way for the guest to receive an NMI is if we
+ * injected it ourselves and anything we inject is not going to cause a VM-exit directly
+ * for the event being injected[1]. Go ahead and dispatch the NMI to the host[2].
+ *
+ * See Intel spec. 27.2.3 "Information for VM Exits During Event Delivery".
+ * See Intel spec. 27.5.5 "Updating Non-Register State".
+ */
+ case VMX_EXIT_INT_INFO_TYPE_NMI:
+ {
+ rcStrict = hmR0VmxExitHostNmi(pVCpu, pVmcsInfo);
+ break;
+ }
+
+ /*
+ * Privileged software exceptions (#DB from ICEBP),
+ * Software exceptions (#BP and #OF),
+ * Hardware exceptions:
+ * Process the required exceptions and resume guest execution if possible.
+ */
+ case VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT:
+ Assert(uVector == X86_XCPT_DB);
+ RT_FALL_THRU();
+ case VMX_EXIT_INT_INFO_TYPE_SW_XCPT:
+ Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF || uExitIntType == VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT);
+ RT_FALL_THRU();
+ case VMX_EXIT_INT_INFO_TYPE_HW_XCPT:
+ {
+ NOREF(uVector);
+ hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
+ hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
+
+ rcStrict = hmR0VmxExitXcpt(pVCpu, pVmxTransient);
+ break;
+ }
+
+ default:
+ {
+ pVCpu->hm.s.u32HMError = pVmxTransient->uExitIntInfo;
+ rcStrict = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_TYPE;
+ AssertMsgFailed(("Invalid/unexpected VM-exit interruption info %#x\n", pVmxTransient->uExitIntInfo));
+ break;
+ }
+ }
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for interrupt-window exiting (VMX_EXIT_INT_WINDOW).
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitIntWindow(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+    /* Indicate that we no longer need to VM-exit when the guest is ready to receive interrupts; it is now ready. */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ hmR0VmxClearIntWindowExitVmcs(pVmcsInfo);
+
+ /* Evaluate and deliver pending events and resume guest execution. */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit handler for NMI-window exiting (VMX_EXIT_NMI_WINDOW).
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitNmiWindow(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ if (RT_UNLIKELY(!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT))) /** @todo NSTVMX: Turn this into an assertion. */
+ {
+ AssertMsgFailed(("Unexpected NMI-window exit.\n"));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient->uExitReason);
+ }
+
+ Assert(!CPUMIsGuestNmiBlocking(pVCpu));
+
+ /*
+ * If block-by-STI is set when we get this VM-exit, it means the CPU doesn't block NMIs following STI.
+ * It is therefore safe to unblock STI and deliver the NMI ourselves. See @bugref{7445}.
+ */
+ uint32_t fIntrState;
+ int rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
+ AssertRC(rc);
+ Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS));
+ if (fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
+ {
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+
+ fIntrState &= ~VMX_VMCS_GUEST_INT_STATE_BLOCK_STI;
+ rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
+ AssertRC(rc);
+ }
+
+    /* Indicate that we no longer need to VM-exit when the guest is ready to receive NMIs; it is now ready. */
+ hmR0VmxClearNmiWindowExitVmcs(pVmcsInfo);
+
+ /* Evaluate and deliver pending events and resume guest execution. */
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit handler for WBINVD (VMX_EXIT_WBINVD). Conditional VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitWbinvd(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ return hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * VM-exit handler for INVD (VMX_EXIT_INVD). Unconditional VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitInvd(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ return hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * VM-exit handler for CPUID (VMX_EXIT_CPUID). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitCpuid(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /*
+ * Get the state we need and update the exit history entry.
+ */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict;
+ PCEMEXITREC pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
+ EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_CPUID),
+ pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
+ if (!pExitRec)
+ {
+ /*
+ * Regular CPUID instruction execution.
+ */
+ rcStrict = IEMExecDecodedCpuid(pVCpu, pVmxTransient->cbExitInstr);
+ if (rcStrict == VINF_SUCCESS)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ }
+ else
+ {
+ /*
+ * Frequent exit or something needing probing. Get state and call EMHistoryExec.
+ */
+ int rc2 = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc2, rc2);
+
+ Log4(("CpuIdExit/%u: %04x:%08RX64: %#x/%#x -> EMHistoryExec\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ecx));
+
+ rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+
+ Log4(("CpuIdExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+ VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for GETSEC (VMX_EXIT_GETSEC). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitGetsec(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_CR4);
+ AssertRCReturn(rc, rc);
+
+ if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_SMXE)
+ return VINF_EM_RAW_EMULATE_INSTR;
+
+ AssertMsgFailed(("hmR0VmxExitGetsec: Unexpected VM-exit when CR4.SMXE is 0.\n"));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient->uExitReason);
+}
+
+
+/**
+ * VM-exit handler for RDTSC (VMX_EXIT_RDTSC). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitRdtsc(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedRdtsc(pVCpu, pVmxTransient->cbExitInstr);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ {
+ /* If we get a spurious VM-exit when TSC offsetting is enabled,
+ we must reset offsetting on VM-entry. See @bugref{6634}. */
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TSC_OFFSETTING)
+ pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = false;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for RDTSCP (VMX_EXIT_RDTSCP). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitRdtscp(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_TSC_AUX);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedRdtscp(pVCpu, pVmxTransient->cbExitInstr);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ {
+ /* If we get a spurious VM-exit when TSC offsetting is enabled,
+ we must reset offsetting on VM-reentry. See @bugref{6634}. */
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TSC_OFFSETTING)
+ pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = false;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for RDPMC (VMX_EXIT_RDPMC). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitRdpmc(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_CR0
+ | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_SS);
+ AssertRCReturn(rc, rc);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ rc = EMInterpretRdpmc(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx));
+ if (RT_LIKELY(rc == VINF_SUCCESS))
+ {
+ rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+ Assert(pVmxTransient->cbExitInstr == 2);
+ }
+ else
+ {
+ AssertMsgFailed(("hmR0VmxExitRdpmc: EMInterpretRdpmc failed with %Rrc\n", rc));
+ rc = VERR_EM_INTERPRETER;
+ }
+ return rc;
+}
+
+
+/**
+ * VM-exit handler for VMCALL (VMX_EXIT_VMCALL). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmcall(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ VBOXSTRICTRC rcStrict = VERR_VMX_IPE_3;
+ if (EMAreHypercallInstructionsEnabled(pVCpu))
+ {
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_CR0
+ | CPUMCTX_EXTRN_SS | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_EFER);
+ AssertRCReturn(rc, rc);
+
+ /* Perform the hypercall. */
+ rcStrict = GIMHypercall(pVCpu, &pVCpu->cpum.GstCtx);
+ if (rcStrict == VINF_SUCCESS)
+ {
+ rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+ AssertRCReturn(rc, rc);
+ }
+ else
+ Assert( rcStrict == VINF_GIM_R3_HYPERCALL
+ || rcStrict == VINF_GIM_HYPERCALL_CONTINUING
+ || RT_FAILURE(rcStrict));
+
+ /* If the hypercall changes anything other than guest's general-purpose registers,
+ we would need to reload the guest changed bits here before VM-entry. */
+ }
+ else
+ Log4Func(("Hypercalls not enabled\n"));
+
+ /* If hypercalls are disabled or the hypercall failed for some reason, raise #UD and continue. */
+ if (RT_FAILURE(rcStrict))
+ {
+ hmR0VmxSetPendingXcptUD(pVCpu);
+ rcStrict = VINF_SUCCESS;
+ }
+
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for INVLPG (VMX_EXIT_INVLPG). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitInvlpg(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging || pVCpu->hm.s.fUsingDebugLoop);
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedInvlpg(pVCpu, pVmxTransient->cbExitInstr, pVmxTransient->uExitQual);
+
+ if (rcStrict == VINF_SUCCESS || rcStrict == VINF_PGM_SYNC_CR3)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ else
+ AssertMsgFailed(("Unexpected IEMExecDecodedInvlpg(%#RX64) status: %Rrc\n", pVmxTransient->uExitQual,
+ VBOXSTRICTRC_VAL(rcStrict)));
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for MONITOR (VMX_EXIT_MONITOR). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMonitor(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK | CPUMCTX_EXTRN_DS);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedMonitor(pVCpu, pVmxTransient->cbExitInstr);
+ if (rcStrict == VINF_SUCCESS)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for MWAIT (VMX_EXIT_MWAIT). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMwait(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedMwait(pVCpu, pVmxTransient->cbExitInstr);
+ if (RT_SUCCESS(rcStrict))
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ if (EMMonitorWaitShouldContinue(pVCpu, &pVCpu->cpum.GstCtx))
+ rcStrict = VINF_SUCCESS;
+ }
+
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for triple faults (VMX_EXIT_TRIPLE_FAULT). Unconditional
+ * VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitTripleFault(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ return VINF_EM_RESET;
+}
+
+
+/**
+ * VM-exit handler for HLT (VMX_EXIT_HLT). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitHlt(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ int rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RFLAGS); /* Advancing the RIP above should've imported eflags. */
+ if (EMShouldContinueAfterHalt(pVCpu, &pVCpu->cpum.GstCtx)) /* Requires eflags. */
+ rc = VINF_SUCCESS;
+ else
+ rc = VINF_EM_HALT;
+
+ if (rc != VINF_SUCCESS)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHltToR3);
+ return rc;
+}
+
+
+/**
+ * VM-exit handler for instructions that result in a \#UD exception delivered to
+ * the guest.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitSetPendingXcptUD(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ hmR0VmxSetPendingXcptUD(pVCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit handler for expiry of the VMX-preemption timer.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /* If the VMX-preemption timer has expired, reinitialize the preemption timer on next VM-entry. */
+ pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = false;
+
+ /* If there are any timer events pending, fall back to ring-3, otherwise resume guest execution. */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ bool fTimersPending = TMTimerPollBool(pVM, pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPreemptTimer);
+ return fTimersPending ? VINF_EM_RAW_TIMER_PENDING : VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit handler for XSETBV (VMX_EXIT_XSETBV). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitXsetbv(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_CR4);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedXsetbv(pVCpu, pVmxTransient->cbExitInstr);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, rcStrict != VINF_IEM_RAISED_XCPT ? HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS
+ : HM_CHANGED_RAISED_XCPT_MASK);
+
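+    /* Re-evaluate whether XCR0 needs swapping around VM-entry/exit now that the guest may have changed it via XSETBV. */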
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ pVCpu->hm.s.fLoadSaveGuestXcr0 = (pCtx->cr4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0();
+
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for INVPCID (VMX_EXIT_INVPCID). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitInvpcid(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+    /** @todo Enable the new code after finding a reliable guest test-case. */
+#if 1
+ return VERR_EM_INTERPRETER;
+#else
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ /* Paranoia. Ensure this has a memory operand. */
+ Assert(!pVmxTransient->ExitInstrInfo.Inv.u1Cleared0);
+
+ uint8_t const iGReg = pVmxTransient->ExitInstrInfo.VmreadVmwrite.iReg2;
+ Assert(iGReg < RT_ELEMENTS(pVCpu->cpum.GstCtx.aGRegs));
+ uint64_t const uType = CPUMIsGuestIn64BitCode(pVCpu) ? pVCpu->cpum.GstCtx.aGRegs[iGReg].u64
+ : pVCpu->cpum.GstCtx.aGRegs[iGReg].u32;
+
+ RTGCPTR GCPtrDesc;
+ HMVMX_DECODE_MEM_OPERAND(pVCpu, pVmxTransient->ExitInstrInfo.u, pVmxTransient->uExitQual, VMXMEMACCESS_READ, &GCPtrDesc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedInvpcid(pVCpu, pVmxTransient->cbExitInstr, pVmxTransient->ExitInstrInfo.Inv.iSegReg,
+ GCPtrDesc, uType);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+#endif
+}
+
+
+/**
+ * VM-exit handler for invalid-guest-state (VMX_EXIT_ERR_INVALID_GUEST_STATE). Error
+ * VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitErrInvalidGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+
+ rc = hmR0VmxCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
+ NOREF(uInvalidReason);
+
+#ifdef VBOX_STRICT
+ uint32_t fIntrState;
+ uint64_t u64Val;
+ hmR0VmxReadEntryIntInfoVmcs(pVmxTransient);
+ hmR0VmxReadEntryXcptErrorCodeVmcs(pVmxTransient);
+ hmR0VmxReadEntryInstrLenVmcs(pVmxTransient);
+
+ Log4(("uInvalidReason %u\n", uInvalidReason));
+ Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", pVmxTransient->uEntryIntInfo));
+ Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", pVmxTransient->uEntryXcptErrorCode));
+ Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %#RX32\n", pVmxTransient->cbEntryInstr));
+
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState); AssertRC(rc);
+ Log4(("VMX_VMCS32_GUEST_INT_STATE %#RX32\n", fIntrState));
+ rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR0, &u64Val); AssertRC(rc);
+ Log4(("VMX_VMCS_GUEST_CR0 %#RX64\n", u64Val));
+ rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR0_MASK, &u64Val); AssertRC(rc);
+ Log4(("VMX_VMCS_CTRL_CR0_MASK %#RX64\n", u64Val));
+ rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR0_READ_SHADOW, &u64Val); AssertRC(rc);
+ Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RX64\n", u64Val));
+ rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR4_MASK, &u64Val); AssertRC(rc);
+ Log4(("VMX_VMCS_CTRL_CR4_MASK %#RX64\n", u64Val));
+ rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR4_READ_SHADOW, &u64Val); AssertRC(rc);
+ Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RX64\n", u64Val));
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging)
+ {
+ rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc);
+ Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val));
+ }
+ hmR0DumpRegs(pVCpu, HM_DUMP_REG_FLAGS_ALL);
+#endif
+
+ return VERR_VMX_INVALID_GUEST_STATE;
+}
+
+
+/**
+ * VM-exit handler for all undefined/unexpected reasons. Should never happen.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitErrUnexpected(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ /*
+ * Cumulative notes of all recognized but unexpected VM-exits.
+ *
+ * 1. This does -not- cover scenarios like a page-fault VM-exit occurring when
+ * nested-paging is used.
+ *
+ *  2. Any instruction that causes a VM-exit unconditionally (e.g. VMXON) must be
+ * emulated or a #UD must be raised in the guest. Therefore, we should -not- be using
+ * this function (and thereby stop VM execution) for handling such instructions.
+ *
+ *
+ * VMX_EXIT_INIT_SIGNAL:
+ * INIT signals are blocked in VMX root operation by VMXON and by SMI in SMM.
+ *    They are -NOT- blocked in VMX non-root operation so we can, in theory, still get these
+ *    VM-exits. However, we should not see an INIT-signal VM-exit while executing a VM.
+ *
+ *    See Intel spec. 33.14.1 "Default Treatment of SMI Delivery".
+ *    See Intel spec. 29.3 "VMX Instructions" for "VMXON".
+ *    See Intel spec. 23.8 "Restrictions on VMX Operation".
+ *
+ * VMX_EXIT_SIPI:
+ * SIPI exits can only occur in VMX non-root operation when the "wait-for-SIPI" guest
+ * activity state is used. We don't make use of it as our guests don't have direct
+ * access to the host local APIC.
+ *
+ * See Intel spec. 25.3 "Other Causes of VM-exits".
+ *
+ * VMX_EXIT_IO_SMI:
+ * VMX_EXIT_SMI:
+ * This can only happen if we support dual-monitor treatment of SMI, which can be
+ * activated by executing VMCALL in VMX root operation. Only an STM (SMM transfer
+ * monitor) would get this VM-exit when we (the executive monitor) execute a VMCALL in
+ * VMX root mode or receive an SMI. If we get here, something funny is going on.
+ *
+ *    See Intel spec. 33.15.6 "Activating the Dual-Monitor Treatment".
+ *    See Intel spec. 25.3 "Other Causes of VM-Exits".
+ *
+ * VMX_EXIT_ERR_MSR_LOAD:
+ *    Failures while loading MSRs that are part of the VM-entry MSR-load area are unexpected
+ *    and typically indicate a bug in the hypervisor code. We thus cannot resume
+ *    execution.
+ *
+ * See Intel spec. 26.7 "VM-Entry Failures During Or After Loading Guest State".
+ *
+ * VMX_EXIT_ERR_MACHINE_CHECK:
+ *    Machine-check exceptions indicate a fatal/unrecoverable hardware condition
+ *    including, but not limited to, system bus, ECC, parity, cache and TLB errors. An
+ *    abort-class #MC exception is raised. We thus cannot assume a
+ *    reasonable chance of continuing any sort of execution and we bail.
+ *
+ * See Intel spec. 15.1 "Machine-check Architecture".
+ * See Intel spec. 27.1 "Architectural State Before A VM Exit".
+ *
+ * VMX_EXIT_PML_FULL:
+ * VMX_EXIT_VIRTUALIZED_EOI:
+ * VMX_EXIT_APIC_WRITE:
+ * We do not currently support any of these features and thus they are all unexpected
+ * VM-exits.
+ *
+ * VMX_EXIT_GDTR_IDTR_ACCESS:
+ * VMX_EXIT_LDTR_TR_ACCESS:
+ * VMX_EXIT_RDRAND:
+ * VMX_EXIT_RSM:
+ * VMX_EXIT_VMFUNC:
+ * VMX_EXIT_ENCLS:
+ * VMX_EXIT_RDSEED:
+ * VMX_EXIT_XSAVES:
+ * VMX_EXIT_XRSTORS:
+ * VMX_EXIT_UMWAIT:
+ * VMX_EXIT_TPAUSE:
+ * These VM-exits are -not- caused unconditionally by execution of the corresponding
+ *    instruction. Any VM-exit for these instructions indicates a hardware problem,
+ *    an unsupported CPU mode (like SMM) or potentially corrupt VMCS controls.
+ *
+ * See Intel spec. 25.1.3 "Instructions That Cause VM Exits Conditionally".
+ */
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ AssertMsgFailed(("Unexpected VM-exit %u\n", pVmxTransient->uExitReason));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient->uExitReason);
+}
+
+
+/**
+ * VM-exit handler for RDMSR (VMX_EXIT_RDMSR).
+ */
+HMVMX_EXIT_DECL hmR0VmxExitRdmsr(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /** @todo Optimize this: We currently drag in the whole MSR state
+ * (CPUMCTX_EXTRN_ALL_MSRS) here. We should optimize this to only get
+ * MSRs required. That would require changes to IEM and possibly CPUM too.
+     * (Should probably do it in a lazy fashion from CPUMAllMsrs.cpp). */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ uint32_t const idMsr = pVCpu->cpum.GstCtx.ecx;
+ uint64_t fImport = IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_ALL_MSRS;
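+    /* Note: The FS and GS base MSRs are not covered by CPUMCTX_EXTRN_ALL_MSRS; import the
+       full segment state for those (see the corresponding note in hmR0VmxExitWrmsr). */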
+ switch (idMsr)
+ {
+ case MSR_K8_FS_BASE: fImport |= CPUMCTX_EXTRN_FS; break;
+ case MSR_K8_GS_BASE: fImport |= CPUMCTX_EXTRN_GS; break;
+ }
+
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, fImport);
+ AssertRCReturn(rc, rc);
+
+ Log4Func(("ecx=%#RX32\n", idMsr));
+
+#ifdef VBOX_STRICT
+ Assert(!pVmxTransient->fIsNestedGuest);
+ if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ {
+ if ( hmR0VmxIsAutoLoadGuestMsr(pVmcsInfo, idMsr)
+ && idMsr != MSR_K6_EFER)
+ {
+ AssertMsgFailed(("Unexpected RDMSR for an MSR in the auto-load/store area in the VMCS. ecx=%#RX32\n", idMsr));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, idMsr);
+ }
+ if (hmR0VmxIsLazyGuestMsr(pVCpu, idMsr))
+ {
+ Assert(pVmcsInfo->pvMsrBitmap);
+ uint32_t fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, idMsr);
+ if (fMsrpm & VMXMSRPM_ALLOW_RD)
+ {
+ AssertMsgFailed(("Unexpected RDMSR for a passthru lazy-restore MSR. ecx=%#RX32\n", idMsr));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, idMsr);
+ }
+ }
+ }
+#endif
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedRdmsr(pVCpu, pVmxTransient->cbExitInstr);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdmsr);
+ if (rcStrict == VINF_SUCCESS)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ else
+ AssertMsg(rcStrict == VINF_CPUM_R3_MSR_READ, ("Unexpected IEMExecDecodedRdmsr rc (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for WRMSR (VMX_EXIT_WRMSR).
+ */
+HMVMX_EXIT_DECL hmR0VmxExitWrmsr(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /** @todo Optimize this: We currently drag in the whole MSR state
+ * (CPUMCTX_EXTRN_ALL_MSRS) here. We should optimize this to only get
+ * MSRs required. That would require changes to IEM and possibly CPUM too.
+     * (Should probably do it in a lazy fashion from CPUMAllMsrs.cpp). */
+ uint32_t const idMsr = pVCpu->cpum.GstCtx.ecx;
+ uint64_t fImport = IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_ALL_MSRS;
+
+ /*
+ * The FS and GS base MSRs are not part of the above all-MSRs mask.
+     * Although we don't need to fetch the base itself (it will be overwritten shortly),
+     * loading the guest state later also loads the entire segment register, including the
+     * limit and attributes, so we must import them here.
+ */
+ switch (idMsr)
+ {
+ case MSR_K8_FS_BASE: fImport |= CPUMCTX_EXTRN_FS; break;
+ case MSR_K8_GS_BASE: fImport |= CPUMCTX_EXTRN_GS; break;
+ }
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, fImport);
+ AssertRCReturn(rc, rc);
+
+ Log4Func(("ecx=%#RX32 edx:eax=%#RX32:%#RX32\n", idMsr, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.eax));
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedWrmsr(pVCpu, pVmxTransient->cbExitInstr);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWrmsr);
+
+ if (rcStrict == VINF_SUCCESS)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+
+ /* If this is an X2APIC WRMSR access, update the APIC state as well. */
+ if ( idMsr == MSR_IA32_APICBASE
+ || ( idMsr >= MSR_IA32_X2APIC_START
+ && idMsr <= MSR_IA32_X2APIC_END))
+ {
+ /*
+             * We've already saved the APIC-related guest-state (TPR) in the post-run phase.
+ * When full APIC register virtualization is implemented we'll have to make
+ * sure APIC state is saved from the VMCS before IEM changes it.
+ */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ }
+ else if (idMsr == MSR_IA32_TSC) /* Windows 7 does this during bootup. See @bugref{6398}. */
+ pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = false;
+ else if (idMsr == MSR_K6_EFER)
+ {
+ /*
+ * If the guest touches the EFER MSR we need to update the VM-Entry and VM-Exit controls
+ * as well, even if it is -not- touching bits that cause paging mode changes (LMA/LME).
+ * We care about the other bits as well, SCE and NXE. See @bugref{7368}.
+ */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_EFER_MSR | HM_CHANGED_VMX_ENTRY_EXIT_CTLS);
+ }
+
+ /* Update MSRs that are part of the VMCS and auto-load/store area when MSR-bitmaps are not used. */
+ if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
+ {
+ switch (idMsr)
+ {
+ case MSR_IA32_SYSENTER_CS: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_CS_MSR); break;
+ case MSR_IA32_SYSENTER_EIP: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_EIP_MSR); break;
+ case MSR_IA32_SYSENTER_ESP: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_ESP_MSR); break;
+ case MSR_K8_FS_BASE: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_FS); break;
+ case MSR_K8_GS_BASE: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_GS); break;
+ case MSR_K6_EFER: /* Nothing to do, already handled above. */ break;
+ default:
+ {
+ if (hmR0VmxIsLazyGuestMsr(pVCpu, idMsr))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_VMX_GUEST_LAZY_MSRS);
+ else if (hmR0VmxIsAutoLoadGuestMsr(pVmcsInfo, idMsr))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_VMX_GUEST_AUTO_MSRS);
+ break;
+ }
+ }
+ }
+#ifdef VBOX_STRICT
+ else
+ {
+ /* Paranoia. Validate that MSRs in the MSR-bitmaps with write-passthru are not intercepted. */
+ switch (idMsr)
+ {
+ case MSR_IA32_SYSENTER_CS:
+ case MSR_IA32_SYSENTER_EIP:
+ case MSR_IA32_SYSENTER_ESP:
+ case MSR_K8_FS_BASE:
+ case MSR_K8_GS_BASE:
+ {
+ AssertMsgFailed(("Unexpected WRMSR for an MSR in the VMCS. ecx=%#RX32\n", idMsr));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, idMsr);
+ }
+
+            /* Writes to MSRs in the auto-load/store area or to swapped (lazy) MSRs shouldn't cause VM-exits when MSR-bitmaps are used. */
+ default:
+ {
+ if (hmR0VmxIsAutoLoadGuestMsr(pVmcsInfo, idMsr))
+ {
+ /* EFER MSR writes are always intercepted. */
+ if (idMsr != MSR_K6_EFER)
+ {
+ AssertMsgFailed(("Unexpected WRMSR for an MSR in the auto-load/store area in the VMCS. ecx=%#RX32\n",
+ idMsr));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, idMsr);
+ }
+ }
+
+ if (hmR0VmxIsLazyGuestMsr(pVCpu, idMsr))
+ {
+ Assert(pVmcsInfo->pvMsrBitmap);
+ uint32_t fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, idMsr);
+ if (fMsrpm & VMXMSRPM_ALLOW_WR)
+ {
+ AssertMsgFailed(("Unexpected WRMSR for passthru, lazy-restore MSR. ecx=%#RX32\n", idMsr));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, idMsr);
+ }
+ }
+ break;
+ }
+ }
+ }
+#endif /* VBOX_STRICT */
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ else
+ AssertMsg(rcStrict == VINF_CPUM_R3_MSR_WRITE, ("Unexpected IEMExecDecodedWrmsr rc (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for PAUSE (VMX_EXIT_PAUSE). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitPause(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /** @todo The guest has likely hit a contended spinlock. We might want to
+     *        poke or schedule a different guest VCPU. */
+ int rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+ if (RT_SUCCESS(rc))
+ return VINF_EM_RAW_INTERRUPT;
+
+ AssertMsgFailed(("hmR0VmxExitPause: Failed to increment RIP. rc=%Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VM-exit handler for when the TPR value is lowered below the specified
+ * threshold (VMX_EXIT_TPR_BELOW_THRESHOLD). Conditional VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitTprBelowThreshold(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ Assert(pVmxTransient->pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW);
+
+ /*
+ * The TPR shadow would've been synced with the APIC TPR in the post-run phase.
+ * We'll re-evaluate pending interrupts and inject them before the next VM
+ * entry so we can just continue execution here.
+ */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTprBelowThreshold);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit handler for control-register accesses (VMX_EXIT_MOV_CRX). Conditional
+ * VM-exit.
+ *
+ * @retval VINF_SUCCESS when guest execution can continue.
+ * @retval VINF_PGM_SYNC_CR3 CR3 sync is required, back to ring-3.
+ * @retval VERR_EM_RESCHEDULE_REM when we need to return to ring-3 due to
+ * incompatible guest state for VMX execution (real-on-v86 case).
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMovCRx(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitMovCRx, y2);
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+
+ VBOXSTRICTRC rcStrict;
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ uint64_t const uExitQual = pVmxTransient->uExitQual;
+ uint32_t const uAccessType = VMX_EXIT_QUAL_CRX_ACCESS(uExitQual);
+ switch (uAccessType)
+ {
+ /*
+ * MOV to CRx.
+ */
+ case VMX_EXIT_QUAL_CRX_ACCESS_WRITE:
+ {
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR0);
+ uint32_t const uOldCr0 = pVCpu->cpum.GstCtx.cr0;
+ uint8_t const iGReg = VMX_EXIT_QUAL_CRX_GENREG(uExitQual);
+ uint8_t const iCrReg = VMX_EXIT_QUAL_CRX_REGISTER(uExitQual);
+
+ /*
+             * MOV to CR3 only causes a VM-exit when one or more of the following are true:
+ * - When nested paging isn't used.
+ * - If the guest doesn't have paging enabled (intercept CR3 to update shadow page tables).
+ * - We are executing in the VM debug loop.
+ */
+ Assert( iCrReg != 3
+ || !pVM->hm.s.fNestedPaging
+ || !CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx)
+ || pVCpu->hm.s.fUsingDebugLoop);
+
+ /* MOV to CR8 writes only cause VM-exits when TPR shadow is not used. */
+ Assert( iCrReg != 8
+ || !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW));
+
+ rcStrict = hmR0VmxExitMovToCrX(pVCpu, pVmcsInfo, pVmxTransient->cbExitInstr, iGReg, iCrReg);
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_PGM_SYNC_CR3, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ /*
+ * This is a kludge for handling switches back to real mode when we try to use
+             * V86 mode to run real-mode code directly. The problem is that V86 mode cannot
+ * deal with special selector values, so we have to return to ring-3 and run
+ * there till the selector values are V86 mode compatible.
+ *
+ * Note! Using VINF_EM_RESCHEDULE_REM here rather than VINF_EM_RESCHEDULE since the
+ * latter is an alias for VINF_IEM_RAISED_XCPT which is asserted at the end of
+ * this function.
+ */
+ if ( iCrReg == 0
+ && rcStrict == VINF_SUCCESS
+ && !pVM->hm.s.vmx.fUnrestrictedGuest
+ && CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx)
+ && (uOldCr0 & X86_CR0_PE)
+ && !(pVCpu->cpum.GstCtx.cr0 & X86_CR0_PE))
+ {
+ /** @todo Check selectors rather than returning all the time. */
+ Assert(!pVmxTransient->fIsNestedGuest);
+ Log4Func(("CR0 write, back to real mode -> VINF_EM_RESCHEDULE_REM\n"));
+ rcStrict = VINF_EM_RESCHEDULE_REM;
+ }
+ break;
+ }
+
+ /*
+ * MOV from CRx.
+ */
+ case VMX_EXIT_QUAL_CRX_ACCESS_READ:
+ {
+ uint8_t const iGReg = VMX_EXIT_QUAL_CRX_GENREG(uExitQual);
+ uint8_t const iCrReg = VMX_EXIT_QUAL_CRX_REGISTER(uExitQual);
+
+ /*
+             * MOV from CR3 only causes a VM-exit when one or more of the following are true:
+ * - When nested paging isn't used.
+ * - If the guest doesn't have paging enabled (pass guest's CR3 rather than our identity mapped CR3).
+ * - We are executing in the VM debug loop.
+ */
+ Assert( iCrReg != 3
+ || !pVM->hm.s.fNestedPaging
+ || !CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx)
+ || pVCpu->hm.s.fUsingDebugLoop);
+
+ /* MOV from CR8 reads only cause a VM-exit when the TPR shadow feature isn't enabled. */
+ Assert( iCrReg != 8
+ || !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW));
+
+ rcStrict = hmR0VmxExitMovFromCrX(pVCpu, pVmcsInfo, pVmxTransient->cbExitInstr, iGReg, iCrReg);
+ break;
+ }
+
+ /*
+ * CLTS (Clear Task-Switch Flag in CR0).
+ */
+ case VMX_EXIT_QUAL_CRX_ACCESS_CLTS:
+ {
+ rcStrict = hmR0VmxExitClts(pVCpu, pVmcsInfo, pVmxTransient->cbExitInstr);
+ break;
+ }
+
+ /*
+ * LMSW (Load Machine-Status Word into CR0).
+ * LMSW cannot clear CR0.PE, so no fRealOnV86Active kludge needed here.
+ */
+ case VMX_EXIT_QUAL_CRX_ACCESS_LMSW:
+ {
+ RTGCPTR GCPtrEffDst;
+ uint8_t const cbInstr = pVmxTransient->cbExitInstr;
+ uint16_t const uMsw = VMX_EXIT_QUAL_CRX_LMSW_DATA(uExitQual);
+ bool const fMemOperand = VMX_EXIT_QUAL_CRX_LMSW_OP_MEM(uExitQual);
+ if (fMemOperand)
+ {
+ hmR0VmxReadGuestLinearAddrVmcs(pVmxTransient);
+ GCPtrEffDst = pVmxTransient->uGuestLinearAddr;
+ }
+ else
+ GCPtrEffDst = NIL_RTGCPTR;
+ rcStrict = hmR0VmxExitLmsw(pVCpu, pVmcsInfo, cbInstr, uMsw, GCPtrEffDst);
+ break;
+ }
+
+ default:
+ {
+ AssertMsgFailed(("Unrecognized Mov CRX access type %#x\n", uAccessType));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, uAccessType);
+ }
+ }
+
+ Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS))
+ == (HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS));
+ Assert(rcStrict != VINF_IEM_RAISED_XCPT);
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitMovCRx, y2);
+ NOREF(pVM);
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for I/O instructions (VMX_EXIT_IO_INSTR). Conditional
+ * VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitIoInstr(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitIO, y1);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_SREG_MASK
+ | CPUMCTX_EXTRN_EFER);
+ /* EFER MSR also required for longmode checks in EMInterpretDisasCurrent(), but it's always up-to-date. */
+ AssertRCReturn(rc, rc);
+
+    /* Refer to Intel spec. 27-5 "Exit Qualifications for I/O Instructions" for the format. */
+ uint32_t const uIOPort = VMX_EXIT_QUAL_IO_PORT(pVmxTransient->uExitQual);
+ uint8_t const uIOSize = VMX_EXIT_QUAL_IO_SIZE(pVmxTransient->uExitQual);
+ bool const fIOWrite = (VMX_EXIT_QUAL_IO_DIRECTION(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_IO_DIRECTION_OUT);
+ bool const fIOString = VMX_EXIT_QUAL_IO_IS_STRING(pVmxTransient->uExitQual);
+ bool const fGstStepping = RT_BOOL(pCtx->eflags.Bits.u1TF);
+ bool const fDbgStepping = pVCpu->hm.s.fSingleInstruction;
+ AssertReturn(uIOSize <= 3 && uIOSize != 2, VERR_VMX_IPE_1);
+
+ /*
+ * Update exit history to see if this exit can be optimized.
+ */
+ VBOXSTRICTRC rcStrict;
+ PCEMEXITREC pExitRec = NULL;
+ if ( !fGstStepping
+ && !fDbgStepping)
+ pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
+ !fIOString
+ ? !fIOWrite
+ ? EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_READ)
+ : EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_WRITE)
+ : !fIOWrite
+ ? EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_STR_READ)
+ : EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_STR_WRITE),
+ pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
+ if (!pExitRec)
+ {
+ static uint32_t const s_aIOSizes[4] = { 1, 2, 0, 4 }; /* Size of the I/O accesses in bytes. */
+ static uint32_t const s_aIOOpAnd[4] = { 0xff, 0xffff, 0, 0xffffffff }; /* AND masks for saving result in AL/AX/EAX. */
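+        /* uIOSize is the raw exit-qualification size encoding: 0=byte, 1=word, 3=dword (2 is
+           rejected by the AssertReturn above), hence the zero entries at index 2 above. */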
+
+ uint32_t const cbValue = s_aIOSizes[uIOSize];
+ uint32_t const cbInstr = pVmxTransient->cbExitInstr;
+ bool fUpdateRipAlready = false; /* ugly hack, should be temporary. */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if (fIOString)
+ {
+ /*
+ * INS/OUTS - I/O String instruction.
+ *
+ * Use instruction-information if available, otherwise fall back on
+ * interpreting the instruction.
+ */
+ Log4Func(("cs:rip=%#04x:%#RX64 %#06x/%u %c str\n", pCtx->cs.Sel, pCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r'));
+ AssertReturn(pCtx->dx == uIOPort, VERR_VMX_IPE_2);
+ bool const fInsOutsInfo = RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_INS_OUTS);
+ if (fInsOutsInfo)
+ {
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ AssertReturn(pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize <= 2, VERR_VMX_IPE_3);
+ AssertCompile(IEMMODE_16BIT == 0 && IEMMODE_32BIT == 1 && IEMMODE_64BIT == 2);
+ IEMMODE const enmAddrMode = (IEMMODE)pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize;
+ bool const fRep = VMX_EXIT_QUAL_IO_IS_REP(pVmxTransient->uExitQual);
+ if (fIOWrite)
+ rcStrict = IEMExecStringIoWrite(pVCpu, cbValue, enmAddrMode, fRep, cbInstr,
+ pVmxTransient->ExitInstrInfo.StrIo.iSegReg, true /*fIoChecked*/);
+ else
+ {
+ /*
+ * The segment prefix for INS cannot be overridden and is always ES. We can safely assume X86_SREG_ES.
+ * Hence "iSegReg" field is undefined in the instruction-information field in VT-x for INS.
+ * See Intel Instruction spec. for "INS".
+ * See Intel spec. Table 27-8 "Format of the VM-Exit Instruction-Information Field as Used for INS and OUTS".
+ */
+ rcStrict = IEMExecStringIoRead(pVCpu, cbValue, enmAddrMode, fRep, cbInstr, true /*fIoChecked*/);
+ }
+ }
+ else
+ rcStrict = IEMExecOne(pVCpu);
+
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP);
+ fUpdateRipAlready = true;
+ }
+ else
+ {
+ /*
+ * IN/OUT - I/O instruction.
+ */
+ Log4Func(("cs:rip=%04x:%08RX64 %#06x/%u %c\n", pCtx->cs.Sel, pCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r'));
+ uint32_t const uAndVal = s_aIOOpAnd[uIOSize];
+ Assert(!VMX_EXIT_QUAL_IO_IS_REP(pVmxTransient->uExitQual));
+ if (fIOWrite)
+ {
+ rcStrict = IOMIOPortWrite(pVM, pVCpu, uIOPort, pCtx->eax & uAndVal, cbValue);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite);
+ if ( rcStrict == VINF_IOM_R3_IOPORT_WRITE
+ && !pCtx->eflags.Bits.u1TF)
+ rcStrict = EMRZSetPendingIoPortWrite(pVCpu, uIOPort, cbInstr, cbValue, pCtx->eax & uAndVal);
+ }
+ else
+ {
+ uint32_t u32Result = 0;
+ rcStrict = IOMIOPortRead(pVM, pVCpu, uIOPort, &u32Result, cbValue);
+ if (IOM_SUCCESS(rcStrict))
+ {
+ /* Save result of I/O IN instr. in AL/AX/EAX. */
+ pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Result & uAndVal);
+ }
+ if ( rcStrict == VINF_IOM_R3_IOPORT_READ
+ && !pCtx->eflags.Bits.u1TF)
+ rcStrict = EMRZSetPendingIoPortRead(pVCpu, uIOPort, cbInstr, cbValue);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead);
+ }
+ }
+
+ if (IOM_SUCCESS(rcStrict))
+ {
+ if (!fUpdateRipAlready)
+ {
+ hmR0VmxAdvanceGuestRipBy(pVCpu, cbInstr);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP);
+ }
+
+ /*
+             * INS/OUTS with a REP prefix updates RFLAGS; this can be observed as a triple-fault
+             * guru while booting a Fedora 17 64-bit guest.
+ *
+ * See Intel Instruction reference for REP/REPE/REPZ/REPNE/REPNZ.
+ */
+ if (fIOString)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RFLAGS);
+
+ /*
+ * If any I/O breakpoints are armed, we need to check if one triggered
+ * and take appropriate action.
+ * Note that the I/O breakpoint type is undefined if CR4.DE is 0.
+ */
+ rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_DR7);
+ AssertRCReturn(rc, rc);
+
+ /** @todo Optimize away the DBGFBpIsHwIoArmed call by having DBGF tell the
+ * execution engines about whether hyper BPs and such are pending. */
+ uint32_t const uDr7 = pCtx->dr[7];
+ if (RT_UNLIKELY( ( (uDr7 & X86_DR7_ENABLED_MASK)
+ && X86_DR7_ANY_RW_IO(uDr7)
+ && (pCtx->cr4 & X86_CR4_DE))
+ || DBGFBpIsHwIoArmed(pVM)))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck);
+
+ /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */
+ VMMRZCallRing3Disable(pVCpu);
+ HM_DISABLE_PREEMPT(pVCpu);
+
+ bool fIsGuestDbgActive = CPUMR0DebugStateMaybeSaveGuest(pVCpu, true /* fDr6 */);
+
+ VBOXSTRICTRC rcStrict2 = DBGFBpCheckIo(pVM, pVCpu, pCtx, uIOPort, cbValue);
+ if (rcStrict2 == VINF_EM_RAW_GUEST_TRAP)
+ {
+ /* Raise #DB. */
+ if (fIsGuestDbgActive)
+ ASMSetDR6(pCtx->dr[6]);
+ if (pCtx->dr[7] != uDr7)
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_DR7;
+
+ hmR0VmxSetPendingXcptDB(pVCpu);
+ }
+            /* rcStrict is VINF_SUCCESS, VINF_IOM_R3_IOPORT_COMMIT_WRITE, or in [VINF_EM_FIRST..VINF_EM_LAST];
+               however, we can ditch VINF_IOM_R3_IOPORT_COMMIT_WRITE as it has VMCPU_FF_IOM as a backup. */
+ else if ( rcStrict2 != VINF_SUCCESS
+ && (rcStrict == VINF_SUCCESS || rcStrict2 < rcStrict))
+ rcStrict = rcStrict2;
+ AssertCompile(VINF_EM_LAST < VINF_IOM_R3_IOPORT_COMMIT_WRITE);
+
+ HM_RESTORE_PREEMPT();
+ VMMRZCallRing3Enable(pVCpu);
+ }
+ }
+
+#ifdef VBOX_STRICT
+ if ( rcStrict == VINF_IOM_R3_IOPORT_READ
+ || rcStrict == VINF_EM_PENDING_R3_IOPORT_READ)
+ Assert(!fIOWrite);
+ else if ( rcStrict == VINF_IOM_R3_IOPORT_WRITE
+ || rcStrict == VINF_IOM_R3_IOPORT_COMMIT_WRITE
+ || rcStrict == VINF_EM_PENDING_R3_IOPORT_WRITE)
+ Assert(fIOWrite);
+ else
+ {
+# if 0 /** @todo r=bird: This is missing a bunch of VINF_EM_FIRST..VINF_EM_LAST
+ * statuses, that the VMM device and some others may return. See
+ * IOM_SUCCESS() for guidance. */
+ AssertMsg( RT_FAILURE(rcStrict)
+ || rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_EM_RAW_EMULATE_INSTR
+ || rcStrict == VINF_EM_DBG_BREAKPOINT
+ || rcStrict == VINF_EM_RAW_GUEST_TRAP
+ || rcStrict == VINF_EM_RAW_TO_R3
+ || rcStrict == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+# endif
+ }
+#endif
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitIO, y1);
+ }
+ else
+ {
+ /*
+ * Frequent exit or something needing probing. Get state and call EMHistoryExec.
+ */
+ int rc2 = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc2, rc2);
+ STAM_COUNTER_INC(!fIOString ? fIOWrite ? &pVCpu->hm.s.StatExitIOWrite : &pVCpu->hm.s.StatExitIORead
+ : fIOWrite ? &pVCpu->hm.s.StatExitIOStringWrite : &pVCpu->hm.s.StatExitIOStringRead);
+ Log4(("IOExit/%u: %04x:%08RX64: %s%s%s %#x LB %u -> EMHistoryExec\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+ VMX_EXIT_QUAL_IO_IS_REP(pVmxTransient->uExitQual) ? "REP " : "",
+ fIOWrite ? "OUT" : "IN", fIOString ? "S" : "", uIOPort, uIOSize));
+
+ rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+
+ Log4(("IOExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+ VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for task switches (VMX_EXIT_TASK_SWITCH). Unconditional
+ * VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitTaskSwitch(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+    /* Check if this task-switch occurred while delivering an event through the guest IDT. */
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ if (VMX_EXIT_QUAL_TASK_SWITCH_TYPE(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_TASK_SWITCH_TYPE_IDT)
+ {
+ hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
+ if (VMX_IDT_VECTORING_INFO_IS_VALID(pVmxTransient->uIdtVectoringInfo))
+ {
+ uint32_t uErrCode;
+ if (VMX_IDT_VECTORING_INFO_IS_ERROR_CODE_VALID(pVmxTransient->uIdtVectoringInfo))
+ {
+ hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
+ uErrCode = pVmxTransient->uIdtVectoringErrorCode;
+ }
+ else
+ uErrCode = 0;
+
+ RTGCUINTPTR GCPtrFaultAddress;
+ if (VMX_IDT_VECTORING_INFO_IS_XCPT_PF(pVmxTransient->uIdtVectoringInfo))
+ GCPtrFaultAddress = pVCpu->cpum.GstCtx.cr2;
+ else
+ GCPtrFaultAddress = 0;
+
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_IDT_INFO(pVmxTransient->uIdtVectoringInfo),
+ pVmxTransient->cbExitInstr, uErrCode, GCPtrFaultAddress);
+
+ Log4Func(("Pending event. uIntType=%#x uVector=%#x\n", VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo),
+ VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo)));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+ }
+
+ /* Fall back to the interpreter to emulate the task-switch. */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
+ return VERR_EM_INTERPRETER;
+}
+
+
+/**
+ * VM-exit handler for monitor-trap-flag (VMX_EXIT_MTF). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMtf(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ pVmcsInfo->u32ProcCtls &= ~VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
+ AssertRC(rc);
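+    /* Report the completed single step; VINF_EM_DBG_STEPPED hands control to the debugger in ring-3. */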
+ return VINF_EM_DBG_STEPPED;
+}
+
+
+/**
+ * VM-exit handler for APIC access (VMX_EXIT_APIC_ACCESS). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitApicAccess(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitApicAccess);
+
+ hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
+ hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
+ hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
+
+ /*
+ * If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly.
+ */
+ VBOXSTRICTRC rcStrict = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ {
+ /* For some crazy guest, if an event delivery causes an APIC-access VM-exit, go to instruction emulation. */
+ if (RT_UNLIKELY(pVCpu->hm.s.Event.fPending))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterpret);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+ }
+ else
+ {
+ Assert(rcStrict != VINF_HM_DOUBLE_FAULT);
+ return rcStrict;
+ }
+
+ /* IOMMIOPhysHandler() below may call into IEM, save the necessary state. */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ AssertRCReturn(rc, rc);
+
+    /* See Intel spec. 27-6 "Exit Qualifications for APIC-access VM-exits from Linear Accesses & Guest-Physical Addresses". */
+ uint32_t const uAccessType = VMX_EXIT_QUAL_APIC_ACCESS_TYPE(pVmxTransient->uExitQual);
+ switch (uAccessType)
+ {
+ case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
+ case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
+ {
+ AssertMsg( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
+ || VMX_EXIT_QUAL_APIC_ACCESS_OFFSET(pVmxTransient->uExitQual) != XAPIC_OFF_TPR,
+ ("hmR0VmxExitApicAccess: can't access TPR offset while using TPR shadowing.\n"));
+
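+            /* Reconstruct the faulting guest-physical address from the guest APIC-base MSR and
+               the access offset so it can be handled as an ordinary MMIO access below. */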
+ RTGCPHYS GCPhys = pVCpu->hm.s.vmx.u64GstMsrApicBase; /* Always up-to-date, as it is not part of the VMCS. */
+ GCPhys &= PAGE_BASE_GC_MASK;
+ GCPhys += VMX_EXIT_QUAL_APIC_ACCESS_OFFSET(pVmxTransient->uExitQual);
+ Log4Func(("Linear access uAccessType=%#x GCPhys=%#RGp Off=%#x\n", uAccessType, GCPhys,
+ VMX_EXIT_QUAL_APIC_ACCESS_OFFSET(pVmxTransient->uExitQual)));
+
+ rcStrict = IOMR0MmioPhysHandler(pVCpu->CTX_SUFF(pVM), pVCpu,
+ uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ ? 0 : X86_TRAP_PF_RW, GCPhys);
+ Log4Func(("IOMMMIOPhysHandler returned %Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+ if ( rcStrict == VINF_SUCCESS
+ || rcStrict == VERR_PAGE_TABLE_NOT_PRESENT
+ || rcStrict == VERR_PAGE_NOT_PRESENT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS
+ | HM_CHANGED_GUEST_APIC_TPR);
+ rcStrict = VINF_SUCCESS;
+ }
+ break;
+ }
+
+ default:
+ {
+ Log4Func(("uAccessType=%#x\n", uAccessType));
+ rcStrict = VINF_EM_RAW_EMULATE_INSTR;
+ break;
+ }
+ }
+
+ if (rcStrict != VINF_SUCCESS)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchApicAccessToR3);
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for debug-register accesses (VMX_EXIT_MOV_DRX). Conditional
+ * VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMovDRx(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+
+ /* We might get this VM-exit if the nested-guest is not intercepting MOV DRx accesses. */
+ if (!pVmxTransient->fIsNestedGuest)
+ {
+ /* We should -not- get this VM-exit if the guest's debug registers were active. */
+ if (pVmxTransient->fWasGuestDebugStateActive)
+ {
+ AssertMsgFailed(("Unexpected MOV DRx exit\n"));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient->uExitReason);
+ }
+
+ if ( !pVCpu->hm.s.fSingleInstruction
+ && !pVmxTransient->fWasHyperDebugStateActive)
+ {
+ Assert(!DBGFIsStepping(pVCpu));
+ Assert(pVmcsInfo->u32XcptBitmap & RT_BIT(X86_XCPT_DB));
+
+ /* Don't intercept MOV DRx any more. */
+ pVmcsInfo->u32ProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
+ AssertRC(rc);
+
+ /* We're playing with the host CPU state here, make sure we can't preempt or longjmp. */
+ VMMRZCallRing3Disable(pVCpu);
+ HM_DISABLE_PREEMPT(pVCpu);
+
+ /* Save the host & load the guest debug state, restart execution of the MOV DRx instruction. */
+ CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
+ Assert(CPUMIsGuestDebugStateActive(pVCpu));
+
+ HM_RESTORE_PREEMPT();
+ VMMRZCallRing3Enable(pVCpu);
+
+#ifdef VBOX_WITH_STATISTICS
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ if (VMX_EXIT_QUAL_DRX_DIRECTION(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_DRX_DIRECTION_WRITE)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
+#endif
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
+ return VINF_SUCCESS;
+ }
+ }
+
+ /*
+     * EMInterpretDRx[Write|Read]() calls CPUMIsGuestIn64BitCode(), which requires the EFER MSR and CS.
+ * The EFER MSR is always up-to-date.
+ * Update the segment registers and DR7 from the CPU.
+ */
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_DR7);
+ AssertRCReturn(rc, rc);
+ Log4Func(("cs:rip=%#04x:%#RX64\n", pCtx->cs.Sel, pCtx->rip));
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ if (VMX_EXIT_QUAL_DRX_DIRECTION(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_DRX_DIRECTION_WRITE)
+ {
+ rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
+ VMX_EXIT_QUAL_DRX_REGISTER(pVmxTransient->uExitQual),
+ VMX_EXIT_QUAL_DRX_GENREG(pVmxTransient->uExitQual));
+ if (RT_SUCCESS(rc))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
+ }
+ else
+ {
+ rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
+ VMX_EXIT_QUAL_DRX_GENREG(pVmxTransient->uExitQual),
+ VMX_EXIT_QUAL_DRX_REGISTER(pVmxTransient->uExitQual));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
+ }
+
+ Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER);
+ if (RT_SUCCESS(rc))
+ {
+ int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+ AssertRCReturn(rc2, rc2);
+ return VINF_SUCCESS;
+ }
+ return rc;
+}
+
+
+/**
+ * VM-exit handler for EPT misconfiguration (VMX_EXIT_EPT_MISCONFIG).
+ * Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitEptMisconfig(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
+
+ hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
+ hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
+ hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
+
+ /*
+ * If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly.
+ */
+ VBOXSTRICTRC rcStrict = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ {
+ /*
+ * In the unlikely case where delivering an event causes an EPT misconfig (MMIO), go back to
+ * instruction emulation to inject the original event. Otherwise, injecting the original event
+ * using hardware-assisted VMX would trigger the same EPT misconfig VM-exit again.
+ */
+ if (!pVCpu->hm.s.Event.fPending)
+ { /* likely */ }
+ else
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterpret);
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /** @todo NSTVMX: Think about how this should be handled. */
+ if (pVmxTransient->fIsNestedGuest)
+ return VERR_VMX_IPE_3;
+#endif
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+ }
+ else
+ {
+ Assert(rcStrict != VINF_HM_DOUBLE_FAULT);
+ return rcStrict;
+ }
+
+ /*
+ * Get sufficient state and update the exit history entry.
+ */
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ hmR0VmxReadGuestPhysicalAddrVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ AssertRCReturn(rc, rc);
+
+ RTGCPHYS const GCPhys = pVmxTransient->uGuestPhysicalAddr;
+ PCEMEXITREC pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
+ EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_MMIO),
+ pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
+ if (!pExitRec)
+ {
+ /*
+ * If we succeed, resume guest execution.
+         * If we fail to interpret the instruction because we couldn't get the guest physical address
+         * of the page containing the instruction via the guest's page tables (we would invalidate the guest
+         * page in the host TLB), we resume execution, which causes a guest page fault that lets the guest
+         * handle this weird case. See @bugref{6043}.
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+/** @todo bird: We can probably just go straight to IOM here and assume that
+ * it's MMIO, then fall back on PGM if that hunch didn't work out so
+ * well. However, we need to address the aliasing workarounds that
+ * PGMR0Trap0eHandlerNPMisconfig implements. So, some care is needed.
+ *
+ * Might also be interesting to see if we can get this done more or
+ * less locklessly inside IOM. Need to consider the lookup table
+ * updating and use a bit more carefully first (or do all updates via
+ * rendezvous) */
+ rcStrict = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX);
+ Log4Func(("At %#RGp RIP=%#RX64 rc=%Rrc\n", GCPhys, pCtx->rip, VBOXSTRICTRC_VAL(rcStrict)));
+ if ( rcStrict == VINF_SUCCESS
+ || rcStrict == VERR_PAGE_TABLE_NOT_PRESENT
+ || rcStrict == VERR_PAGE_NOT_PRESENT)
+ {
+ /* Successfully handled MMIO operation. */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS
+ | HM_CHANGED_GUEST_APIC_TPR);
+ rcStrict = VINF_SUCCESS;
+ }
+ }
+ else
+ {
+ /*
+ * Frequent exit or something needing probing. Call EMHistoryExec.
+ */
+ Log4(("EptMisscfgExit/%u: %04x:%08RX64: %RGp -> EMHistoryExec\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhys));
+
+ rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+
+ Log4(("EptMisscfgExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+ VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for EPT violation (VMX_EXIT_EPT_VIOLATION). Conditional
+ * VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitEptViolation(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
+
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
+ hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
+ hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
+
+ /*
+ * If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly.
+ */
+ VBOXSTRICTRC rcStrict = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ {
+ /*
+ * If delivery of an event causes an EPT violation (true nested #PF and not MMIO),
+ * we shall resolve the nested #PF and re-inject the original event.
+ */
+ if (pVCpu->hm.s.Event.fPending)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectReflectNPF);
+ }
+ else
+ {
+ Assert(rcStrict != VINF_HM_DOUBLE_FAULT);
+ return rcStrict;
+ }
+
+ PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
+ hmR0VmxReadGuestPhysicalAddrVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ AssertRCReturn(rc, rc);
+
+ RTGCPHYS const GCPhys = pVmxTransient->uGuestPhysicalAddr;
+ uint64_t const uExitQual = pVmxTransient->uExitQual;
+ AssertMsg(((pVmxTransient->uExitQual >> 7) & 3) != 2, ("%#RX64", uExitQual));
+
+ RTGCUINT uErrorCode = 0;
+ if (uExitQual & VMX_EXIT_QUAL_EPT_INSTR_FETCH)
+ uErrorCode |= X86_TRAP_PF_ID;
+ if (uExitQual & VMX_EXIT_QUAL_EPT_DATA_WRITE)
+ uErrorCode |= X86_TRAP_PF_RW;
+ if (uExitQual & VMX_EXIT_QUAL_EPT_ENTRY_PRESENT)
+ uErrorCode |= X86_TRAP_PF_P;
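+    /* The EPT violation qualification was translated into an x86 #PF error code above, allowing
+       PGM to handle it as a nested page fault below. */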
+
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ Log4Func(("at %#RX64 (%#RX64 errcode=%#x) cs:rip=%#04x:%#RX64\n", GCPhys, uExitQual, uErrorCode, pCtx->cs.Sel, pCtx->rip));
+
+ /*
+ * Handle the pagefault trap for the nested shadow table.
+ */
+ TRPMAssertXcptPF(pVCpu, GCPhys, uErrorCode);
+ rcStrict = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, uErrorCode, CPUMCTX2CORE(pCtx), GCPhys);
+ TRPMResetTrap(pVCpu);
+
+ /* Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}. */
+ if ( rcStrict == VINF_SUCCESS
+ || rcStrict == VERR_PAGE_TABLE_NOT_PRESENT
+ || rcStrict == VERR_PAGE_NOT_PRESENT)
+ {
+ /* Successfully synced our nested page tables. */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS);
+ return VINF_SUCCESS;
+ }
+
+ Log4Func(("EPT return to ring-3 rcStrict2=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+ return rcStrict;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * VM-exit handler for VMCLEAR (VMX_EXIT_VMCLEAR). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmclear(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
+ | CPUMCTX_EXTRN_HWVIRT
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmclear(pVCpu, &ExitInfo);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for VMLAUNCH (VMX_EXIT_VMLAUNCH). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmlaunch(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+    /* Import the entire VMCS state for now as we would be switching VMCS on successful VMLAUNCH;
+ otherwise we could import just IEM_CPUMCTX_EXTRN_VMX_VMENTRY_MASK. */
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitVmentry, z);
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmlaunchVmresume(pVCpu, pVmxTransient->cbExitInstr, VMXINSTRID_VMLAUNCH);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitVmentry, z);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
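+        /* If IEM successfully entered VMX non-root mode, tell the caller to switch to executing the nested guest. */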
+ if (CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
+ rcStrict = VINF_VMX_VMLAUNCH_VMRESUME;
+ }
+ Assert(rcStrict != VINF_IEM_RAISED_XCPT);
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for VMPTRLD (VMX_EXIT_VMPTRLD). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmptrld(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
+ | CPUMCTX_EXTRN_HWVIRT
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmptrld(pVCpu, &ExitInfo);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for VMPTRST (VMX_EXIT_VMPTRST). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmptrst(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
+ | CPUMCTX_EXTRN_HWVIRT
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_WRITE, &ExitInfo.GCPtrEffAddr);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmptrst(pVCpu, &ExitInfo);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for VMREAD (VMX_EXIT_VMREAD). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmread(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /*
+     * Strictly speaking we should not get VMREAD VM-exits for shadow VMCS fields and
+     * thus might not need to import the shadow VMCS state, but it is safer to do so
+     * just in case code elsewhere dares look at unsynced VMCS fields.
+ */
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
+ | CPUMCTX_EXTRN_HWVIRT
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ if (!ExitInfo.InstrInfo.VmreadVmwrite.fIsRegOperand)
+ HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_WRITE, &ExitInfo.GCPtrEffAddr);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmread(pVCpu, &ExitInfo);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for VMRESUME (VMX_EXIT_VMRESUME). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmresume(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+    /* Import the entire VMCS state for now as we would be switching VMCS on successful VMRESUME;
+ otherwise we could import just IEM_CPUMCTX_EXTRN_VMX_VMENTRY_MASK. */
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitVmentry, z);
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmlaunchVmresume(pVCpu, pVmxTransient->cbExitInstr, VMXINSTRID_VMRESUME);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitVmentry, z);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ if (CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
+ rcStrict = VINF_VMX_VMLAUNCH_VMRESUME;
+ }
+ Assert(rcStrict != VINF_IEM_RAISED_XCPT);
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for VMWRITE (VMX_EXIT_VMWRITE). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmwrite(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /*
+ * Although we should not get VMWRITE VM-exits for shadow VMCS fields, since our HM hook
+ * gets invoked when IEM's VMWRITE instruction emulation modifies the current VMCS and it
+ * flags re-loading the entire shadow VMCS, we should save the entire shadow VMCS here.
+ */
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
+ | CPUMCTX_EXTRN_HWVIRT
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ if (!ExitInfo.InstrInfo.VmreadVmwrite.fIsRegOperand)
+ HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmwrite(pVCpu, &ExitInfo);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for VMXOFF (VMX_EXIT_VMXOFF). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmxoff(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CR4
+ | CPUMCTX_EXTRN_HWVIRT
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmxoff(pVCpu, pVmxTransient->cbExitInstr);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for VMXON (VMX_EXIT_VMXON). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmxon(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
+ | CPUMCTX_EXTRN_HWVIRT
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmxon(pVCpu, &ExitInfo);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for INVVPID (VMX_EXIT_INVVPID). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitInvvpid(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedInvvpid(pVCpu, &ExitInfo);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+/** @} */
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/** @name Nested-guest VM-exit handlers.
+ * @{
+ */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- Nested-guest VM-exit handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+
+/**
+ * Nested-guest VM-exit handler for exceptions or NMIs (VMX_EXIT_XCPT_OR_NMI).
+ * Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitXcptOrNmiNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
+
+ uint64_t const uExitIntInfo = pVmxTransient->uExitIntInfo;
+ uint32_t const uExitIntType = VMX_EXIT_INT_INFO_TYPE(uExitIntInfo);
+ Assert(VMX_EXIT_INT_INFO_IS_VALID(uExitIntInfo));
+
+ switch (uExitIntType)
+ {
+ /*
+ * Physical NMIs:
+         * We shouldn't direct host physical NMIs to the nested-guest. Dispatch them to the host.
+ */
+ case VMX_EXIT_INT_INFO_TYPE_NMI:
+ return hmR0VmxExitHostNmi(pVCpu, pVmxTransient->pVmcsInfo);
+
+ /*
+ * Hardware exceptions,
+ * Software exceptions,
+ * Privileged software exceptions:
+ * Figure out if the exception must be delivered to the guest or the nested-guest.
+ */
+ case VMX_EXIT_INT_INFO_TYPE_SW_XCPT:
+ case VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT:
+ case VMX_EXIT_INT_INFO_TYPE_HW_XCPT:
+ {
+ hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
+ hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
+
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ bool const fIntercept = CPUMIsGuestVmxXcptInterceptSet(pCtx, VMX_EXIT_INT_INFO_VECTOR(uExitIntInfo),
+ pVmxTransient->uExitIntErrorCode);
+ if (fIntercept)
+ {
+ /* Exit qualification is required for debug and page-fault exceptions. */
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+
+ /*
+ * For VM-exits due to software exceptions (those generated by INT3 or INTO) and privileged
+ * software exceptions (those generated by INT1/ICEBP) we need to supply the VM-exit instruction
+ * length. However, if delivery of a software interrupt, software exception or privileged
+ * software exception causes a VM-exit, that too provides the VM-exit instruction length.
+ */
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+
+ VMXVEXITEVENTINFO ExitEventInfo;
+ RT_ZERO(ExitEventInfo);
+ ExitEventInfo.uExitIntInfo = pVmxTransient->uExitIntInfo;
+ ExitEventInfo.uExitIntErrCode = pVmxTransient->uExitIntErrorCode;
+ ExitEventInfo.uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
+ ExitEventInfo.uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
+
+#ifdef DEBUG_ramshankar
+ hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
+ Log4Func(("exit_int_info=%#RX32 err_code=%#RX32 exit_qual=%#RX64\n", pVmxTransient->uExitIntInfo,
+ pVmxTransient->uExitIntErrorCode, pVmxTransient->uExitQual));
+ if (VMX_IDT_VECTORING_INFO_IS_VALID(pVmxTransient->uIdtVectoringInfo))
+ {
+ Log4Func(("idt_info=%#RX32 idt_errcode=%#RX32 cr2=%#RX64\n", pVmxTransient->uIdtVectoringInfo,
+ pVmxTransient->uIdtVectoringErrorCode, pCtx->cr2));
+ }
+#endif
+ return IEMExecVmxVmexitXcpt(pVCpu, &ExitInfo, &ExitEventInfo);
+ }
+
+            /* Nested paging is currently a requirement; otherwise we would need to handle shadow #PFs in hmR0VmxExitXcptPF. */
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
+ return hmR0VmxExitXcpt(pVCpu, pVmxTransient);
+ }
+
+ /*
+ * Software interrupts:
+ * VM-exits cannot be caused by software interrupts.
+ *
+ * External interrupts:
+ * This should only happen when "acknowledge external interrupts on VM-exit"
+ * control is set. However, we never set this when executing a guest or
+ * nested-guest. For nested-guests it is emulated while injecting interrupts into
+ * the guest.
+ */
+ case VMX_EXIT_INT_INFO_TYPE_SW_INT:
+ case VMX_EXIT_INT_INFO_TYPE_EXT_INT:
+ default:
+ {
+ pVCpu->hm.s.u32HMError = pVmxTransient->uExitIntInfo;
+ return VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_TYPE;
+ }
+ }
+}
+
+
+/**
+ * Nested-guest VM-exit handler for triple faults (VMX_EXIT_TRIPLE_FAULT).
+ * Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitTripleFaultNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ return IEMExecVmxVmexitTripleFault(pVCpu);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for interrupt-window exiting (VMX_EXIT_INT_WINDOW).
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitIntWindowNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_INT_WINDOW_EXIT))
+ return IEMExecVmxVmexit(pVCpu, pVmxTransient->uExitReason, 0 /* uExitQual */);
+ return hmR0VmxExitIntWindow(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for NMI-window exiting (VMX_EXIT_NMI_WINDOW).
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitNmiWindowNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_NMI_WINDOW_EXIT))
+ return IEMExecVmxVmexit(pVCpu, pVmxTransient->uExitReason, 0 /* uExitQual */);
+ return hmR0VmxExitIntWindow(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for task switches (VMX_EXIT_TASK_SWITCH).
+ * Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitTaskSwitchNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
+ hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+
+ VMXVEXITEVENTINFO ExitEventInfo;
+ RT_ZERO(ExitEventInfo);
+ ExitEventInfo.uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
+ ExitEventInfo.uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
+ return IEMExecVmxVmexitTaskSwitch(pVCpu, &ExitInfo, &ExitEventInfo);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for HLT (VMX_EXIT_HLT). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitHltNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_HLT_EXIT))
+ {
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
+ }
+ return hmR0VmxExitHlt(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for INVLPG (VMX_EXIT_INVLPG). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitInvlpgNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_INVLPG_EXIT))
+ {
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ return IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
+ }
+ return hmR0VmxExitInvlpg(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for RDPMC (VMX_EXIT_RDPMC). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitRdpmcNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_RDPMC_EXIT))
+ {
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
+ }
+ return hmR0VmxExitRdpmc(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for VMREAD (VMX_EXIT_VMREAD) and VMWRITE
+ * (VMX_EXIT_VMWRITE). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmreadVmwriteNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ Assert( pVmxTransient->uExitReason == VMX_EXIT_VMREAD
+ || pVmxTransient->uExitReason == VMX_EXIT_VMWRITE);
+
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+
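+    /* The second register operand (iReg2) of VMREAD/VMWRITE holds the VMCS field encoding. */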
+ uint8_t const iGReg = pVmxTransient->ExitInstrInfo.VmreadVmwrite.iReg2;
+ Assert(iGReg < RT_ELEMENTS(pVCpu->cpum.GstCtx.aGRegs));
+ uint64_t u64VmcsField = pVCpu->cpum.GstCtx.aGRegs[iGReg].u64;
+
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
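+    /* When the guest is not in long mode, only the lower 32 bits of the register supply the VMCS field. */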
+ if (!CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx))
+ u64VmcsField &= UINT64_C(0xffffffff);
+
+ if (CPUMIsGuestVmxVmreadVmwriteInterceptSet(pVCpu, pVmxTransient->uExitReason, u64VmcsField))
+ {
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo = pVmxTransient->ExitInstrInfo;
+ return IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
+ }
+
+ if (pVmxTransient->uExitReason == VMX_EXIT_VMREAD)
+ return hmR0VmxExitVmread(pVCpu, pVmxTransient);
+ return hmR0VmxExitVmwrite(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for RDTSC (VMX_EXIT_RDTSC). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitRdtscNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_RDTSC_EXIT))
+ {
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
+ }
+
+ return hmR0VmxExitRdtsc(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for control-register accesses (VMX_EXIT_MOV_CRX).
+ * Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMovCRxNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+
+ VBOXSTRICTRC rcStrict;
+ uint32_t const uAccessType = VMX_EXIT_QUAL_CRX_ACCESS(pVmxTransient->uExitQual);
+ switch (uAccessType)
+ {
+ case VMX_EXIT_QUAL_CRX_ACCESS_WRITE:
+ {
+ uint8_t const iCrReg = VMX_EXIT_QUAL_CRX_REGISTER(pVmxTransient->uExitQual);
+ uint8_t const iGReg = VMX_EXIT_QUAL_CRX_GENREG(pVmxTransient->uExitQual);
+ Assert(iGReg < RT_ELEMENTS(pVCpu->cpum.GstCtx.aGRegs));
+ uint64_t const uNewCrX = pVCpu->cpum.GstCtx.aGRegs[iGReg].u64;
+
+ bool fIntercept;
+ switch (iCrReg)
+ {
+ case 0:
+ case 4:
+ fIntercept = CPUMIsGuestVmxMovToCr0Cr4InterceptSet(&pVCpu->cpum.GstCtx, iCrReg, uNewCrX);
+ break;
+
+ case 3:
+ fIntercept = CPUMIsGuestVmxMovToCr3InterceptSet(pVCpu, uNewCrX);
+ break;
+
+ case 8:
+ fIntercept = CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_CR8_LOAD_EXIT);
+ break;
+
+ default:
+ fIntercept = false;
+ break;
+ }
+ if (fIntercept)
+ {
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ rcStrict = IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
+ }
+ else
+ rcStrict = hmR0VmxExitMovToCrX(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->cbExitInstr, iGReg, iCrReg);
+ break;
+ }
+
+ case VMX_EXIT_QUAL_CRX_ACCESS_READ:
+ {
+ /*
+             * CR0/CR4 reads do not cause VM-exits; the read shadow is used (subject to masking).
+ * CR2 reads do not cause a VM-exit.
+ * CR3 reads cause a VM-exit depending on the "CR3 store exiting" control.
+ * CR8 reads cause a VM-exit depending on the "CR8 store exiting" control.
+ */
+ uint8_t const iCrReg = VMX_EXIT_QUAL_CRX_REGISTER(pVmxTransient->uExitQual);
+ if ( iCrReg == 3
+ || iCrReg == 8)
+ {
+ static const uint32_t s_auCrXReadIntercepts[] = { 0, 0, 0, VMX_PROC_CTLS_CR3_STORE_EXIT, 0,
+ 0, 0, 0, VMX_PROC_CTLS_CR8_STORE_EXIT };
+ uint32_t const uIntercept = s_auCrXReadIntercepts[iCrReg];
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uIntercept))
+ {
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ rcStrict = IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
+ }
+ else
+ {
+ uint8_t const iGReg = VMX_EXIT_QUAL_CRX_GENREG(pVmxTransient->uExitQual);
+ rcStrict = hmR0VmxExitMovFromCrX(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->cbExitInstr, iGReg, iCrReg);
+ }
+ }
+ else
+ {
+ AssertMsgFailed(("MOV from CR%d VM-exit must not happen\n", iCrReg));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, iCrReg);
+ }
+ break;
+ }
+
+ case VMX_EXIT_QUAL_CRX_ACCESS_CLTS:
+ {
+ PCVMXVVMCS pVmcsNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.CTX_SUFF(pVmcs);
+ Assert(pVmcsNstGst);
+ uint64_t const uGstHostMask = pVmcsNstGst->u64Cr0Mask.u;
+ uint64_t const uReadShadow = pVmcsNstGst->u64Cr0ReadShadow.u;
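+            /* CLTS is intercepted by the nested hypervisor only when it owns CR0.TS (guest/host mask)
+               and the CR0 read shadow also has TS set; otherwise handle CLTS on behalf of the guest. */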
+ if ( (uGstHostMask & X86_CR0_TS)
+ && (uReadShadow & X86_CR0_TS))
+ {
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ rcStrict = IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
+ }
+ else
+ rcStrict = hmR0VmxExitClts(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->cbExitInstr);
+ break;
+ }
+
+ case VMX_EXIT_QUAL_CRX_ACCESS_LMSW: /* LMSW (Load Machine-Status Word into CR0) */
+ {
+ RTGCPTR GCPtrEffDst;
+ uint16_t const uNewMsw = VMX_EXIT_QUAL_CRX_LMSW_DATA(pVmxTransient->uExitQual);
+ bool const fMemOperand = VMX_EXIT_QUAL_CRX_LMSW_OP_MEM(pVmxTransient->uExitQual);
+ if (fMemOperand)
+ {
+ hmR0VmxReadGuestLinearAddrVmcs(pVmxTransient);
+ GCPtrEffDst = pVmxTransient->uGuestLinearAddr;
+ }
+ else
+ GCPtrEffDst = NIL_RTGCPTR;
+
+ if (CPUMIsGuestVmxLmswInterceptSet(&pVCpu->cpum.GstCtx, uNewMsw))
+ {
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ ExitInfo.u64GuestLinearAddr = GCPtrEffDst;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ rcStrict = IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
+ }
+ else
+ rcStrict = hmR0VmxExitLmsw(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->cbExitInstr, uNewMsw, GCPtrEffDst);
+ break;
+ }
+
+ default:
+ {
+ AssertMsgFailed(("Unrecognized Mov CRX access type %#x\n", uAccessType));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, uAccessType);
+ }
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+}
+
+
+/**
+ * Nested-guest VM-exit handler for debug-register accesses (VMX_EXIT_MOV_DRX).
+ * Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMovDRxNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_MOV_DR_EXIT))
+ {
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ return IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
+ }
+ return hmR0VmxExitMovDRx(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for I/O instructions (VMX_EXIT_IO_INSTR).
+ * Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitIoInstrNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+
+ uint32_t const uIOPort = VMX_EXIT_QUAL_IO_PORT(pVmxTransient->uExitQual);
+ uint8_t const uIOSize = VMX_EXIT_QUAL_IO_SIZE(pVmxTransient->uExitQual);
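+    /* The exit qualification encodes the access width as 0, 1 or 3 for 1, 2 or 4 byte accesses; 2 is invalid. */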
+ AssertReturn(uIOSize <= 3 && uIOSize != 2, VERR_VMX_IPE_1);
+
+ static uint32_t const s_aIOSizes[4] = { 1, 2, 0, 4 }; /* Size of the I/O accesses in bytes. */
+ uint8_t const cbAccess = s_aIOSizes[uIOSize];
+ if (CPUMIsGuestVmxIoInterceptSet(pVCpu, uIOPort, cbAccess))
+ {
+ /*
+ * IN/OUT instruction:
+ * - Provides VM-exit instruction length.
+ *
+ * INS/OUTS instruction:
+ * - Provides VM-exit instruction length.
+ * - Provides Guest-linear address.
+ * - Optionally provides VM-exit instruction info (depends on CPU feature).
+ */
+ PVMCC pVM = pVCpu->CTX_SUFF(pVM);
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+
+        /* Make sure we don't use stale/uninitialized VMX-transient info below. */
+ pVmxTransient->ExitInstrInfo.u = 0;
+ pVmxTransient->uGuestLinearAddr = 0;
+
+ bool const fVmxInsOutsInfo = pVM->cpum.ro.GuestFeatures.fVmxInsOutInfo;
+ bool const fIOString = VMX_EXIT_QUAL_IO_IS_STRING(pVmxTransient->uExitQual);
+ if (fIOString)
+ {
+ hmR0VmxReadGuestLinearAddrVmcs(pVmxTransient);
+ if (fVmxInsOutsInfo)
+ {
+ Assert(RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_INS_OUTS)); /* Paranoia. */
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ }
+ }
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo = pVmxTransient->ExitInstrInfo;
+ ExitInfo.u64GuestLinearAddr = pVmxTransient->uGuestLinearAddr;
+ return IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
+ }
+ return hmR0VmxExitIoInstr(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for RDMSR (VMX_EXIT_RDMSR).
+ */
+HMVMX_EXIT_DECL hmR0VmxExitRdmsrNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ uint32_t fMsrpm;
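+    /* Without MSR bitmaps, every RDMSR in the nested-guest causes a VM-exit to the nested hypervisor. */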
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_USE_MSR_BITMAPS))
+ fMsrpm = CPUMGetVmxMsrPermission(pVCpu->cpum.GstCtx.hwvirt.vmx.CTX_SUFF(pvMsrBitmap), pVCpu->cpum.GstCtx.ecx);
+ else
+ fMsrpm = VMXMSRPM_EXIT_RD;
+
+ if (fMsrpm & VMXMSRPM_EXIT_RD)
+ {
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
+ }
+ return hmR0VmxExitRdmsr(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for WRMSR (VMX_EXIT_WRMSR).
+ */
+HMVMX_EXIT_DECL hmR0VmxExitWrmsrNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ uint32_t fMsrpm;
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_USE_MSR_BITMAPS))
+ fMsrpm = CPUMGetVmxMsrPermission(pVCpu->cpum.GstCtx.hwvirt.vmx.CTX_SUFF(pvMsrBitmap), pVCpu->cpum.GstCtx.ecx);
+ else
+ fMsrpm = VMXMSRPM_EXIT_WR;
+
+ if (fMsrpm & VMXMSRPM_EXIT_WR)
+ {
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
+ }
+ return hmR0VmxExitWrmsr(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for MWAIT (VMX_EXIT_MWAIT). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMwaitNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_MWAIT_EXIT))
+ {
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
+ }
+ return hmR0VmxExitMwait(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for monitor-trap-flag (VMX_EXIT_MTF). Conditional
+ * VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMtfNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /** @todo NSTVMX: Should consider debugging nested-guests using VM debugger. */
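+    /* MTF is a trap-like VM-exit; supply the guest's pending debug exceptions to IEM. */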
+ hmR0VmxReadGuestPendingDbgXctps(pVmxTransient);
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
+ return IEMExecVmxVmexitTrapLike(pVCpu, &ExitInfo);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for MONITOR (VMX_EXIT_MONITOR). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMonitorNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_MONITOR_EXIT))
+ {
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
+ }
+ return hmR0VmxExitMonitor(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for PAUSE (VMX_EXIT_PAUSE). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitPauseNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /** @todo NSTVMX: Think about this more. Does the outer guest need to intercept
+ * PAUSE when executing a nested-guest? If it does not, we would not need
+ * to check for the intercepts here. Just call VM-exit... */
+
+ /* The CPU would have already performed the necessary CPL checks for PAUSE-loop exiting. */
+ if ( CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_PAUSE_EXIT)
+ || CPUMIsGuestVmxProcCtls2Set(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
+ {
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
+ }
+ return hmR0VmxExitPause(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for when the TPR value is lowered below the
+ * specified threshold (VMX_EXIT_TPR_BELOW_THRESHOLD). Conditional VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitTprBelowThresholdNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_USE_TPR_SHADOW))
+ {
+ hmR0VmxReadGuestPendingDbgXctps(pVmxTransient);
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
+ return IEMExecVmxVmexitTrapLike(pVCpu, &ExitInfo);
+ }
+ return hmR0VmxExitTprBelowThreshold(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for APIC access (VMX_EXIT_APIC_ACCESS). Conditional
+ * VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitApicAccessNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
+ hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+
+ Assert(CPUMIsGuestVmxProcCtls2Set(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS2_VIRT_APIC_ACCESS));
+
+ Log4Func(("at offset %#x type=%u\n", VMX_EXIT_QUAL_APIC_ACCESS_OFFSET(pVmxTransient->uExitQual),
+ VMX_EXIT_QUAL_APIC_ACCESS_TYPE(pVmxTransient->uExitQual)));
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+
+ VMXVEXITEVENTINFO ExitEventInfo;
+ RT_ZERO(ExitEventInfo);
+ ExitEventInfo.uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
+ ExitEventInfo.uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
+ return IEMExecVmxVmexitApicAccess(pVCpu, &ExitInfo, &ExitEventInfo);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for APIC write emulation (VMX_EXIT_APIC_WRITE).
+ * Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitApicWriteNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ Assert(CPUMIsGuestVmxProcCtls2Set(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS2_APIC_REG_VIRT));
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ return IEMExecVmxVmexit(pVCpu, pVmxTransient->uExitReason, pVmxTransient->uExitQual);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for virtualized EOI (VMX_EXIT_VIRTUALIZED_EOI).
+ * Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVirtEoiNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ Assert(CPUMIsGuestVmxProcCtls2Set(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS2_VIRT_INT_DELIVERY));
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ return IEMExecVmxVmexit(pVCpu, pVmxTransient->uExitReason, pVmxTransient->uExitQual);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for RDTSCP (VMX_EXIT_RDTSCP). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitRdtscpNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_RDTSC_EXIT))
+ {
+ Assert(CPUMIsGuestVmxProcCtls2Set(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS2_RDTSCP));
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
+ }
+ return hmR0VmxExitRdtscp(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for WBINVD (VMX_EXIT_WBINVD). Conditional VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitWbinvdNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ if (CPUMIsGuestVmxProcCtls2Set(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS2_WBINVD_EXIT))
+ {
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
+ }
+ return hmR0VmxExitWbinvd(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for INVPCID (VMX_EXIT_INVPCID). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitInvpcidNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_INVLPG_EXIT))
+ {
+ Assert(CPUMIsGuestVmxProcCtls2Set(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS2_INVPCID));
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo = pVmxTransient->ExitInstrInfo;
+ return IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
+ }
+ return hmR0VmxExitInvpcid(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for invalid guest state
+ * (VMX_EXIT_ERR_INVALID_GUEST_STATE). Error VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitErrInvalidGuestStateNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /*
+ * Currently this should never happen because we fully emulate VMLAUNCH/VMRESUME in IEM.
+     * So if it does happen, it indicates a bug, possibly in the hardware-assisted VMX code.
+     * Handle it as an invalid guest state of the outer guest.
+ *
+ * When the fast path is implemented, this should be changed to cause the corresponding
+ * nested-guest VM-exit.
+ */
+ return hmR0VmxExitErrInvalidGuestState(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for instructions that cause VM-exits unconditionally
+ * and only provide the instruction length.
+ *
+ * Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitInstrNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+#ifdef VBOX_STRICT
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ switch (pVmxTransient->uExitReason)
+ {
+ case VMX_EXIT_ENCLS:
+ Assert(CPUMIsGuestVmxProcCtls2Set(pCtx, VMX_PROC_CTLS2_ENCLS_EXIT));
+ break;
+
+ case VMX_EXIT_VMFUNC:
+ Assert(CPUMIsGuestVmxProcCtls2Set(pCtx, VMX_PROC_CTLS2_VMFUNC));
+ break;
+ }
+#endif
+
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
+}
+
+
+/**
+ * Nested-guest VM-exit handler for instructions that provide instruction length as
+ * well as more information.
+ *
+ * Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitInstrWithInfoNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+#ifdef VBOX_STRICT
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ switch (pVmxTransient->uExitReason)
+ {
+ case VMX_EXIT_GDTR_IDTR_ACCESS:
+ case VMX_EXIT_LDTR_TR_ACCESS:
+ Assert(CPUMIsGuestVmxProcCtls2Set(pCtx, VMX_PROC_CTLS2_DESC_TABLE_EXIT));
+ break;
+
+ case VMX_EXIT_RDRAND:
+ Assert(CPUMIsGuestVmxProcCtls2Set(pCtx, VMX_PROC_CTLS2_RDRAND_EXIT));
+ break;
+
+ case VMX_EXIT_RDSEED:
+ Assert(CPUMIsGuestVmxProcCtls2Set(pCtx, VMX_PROC_CTLS2_RDSEED_EXIT));
+ break;
+
+ case VMX_EXIT_XSAVES:
+ case VMX_EXIT_XRSTORS:
+ /** @todo NSTVMX: Verify XSS-bitmap. */
+ Assert(CPUMIsGuestVmxProcCtls2Set(pCtx, VMX_PROC_CTLS2_XSAVES_XRSTORS));
+ break;
+
+ case VMX_EXIT_UMWAIT:
+ case VMX_EXIT_TPAUSE:
+ Assert(CPUMIsGuestVmxProcCtlsSet(pCtx, VMX_PROC_CTLS_RDTSC_EXIT));
+ Assert(CPUMIsGuestVmxProcCtls2Set(pCtx, VMX_PROC_CTLS2_USER_WAIT_PAUSE));
+ break;
+ }
+#endif
+
+ hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ hmR0VmxReadExitQualVmcs(pVmxTransient);
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo = pVmxTransient->ExitInstrInfo;
+ return IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
+}
+
+/** @} */
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
diff --git a/src/VBox/VMM/VMMR0/HMVMXR0.h b/src/VBox/VMM/VMMR0/HMVMXR0.h
new file mode 100644
index 00000000..a86b4d92
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/HMVMXR0.h
@@ -0,0 +1,56 @@
+/* $Id: HMVMXR0.h $ */
+/** @file
+ * HM VMX (VT-x) - Internal header file.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VMM_INCLUDED_SRC_VMMR0_HMVMXR0_h
+#define VMM_INCLUDED_SRC_VMMR0_HMVMXR0_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+RT_C_DECLS_BEGIN
+
+/** @defgroup grp_vmx_int Internal
+ * @ingroup grp_vmx
+ * @internal
+ * @{
+ */
+
+#ifdef IN_RING0
+VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu);
+VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit);
+VMMR0DECL(int) VMXR0CallRing3Callback(PVMCPUCC pVCpu, VMMCALLRING3 enmOperation);
+VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvPageCpu, RTHCPHYS pPageCpuPhys,
+ bool fEnabledBySystem, PCSUPHWVIRTMSRS pHwvirtMsrs);
+VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvPageCpu, RTHCPHYS pPageCpuPhys);
+VMMR0DECL(int) VMXR0GlobalInit(void);
+VMMR0DECL(void) VMXR0GlobalTerm(void);
+VMMR0DECL(int) VMXR0InitVM(PVMCC pVM);
+VMMR0DECL(int) VMXR0TermVM(PVMCC pVM);
+VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM);
+VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu);
+VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt);
+VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat);
+VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu);
+DECLASM(int) VMXR0StartVM64(RTHCUINT fResume, PCPUMCTX pCtx, void *pvUnused, PVMCC pVM, PVMCPUCC pVCpu);
+#endif /* IN_RING0 */
+
+/** @} */
+
+RT_C_DECLS_END
+
+#endif /* !VMM_INCLUDED_SRC_VMMR0_HMVMXR0_h */
+
diff --git a/src/VBox/VMM/VMMR0/IOMR0.cpp b/src/VBox/VMM/VMMR0/IOMR0.cpp
new file mode 100644
index 00000000..ebba9610
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/IOMR0.cpp
@@ -0,0 +1,57 @@
+/* $Id: IOMR0.cpp $ */
+/** @file
+ * IOM - Host Context Ring 0.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_IOM
+#include <VBox/vmm/iom.h>
+#include "IOMInternal.h"
+#include <VBox/vmm/vmcc.h>
+#include <VBox/log.h>
+#include <iprt/assertcompile.h>
+
+
+
+/**
+ * Initializes the per-VM data for the IOM.
+ *
+ * This is called from under the GVMM lock, so it should only initialize the
+ * data so IOMR0CleanupVM and others will work smoothly.
+ *
+ * @param pGVM Pointer to the global VM structure.
+ */
+VMMR0_INT_DECL(void) IOMR0InitPerVMData(PGVM pGVM)
+{
+ AssertCompile(sizeof(pGVM->iom.s) <= sizeof(pGVM->iom.padding));
+ AssertCompile(sizeof(pGVM->iomr0.s) <= sizeof(pGVM->iomr0.padding));
+
+ iomR0IoPortInitPerVMData(pGVM);
+ iomR0MmioInitPerVMData(pGVM);
+}
+
+
+/**
+ * Cleans up any loose ends before the GVM structure is destroyed.
+ */
+VMMR0_INT_DECL(void) IOMR0CleanupVM(PGVM pGVM)
+{
+ iomR0IoPortCleanupVM(pGVM);
+ iomR0MmioCleanupVM(pGVM);
+}
+
diff --git a/src/VBox/VMM/VMMR0/IOMR0IoPort.cpp b/src/VBox/VMM/VMMR0/IOMR0IoPort.cpp
new file mode 100644
index 00000000..9c4fa9a0
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/IOMR0IoPort.cpp
@@ -0,0 +1,382 @@
+/* $Id: IOMR0IoPort.cpp $ */
+/** @file
+ * IOM - Host Context Ring 0, I/O ports.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_IOM_IOPORT
+#include <VBox/vmm/iom.h>
+#include "IOMInternal.h"
+#include <VBox/vmm/pdmdev.h>
+#include <VBox/vmm/vmcc.h>
+#include <VBox/err.h>
+#include <VBox/log.h>
+#include <iprt/assert.h>
+#include <iprt/mem.h>
+#include <iprt/memobj.h>
+#include <iprt/process.h>
+#include <iprt/string.h>
+
+
+
+/**
+ * Initializes the I/O port related members.
+ *
+ * @param pGVM Pointer to the global VM structure.
+ */
+void iomR0IoPortInitPerVMData(PGVM pGVM)
+{
+ pGVM->iomr0.s.hIoPortMapObj = NIL_RTR0MEMOBJ;
+ pGVM->iomr0.s.hIoPortMemObj = NIL_RTR0MEMOBJ;
+#ifdef VBOX_WITH_STATISTICS
+ pGVM->iomr0.s.hIoPortStatsMapObj = NIL_RTR0MEMOBJ;
+ pGVM->iomr0.s.hIoPortStatsMemObj = NIL_RTR0MEMOBJ;
+#endif
+}
+
+
+/**
+ * Cleans up I/O port related resources.
+ */
+void iomR0IoPortCleanupVM(PGVM pGVM)
+{
+ RTR0MemObjFree(pGVM->iomr0.s.hIoPortMapObj, true /*fFreeMappings*/);
+ pGVM->iomr0.s.hIoPortMapObj = NIL_RTR0MEMOBJ;
+ RTR0MemObjFree(pGVM->iomr0.s.hIoPortMemObj, true /*fFreeMappings*/);
+ pGVM->iomr0.s.hIoPortMemObj = NIL_RTR0MEMOBJ;
+#ifdef VBOX_WITH_STATISTICS
+ RTR0MemObjFree(pGVM->iomr0.s.hIoPortStatsMapObj, true /*fFreeMappings*/);
+ pGVM->iomr0.s.hIoPortStatsMapObj = NIL_RTR0MEMOBJ;
+ RTR0MemObjFree(pGVM->iomr0.s.hIoPortStatsMemObj, true /*fFreeMappings*/);
+ pGVM->iomr0.s.hIoPortStatsMemObj = NIL_RTR0MEMOBJ;
+#endif
+}
+
+
+/**
+ * Implements PDMDEVHLPR0::pfnIoPortSetUpContext.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pDevIns The device instance.
+ * @param hIoPorts The I/O port handle (already registered in ring-3).
+ * @param pfnOut The OUT handler callback, optional.
+ * @param pfnIn The IN handler callback, optional.
+ * @param pfnOutStr The REP OUTS handler callback, optional.
+ * @param pfnInStr The REP INS handler callback, optional.
+ * @param pvUser User argument for the callbacks.
+ * @thread EMT(0)
+ * @note Only callable at VM creation time.
+ */
+VMMR0_INT_DECL(int) IOMR0IoPortSetUpContext(PGVM pGVM, PPDMDEVINS pDevIns, IOMIOPORTHANDLE hIoPorts,
+ PFNIOMIOPORTNEWOUT pfnOut, PFNIOMIOPORTNEWIN pfnIn,
+ PFNIOMIOPORTNEWOUTSTRING pfnOutStr, PFNIOMIOPORTNEWINSTRING pfnInStr, void *pvUser)
+{
+ /*
+ * Validate input and state.
+ */
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
+ AssertReturn(hIoPorts < pGVM->iomr0.s.cIoPortAlloc, VERR_IOM_INVALID_IOPORT_HANDLE);
+ AssertReturn(hIoPorts < pGVM->iom.s.cIoPortRegs, VERR_IOM_INVALID_IOPORT_HANDLE);
+ AssertPtrReturn(pDevIns, VERR_INVALID_HANDLE);
+ AssertReturn(pDevIns->pDevInsForR3 != NIL_RTR3PTR && !(pDevIns->pDevInsForR3 & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
+ AssertReturn(pGVM->iomr0.s.paIoPortRing3Regs[hIoPorts].pDevIns == pDevIns->pDevInsForR3, VERR_IOM_INVALID_IOPORT_HANDLE);
+ AssertReturn(pGVM->iomr0.s.paIoPortRegs[hIoPorts].pDevIns == NULL, VERR_WRONG_ORDER);
+ Assert(pGVM->iomr0.s.paIoPortRegs[hIoPorts].idxSelf == hIoPorts);
+
+ AssertReturn(pfnOut || pfnIn || pfnOutStr || pfnInStr, VERR_INVALID_PARAMETER);
+ AssertPtrNullReturn(pfnOut, VERR_INVALID_POINTER);
+ AssertPtrNullReturn(pfnIn, VERR_INVALID_POINTER);
+ AssertPtrNullReturn(pfnOutStr, VERR_INVALID_POINTER);
+ AssertPtrNullReturn(pfnInStr, VERR_INVALID_POINTER);
+
+ uint16_t const fFlags = pGVM->iomr0.s.paIoPortRing3Regs[hIoPorts].fFlags;
+ RTIOPORT const cPorts = pGVM->iomr0.s.paIoPortRing3Regs[hIoPorts].cPorts;
+    AssertMsgReturn(cPorts > 0 && cPorts <= _8K, ("cPorts=%u\n", cPorts), VERR_IOM_INVALID_IOPORT_HANDLE);
+
+ /*
+ * Do the job.
+ */
+ pGVM->iomr0.s.paIoPortRegs[hIoPorts].pvUser = pvUser;
+ pGVM->iomr0.s.paIoPortRegs[hIoPorts].pDevIns = pDevIns;
+ pGVM->iomr0.s.paIoPortRegs[hIoPorts].pfnOutCallback = pfnOut;
+ pGVM->iomr0.s.paIoPortRegs[hIoPorts].pfnInCallback = pfnIn;
+ pGVM->iomr0.s.paIoPortRegs[hIoPorts].pfnOutStrCallback = pfnOutStr;
+ pGVM->iomr0.s.paIoPortRegs[hIoPorts].pfnInStrCallback = pfnInStr;
+ pGVM->iomr0.s.paIoPortRegs[hIoPorts].cPorts = cPorts;
+ pGVM->iomr0.s.paIoPortRegs[hIoPorts].fFlags = fFlags;
+#ifdef VBOX_WITH_STATISTICS
+ uint16_t const idxStats = pGVM->iomr0.s.paIoPortRing3Regs[hIoPorts].idxStats;
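+    /* Keep the ring-3 statistics index only if the whole port range fits within the statistics
+       allocation; UINT16_MAX means no ring-0 statistics for this registration. */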
+ pGVM->iomr0.s.paIoPortRegs[hIoPorts].idxStats = (uint32_t)idxStats + cPorts <= pGVM->iomr0.s.cIoPortStatsAllocation
+ ? idxStats : UINT16_MAX;
+#else
+ pGVM->iomr0.s.paIoPortRegs[hIoPorts].idxStats = UINT16_MAX;
+#endif
+
+ pGVM->iomr0.s.paIoPortRing3Regs[hIoPorts].fRing0 = true;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Grows the I/O port registration (all contexts) and lookup tables.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param cReqMinEntries The minimum growth (absolute).
+ * @thread EMT(0)
+ * @note Only callable at VM creation time.
+ */
+VMMR0_INT_DECL(int) IOMR0IoPortGrowRegistrationTables(PGVM pGVM, uint64_t cReqMinEntries)
+{
+ /*
+ * Validate input and state.
+ */
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
+ AssertReturn(cReqMinEntries <= _4K, VERR_IOM_TOO_MANY_IOPORT_REGISTRATIONS);
+ uint32_t cNewEntries = (uint32_t)cReqMinEntries;
+ AssertReturn(cNewEntries >= pGVM->iom.s.cIoPortAlloc, VERR_IOM_IOPORT_IPE_1);
+ uint32_t const cOldEntries = pGVM->iomr0.s.cIoPortAlloc;
+ ASMCompilerBarrier();
+ AssertReturn(cNewEntries >= cOldEntries, VERR_IOM_IOPORT_IPE_2);
+ AssertReturn(pGVM->iom.s.cIoPortRegs >= pGVM->iomr0.s.cIoPortMax, VERR_IOM_IOPORT_IPE_3);
+
+ /*
+ * Allocate the new tables. We use a single allocation for the three tables (ring-0,
+     * ring-3, lookup) and do a partial mapping of the result to ring-3.
+ */
+ uint32_t const cbRing0 = RT_ALIGN_32(cNewEntries * sizeof(IOMIOPORTENTRYR0), PAGE_SIZE);
+ uint32_t const cbRing3 = RT_ALIGN_32(cNewEntries * sizeof(IOMIOPORTENTRYR3), PAGE_SIZE);
+ uint32_t const cbShared = RT_ALIGN_32(cNewEntries * sizeof(IOMIOPORTLOOKUPENTRY), PAGE_SIZE);
+ uint32_t const cbNew = cbRing0 + cbRing3 + cbShared;
+
+ /* Use the rounded up space as best we can. */
+ cNewEntries = RT_MIN(RT_MIN(cbRing0 / sizeof(IOMIOPORTENTRYR0), cbRing3 / sizeof(IOMIOPORTENTRYR3)),
+ cbShared / sizeof(IOMIOPORTLOOKUPENTRY));
+
+ RTR0MEMOBJ hMemObj;
+ int rc = RTR0MemObjAllocPage(&hMemObj, cbNew, false /*fExecutable*/);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Zero and map it.
+ */
+ RT_BZERO(RTR0MemObjAddress(hMemObj), cbNew);
+
+ RTR0MEMOBJ hMapObj;
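+        /* Map only the ring-3 table and the lookup table (everything after the first cbRing0
+           bytes) into the ring-3 process; the ring-0 table is not exposed to ring-3. */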
+ rc = RTR0MemObjMapUserEx(&hMapObj, hMemObj, (RTR3PTR)-1, PAGE_SIZE, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
+ RTR0ProcHandleSelf(), cbRing0, cbNew - cbRing0);
+ if (RT_SUCCESS(rc))
+ {
+ PIOMIOPORTENTRYR0 const paRing0 = (PIOMIOPORTENTRYR0)RTR0MemObjAddress(hMemObj);
+ PIOMIOPORTENTRYR3 const paRing3 = (PIOMIOPORTENTRYR3)((uintptr_t)paRing0 + cbRing0);
+ PIOMIOPORTLOOKUPENTRY const paLookup = (PIOMIOPORTLOOKUPENTRY)((uintptr_t)paRing3 + cbRing3);
+ RTR3UINTPTR const uAddrRing3 = RTR0MemObjAddressR3(hMapObj);
+
+ /*
+ * Copy over the old info and initialize the idxSelf and idxStats members.
+ */
+ if (pGVM->iomr0.s.paIoPortRegs != NULL)
+ {
+ memcpy(paRing0, pGVM->iomr0.s.paIoPortRegs, sizeof(paRing0[0]) * cOldEntries);
+ memcpy(paRing3, pGVM->iomr0.s.paIoPortRing3Regs, sizeof(paRing3[0]) * cOldEntries);
+ memcpy(paLookup, pGVM->iomr0.s.paIoPortLookup, sizeof(paLookup[0]) * cOldEntries);
+ }
+
+ size_t i = cbRing0 / sizeof(*paRing0);
+ while (i-- > cOldEntries)
+ {
+ paRing0[i].idxSelf = (uint16_t)i;
+ paRing0[i].idxStats = UINT16_MAX;
+ }
+ i = cbRing3 / sizeof(*paRing3);
+ while (i-- > cOldEntries)
+ {
+ paRing3[i].idxSelf = (uint16_t)i;
+ paRing3[i].idxStats = UINT16_MAX;
+ }
+
+ /*
+ * Switch the memory handles.
+ */
+ RTR0MEMOBJ hTmp = pGVM->iomr0.s.hIoPortMapObj;
+ pGVM->iomr0.s.hIoPortMapObj = hMapObj;
+ hMapObj = hTmp;
+
+ hTmp = pGVM->iomr0.s.hIoPortMemObj;
+ pGVM->iomr0.s.hIoPortMemObj = hMemObj;
+ hMemObj = hTmp;
+
+ /*
+ * Update the variables.
+ */
+ pGVM->iomr0.s.paIoPortRegs = paRing0;
+ pGVM->iomr0.s.paIoPortRing3Regs = paRing3;
+ pGVM->iomr0.s.paIoPortLookup = paLookup;
+ pGVM->iom.s.paIoPortRegs = uAddrRing3;
+ pGVM->iom.s.paIoPortLookup = uAddrRing3 + cbRing3;
+ pGVM->iom.s.cIoPortAlloc = cNewEntries;
+ pGVM->iomr0.s.cIoPortAlloc = cNewEntries;
+
+ /*
+ * Free the old allocation.
+ */
+ RTR0MemObjFree(hMapObj, true /*fFreeMappings*/);
+ }
+ RTR0MemObjFree(hMemObj, true /*fFreeMappings*/);
+ }
+
+ return rc;
+}
+
+
+/**
+ * Grows the I/O port statistics table.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param cReqMinEntries The minimum growth (absolute).
+ * @thread EMT(0)
+ * @note Only callable at VM creation time.
+ */
+VMMR0_INT_DECL(int) IOMR0IoPortGrowStatisticsTable(PGVM pGVM, uint64_t cReqMinEntries)
+{
+ /*
+ * Validate input and state.
+ */
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
+ AssertReturn(cReqMinEntries <= _64K, VERR_IOM_TOO_MANY_IOPORT_REGISTRATIONS);
+ uint32_t cNewEntries = (uint32_t)cReqMinEntries;
+#ifdef VBOX_WITH_STATISTICS
+ uint32_t const cOldEntries = pGVM->iomr0.s.cIoPortStatsAllocation;
+ ASMCompilerBarrier();
+#else
+ uint32_t const cOldEntries = 0;
+#endif
+ AssertReturn(cNewEntries > cOldEntries, VERR_IOM_IOPORT_IPE_1);
+ AssertReturn(pGVM->iom.s.cIoPortStatsAllocation == cOldEntries, VERR_IOM_IOPORT_IPE_1);
+ AssertReturn(pGVM->iom.s.cIoPortStats <= cOldEntries, VERR_IOM_IOPORT_IPE_2);
+#ifdef VBOX_WITH_STATISTICS
+ AssertReturn(!pGVM->iomr0.s.fIoPortStatsFrozen, VERR_WRONG_ORDER);
+#endif
+
+ /*
+ * Allocate a new table, zero it and map it.
+ */
+#ifndef VBOX_WITH_STATISTICS
+ AssertFailedReturn(VERR_NOT_SUPPORTED);
+#else
+ uint32_t const cbNew = RT_ALIGN_32(cNewEntries * sizeof(IOMIOPORTSTATSENTRY), PAGE_SIZE);
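+    /* Use all of the page-rounded allocation. */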
+ cNewEntries = cbNew / sizeof(IOMIOPORTSTATSENTRY);
+
+ RTR0MEMOBJ hMemObj;
+ int rc = RTR0MemObjAllocPage(&hMemObj, cbNew, false /*fExecutable*/);
+ if (RT_SUCCESS(rc))
+ {
+ RT_BZERO(RTR0MemObjAddress(hMemObj), cbNew);
+
+ RTR0MEMOBJ hMapObj;
+ rc = RTR0MemObjMapUser(&hMapObj, hMemObj, (RTR3PTR)-1, PAGE_SIZE, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
+ if (RT_SUCCESS(rc))
+ {
+ PIOMIOPORTSTATSENTRY pIoPortStats = (PIOMIOPORTSTATSENTRY)RTR0MemObjAddress(hMemObj);
+
+ /*
+ * Anything to copy over and free up?
+ */
+ if (pGVM->iomr0.s.paIoPortStats)
+ memcpy(pIoPortStats, pGVM->iomr0.s.paIoPortStats, cOldEntries * sizeof(IOMIOPORTSTATSENTRY));
+
+ /*
+ * Switch the memory handles.
+ */
+ RTR0MEMOBJ hTmp = pGVM->iomr0.s.hIoPortStatsMapObj;
+ pGVM->iomr0.s.hIoPortStatsMapObj = hMapObj;
+ hMapObj = hTmp;
+
+ hTmp = pGVM->iomr0.s.hIoPortStatsMemObj;
+ pGVM->iomr0.s.hIoPortStatsMemObj = hMemObj;
+ hMemObj = hTmp;
+
+ /*
+ * Update the variables.
+ */
+ pGVM->iomr0.s.paIoPortStats = pIoPortStats;
+ pGVM->iom.s.paIoPortStats = RTR0MemObjAddressR3(pGVM->iomr0.s.hIoPortStatsMapObj);
+ pGVM->iom.s.cIoPortStatsAllocation = cNewEntries;
+ pGVM->iomr0.s.cIoPortStatsAllocation = cNewEntries;
+
+ /*
+ * Free the old allocation.
+ */
+ RTR0MemObjFree(hMapObj, true /*fFreeMappings*/);
+ }
+ RTR0MemObjFree(hMemObj, true /*fFreeMappings*/);
+ }
+ return rc;
+#endif /* VBOX_WITH_STATISTICS */
+}
+
+/**
+ * Called after all devices have been instantiated to copy over the statistics
+ * indices to the ring-0 I/O port registration table.
+ *
+ * This simplifies keeping statistics for I/O port ranges that are ring-3 only.
+ *
+ * After this call, IOMR0IoPortGrowStatisticsTable() will stop working.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @thread EMT(0)
+ * @note Only callable at VM creation time.
+ */
+VMMR0_INT_DECL(int) IOMR0IoPortSyncStatisticsIndices(PGVM pGVM)
+{
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
+
+#ifdef VBOX_WITH_STATISTICS
+ /*
+ * First, freeze the statistics array:
+ */
+ pGVM->iomr0.s.fIoPortStatsFrozen = true;
+
+ /*
+ * Second, synchronize the indices:
+ */
+ uint32_t const cRegs = RT_MIN(pGVM->iom.s.cIoPortRegs, pGVM->iomr0.s.cIoPortAlloc);
+ uint32_t const cStatsAlloc = pGVM->iomr0.s.cIoPortStatsAllocation;
+ PIOMIOPORTENTRYR0 paIoPortRegs = pGVM->iomr0.s.paIoPortRegs;
+ IOMIOPORTENTRYR3 const *paIoPortRegsR3 = pGVM->iomr0.s.paIoPortRing3Regs;
+ AssertReturn((paIoPortRegs && paIoPortRegsR3) || cRegs == 0, VERR_IOM_IOPORT_IPE_3);
+
+    for (uint32_t i = 0; i < cRegs; i++)
+ {
+ uint16_t idxStats = paIoPortRegsR3[i].idxStats;
+ paIoPortRegs[i].idxStats = idxStats < cStatsAlloc ? idxStats : UINT16_MAX;
+ }
+
+#else
+ RT_NOREF(pGVM);
+#endif
+ return VINF_SUCCESS;
+}
+
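The ordering contract documented above (grow the statistics table first, then sync and freeze the indices) is easiest to see end to end. The sketch below is illustrative only: in practice these ring-0 entry points are reached via VMMR0 request packets issued from ring-3 during VM creation, the grow size of 64 is hypothetical, and the helper name sketchIoPortStatsSetup does not exist in the sources.

    /* Minimal sketch, assuming a valid ring-0 VM handle on EMT(0) while the VM state is 'creating'. */
    static int sketchIoPortStatsSetup(PGVM pGVM)
    {
        /* 1. Grow the statistics table so device registrations can claim entries. */
        int rc = IOMR0IoPortGrowStatisticsTable(pGVM, 64 /* hypothetical minimum */);
        if (RT_FAILURE(rc))
            return rc;

        /* 2. ...devices register their I/O port ranges (ring-3 side) here... */

        /* 3. Copy the indices into the ring-0 table and freeze the statistics array;
              any later IOMR0IoPortGrowStatisticsTable() call fails with VERR_WRONG_ORDER. */
        return IOMR0IoPortSyncStatisticsIndices(pGVM);
    }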
diff --git a/src/VBox/VMM/VMMR0/IOMR0Mmio.cpp b/src/VBox/VMM/VMMR0/IOMR0Mmio.cpp
new file mode 100644
index 00000000..45766568
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/IOMR0Mmio.cpp
@@ -0,0 +1,378 @@
+/* $Id: IOMR0Mmio.cpp $ */
+/** @file
+ * IOM - Host Context Ring 0, MMIO.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_IOM_MMIO
+#include <VBox/vmm/iom.h>
+#include "IOMInternal.h"
+#include <VBox/vmm/pdmdev.h>
+#include <VBox/vmm/vmcc.h>
+#include <VBox/err.h>
+#include <VBox/log.h>
+#include <iprt/assert.h>
+#include <iprt/mem.h>
+#include <iprt/memobj.h>
+#include <iprt/process.h>
+#include <iprt/string.h>
+
+
+
+/**
+ * Initializes the MMIO related members.
+ *
+ * @param pGVM Pointer to the global VM structure.
+ */
+void iomR0MmioInitPerVMData(PGVM pGVM)
+{
+ pGVM->iomr0.s.hMmioMapObj = NIL_RTR0MEMOBJ;
+ pGVM->iomr0.s.hMmioMemObj = NIL_RTR0MEMOBJ;
+#ifdef VBOX_WITH_STATISTICS
+ pGVM->iomr0.s.hMmioStatsMapObj = NIL_RTR0MEMOBJ;
+ pGVM->iomr0.s.hMmioStatsMemObj = NIL_RTR0MEMOBJ;
+#endif
+}
+
+
+/**
+ * Cleans up MMIO related resources.
+ */
+void iomR0MmioCleanupVM(PGVM pGVM)
+{
+ RTR0MemObjFree(pGVM->iomr0.s.hMmioMapObj, true /*fFreeMappings*/);
+ pGVM->iomr0.s.hMmioMapObj = NIL_RTR0MEMOBJ;
+ RTR0MemObjFree(pGVM->iomr0.s.hMmioMemObj, true /*fFreeMappings*/);
+ pGVM->iomr0.s.hMmioMemObj = NIL_RTR0MEMOBJ;
+#ifdef VBOX_WITH_STATISTICS
+ RTR0MemObjFree(pGVM->iomr0.s.hMmioStatsMapObj, true /*fFreeMappings*/);
+ pGVM->iomr0.s.hMmioStatsMapObj = NIL_RTR0MEMOBJ;
+ RTR0MemObjFree(pGVM->iomr0.s.hMmioStatsMemObj, true /*fFreeMappings*/);
+ pGVM->iomr0.s.hMmioStatsMemObj = NIL_RTR0MEMOBJ;
+#endif
+}
+
+
+/**
+ * Implements PDMDEVHLPR0::pfnMmioSetUpContext.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pDevIns The device instance.
+ * @param hRegion The MMIO region handle (already registered in
+ * ring-3).
+ * @param pfnWrite The write handler callback, optional.
+ * @param pfnRead The read handler callback, optional.
+ * @param pfnFill The fill handler callback, optional.
+ * @param pvUser User argument for the callbacks.
+ * @thread EMT(0)
+ * @note Only callable at VM creation time.
+ */
+VMMR0_INT_DECL(int) IOMR0MmioSetUpContext(PGVM pGVM, PPDMDEVINS pDevIns, IOMMMIOHANDLE hRegion, PFNIOMMMIONEWWRITE pfnWrite,
+ PFNIOMMMIONEWREAD pfnRead, PFNIOMMMIONEWFILL pfnFill, void *pvUser)
+{
+ /*
+ * Validate input and state.
+ */
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
+ AssertReturn(hRegion < pGVM->iomr0.s.cMmioAlloc, VERR_IOM_INVALID_MMIO_HANDLE);
+ AssertReturn(hRegion < pGVM->iom.s.cMmioRegs, VERR_IOM_INVALID_MMIO_HANDLE);
+ AssertPtrReturn(pDevIns, VERR_INVALID_HANDLE);
+ AssertReturn(pDevIns->pDevInsForR3 != NIL_RTR3PTR && !(pDevIns->pDevInsForR3 & PAGE_OFFSET_MASK), VERR_INVALID_PARAMETER);
+ AssertReturn(pGVM->iomr0.s.paMmioRing3Regs[hRegion].pDevIns == pDevIns->pDevInsForR3, VERR_IOM_INVALID_MMIO_HANDLE);
+ AssertReturn(pGVM->iomr0.s.paMmioRegs[hRegion].pDevIns == NULL, VERR_WRONG_ORDER);
+ Assert(pGVM->iomr0.s.paMmioRegs[hRegion].idxSelf == hRegion);
+
+ AssertReturn(pfnWrite || pfnRead || pfnFill, VERR_INVALID_PARAMETER);
+ AssertPtrNullReturn(pfnWrite, VERR_INVALID_POINTER);
+ AssertPtrNullReturn(pfnRead, VERR_INVALID_POINTER);
+ AssertPtrNullReturn(pfnFill, VERR_INVALID_POINTER);
+
+ uint32_t const fFlags = pGVM->iomr0.s.paMmioRing3Regs[hRegion].fFlags;
+ RTGCPHYS const cbRegion = pGVM->iomr0.s.paMmioRing3Regs[hRegion].cbRegion;
+ AssertMsgReturn(cbRegion > 0 && cbRegion <= _1T, ("cbRegion=%#RGp\n", cbRegion), VERR_IOM_INVALID_MMIO_HANDLE);
+
+ /*
+ * Do the job.
+ */
+ pGVM->iomr0.s.paMmioRegs[hRegion].cbRegion = cbRegion;
+ pGVM->iomr0.s.paMmioRegs[hRegion].pvUser = pvUser;
+ pGVM->iomr0.s.paMmioRegs[hRegion].pDevIns = pDevIns;
+ pGVM->iomr0.s.paMmioRegs[hRegion].pfnWriteCallback = pfnWrite;
+ pGVM->iomr0.s.paMmioRegs[hRegion].pfnReadCallback = pfnRead;
+ pGVM->iomr0.s.paMmioRegs[hRegion].pfnFillCallback = pfnFill;
+ pGVM->iomr0.s.paMmioRegs[hRegion].fFlags = fFlags;
+#ifdef VBOX_WITH_STATISTICS
+ uint16_t const idxStats = pGVM->iomr0.s.paMmioRing3Regs[hRegion].idxStats;
+ pGVM->iomr0.s.paMmioRegs[hRegion].idxStats = (uint32_t)idxStats < pGVM->iomr0.s.cMmioStatsAllocation
+ ? idxStats : UINT16_MAX;
+#else
+ pGVM->iomr0.s.paMmioRegs[hRegion].idxStats = UINT16_MAX;
+#endif
+
+ pGVM->iomr0.s.paMmioRing3Regs[hRegion].fRing0 = true;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Grows the MMIO registration (all contexts) and lookup tables.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param cReqMinEntries The minimum growth (absolute).
+ * @thread EMT(0)
+ * @note Only callable at VM creation time.
+ */
+VMMR0_INT_DECL(int) IOMR0MmioGrowRegistrationTables(PGVM pGVM, uint64_t cReqMinEntries)
+{
+ /*
+ * Validate input and state.
+ */
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
+ AssertReturn(cReqMinEntries <= _4K, VERR_IOM_TOO_MANY_MMIO_REGISTRATIONS);
+ uint32_t cNewEntries = (uint32_t)cReqMinEntries;
+ AssertReturn(cNewEntries >= pGVM->iom.s.cMmioAlloc, VERR_IOM_MMIO_IPE_1);
+ uint32_t const cOldEntries = pGVM->iomr0.s.cMmioAlloc;
+ ASMCompilerBarrier();
+ AssertReturn(cNewEntries >= cOldEntries, VERR_IOM_MMIO_IPE_2);
+ AssertReturn(pGVM->iom.s.cMmioRegs >= pGVM->iomr0.s.cMmioMax, VERR_IOM_MMIO_IPE_3);
+
+ /*
+ * Allocate the new tables. We use a single allocation for the three tables (ring-0,
+     * ring-3, lookup) and do a partial mapping of the result to ring-3.
+ */
+ uint32_t const cbRing0 = RT_ALIGN_32(cNewEntries * sizeof(IOMMMIOENTRYR0), PAGE_SIZE);
+ uint32_t const cbRing3 = RT_ALIGN_32(cNewEntries * sizeof(IOMMMIOENTRYR3), PAGE_SIZE);
+ uint32_t const cbShared = RT_ALIGN_32(cNewEntries * sizeof(IOMMMIOLOOKUPENTRY), PAGE_SIZE);
+ uint32_t const cbNew = cbRing0 + cbRing3 + cbShared;
+
+ /* Use the rounded up space as best we can. */
+ cNewEntries = RT_MIN(RT_MIN(cbRing0 / sizeof(IOMMMIOENTRYR0), cbRing3 / sizeof(IOMMMIOENTRYR3)),
+ cbShared / sizeof(IOMMMIOLOOKUPENTRY));
+
+ RTR0MEMOBJ hMemObj;
+ int rc = RTR0MemObjAllocPage(&hMemObj, cbNew, false /*fExecutable*/);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Zero and map it.
+ */
+ RT_BZERO(RTR0MemObjAddress(hMemObj), cbNew);
+
+ RTR0MEMOBJ hMapObj;
+ rc = RTR0MemObjMapUserEx(&hMapObj, hMemObj, (RTR3PTR)-1, PAGE_SIZE, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
+ RTR0ProcHandleSelf(), cbRing0, cbNew - cbRing0);
+ if (RT_SUCCESS(rc))
+ {
+ PIOMMMIOENTRYR0 const paRing0 = (PIOMMMIOENTRYR0)RTR0MemObjAddress(hMemObj);
+ PIOMMMIOENTRYR3 const paRing3 = (PIOMMMIOENTRYR3)((uintptr_t)paRing0 + cbRing0);
+ PIOMMMIOLOOKUPENTRY const paLookup = (PIOMMMIOLOOKUPENTRY)((uintptr_t)paRing3 + cbRing3);
+ RTR3UINTPTR const uAddrRing3 = RTR0MemObjAddressR3(hMapObj);
+
+ /*
+ * Copy over the old info and initialize the idxSelf and idxStats members.
+ */
+ if (pGVM->iomr0.s.paMmioRegs != NULL)
+ {
+ memcpy(paRing0, pGVM->iomr0.s.paMmioRegs, sizeof(paRing0[0]) * cOldEntries);
+ memcpy(paRing3, pGVM->iomr0.s.paMmioRing3Regs, sizeof(paRing3[0]) * cOldEntries);
+ memcpy(paLookup, pGVM->iomr0.s.paMmioLookup, sizeof(paLookup[0]) * cOldEntries);
+ }
+
+ size_t i = cbRing0 / sizeof(*paRing0);
+ while (i-- > cOldEntries)
+ {
+ paRing0[i].idxSelf = (uint16_t)i;
+ paRing0[i].idxStats = UINT16_MAX;
+ }
+ i = cbRing3 / sizeof(*paRing3);
+ while (i-- > cOldEntries)
+ {
+ paRing3[i].idxSelf = (uint16_t)i;
+ paRing3[i].idxStats = UINT16_MAX;
+ }
+
+ /*
+ * Switch the memory handles.
+ */
+ RTR0MEMOBJ hTmp = pGVM->iomr0.s.hMmioMapObj;
+ pGVM->iomr0.s.hMmioMapObj = hMapObj;
+ hMapObj = hTmp;
+
+ hTmp = pGVM->iomr0.s.hMmioMemObj;
+ pGVM->iomr0.s.hMmioMemObj = hMemObj;
+ hMemObj = hTmp;
+
+ /*
+ * Update the variables.
+ */
+ pGVM->iomr0.s.paMmioRegs = paRing0;
+ pGVM->iomr0.s.paMmioRing3Regs = paRing3;
+ pGVM->iomr0.s.paMmioLookup = paLookup;
+ pGVM->iom.s.paMmioRegs = uAddrRing3;
+ pGVM->iom.s.paMmioLookup = uAddrRing3 + cbRing3;
+ pGVM->iom.s.cMmioAlloc = cNewEntries;
+ pGVM->iomr0.s.cMmioAlloc = cNewEntries;
+
+ /*
+ * Free the old allocation.
+ */
+ RTR0MemObjFree(hMapObj, true /*fFreeMappings*/);
+ }
+ RTR0MemObjFree(hMemObj, true /*fFreeMappings*/);
+ }
+
+ return rc;
+}
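To make the page-rounding feedback above concrete, here is a worked example; the structure sizes are purely hypothetical (the real sizeof values are not stated in this file), only the arithmetic pattern matters.

    /* Hypothetical grow request of 32 entries with 4 KiB pages, assuming for illustration
     * sizeof(IOMMMIOENTRYR0) == 64, sizeof(IOMMMIOENTRYR3) == 96, sizeof(IOMMMIOLOOKUPENTRY) == 16:
     *   cbRing0  = RT_ALIGN_32(32 * 64, PAGE_SIZE) = 4096   ->  64 ring-0 entries fit
     *   cbRing3  = RT_ALIGN_32(32 * 96, PAGE_SIZE) = 4096   ->  42 ring-3 entries fit
     *   cbShared = RT_ALIGN_32(32 * 16, PAGE_SIZE) = 4096   -> 256 lookup entries fit
     *   cNewEntries = RT_MIN(RT_MIN(64, 42), 256) = 42
     * The ring-3 mapping then starts at offset cbRing0 and spans cbNew - cbRing0, i.e. only
     * the ring-3 registration table and the lookup table become visible to ring-3. */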
+
+
+/**
+ * Grows the MMIO statistics table.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param cReqMinEntries The minimum growth (absolute).
+ * @thread EMT(0)
+ * @note Only callable at VM creation time.
+ */
+VMMR0_INT_DECL(int) IOMR0MmioGrowStatisticsTable(PGVM pGVM, uint64_t cReqMinEntries)
+{
+ /*
+ * Validate input and state.
+ */
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
+ AssertReturn(cReqMinEntries <= _64K, VERR_IOM_TOO_MANY_MMIO_REGISTRATIONS);
+ uint32_t cNewEntries = (uint32_t)cReqMinEntries;
+#ifdef VBOX_WITH_STATISTICS
+ uint32_t const cOldEntries = pGVM->iomr0.s.cMmioStatsAllocation;
+ ASMCompilerBarrier();
+#else
+ uint32_t const cOldEntries = 0;
+#endif
+ AssertReturn(cNewEntries > cOldEntries, VERR_IOM_MMIO_IPE_1);
+ AssertReturn(pGVM->iom.s.cMmioStatsAllocation == cOldEntries, VERR_IOM_MMIO_IPE_1);
+ AssertReturn(pGVM->iom.s.cMmioStats <= cOldEntries, VERR_IOM_MMIO_IPE_2);
+#ifdef VBOX_WITH_STATISTICS
+ AssertReturn(!pGVM->iomr0.s.fMmioStatsFrozen, VERR_WRONG_ORDER);
+#endif
+
+ /*
+ * Allocate a new table, zero it and map it.
+ */
+#ifndef VBOX_WITH_STATISTICS
+ AssertFailedReturn(VERR_NOT_SUPPORTED);
+#else
+ uint32_t const cbNew = RT_ALIGN_32(cNewEntries * sizeof(IOMMMIOSTATSENTRY), PAGE_SIZE);
+ cNewEntries = cbNew / sizeof(IOMMMIOSTATSENTRY);
+
+ RTR0MEMOBJ hMemObj;
+ int rc = RTR0MemObjAllocPage(&hMemObj, cbNew, false /*fExecutable*/);
+ if (RT_SUCCESS(rc))
+ {
+ RT_BZERO(RTR0MemObjAddress(hMemObj), cbNew);
+
+ RTR0MEMOBJ hMapObj;
+ rc = RTR0MemObjMapUser(&hMapObj, hMemObj, (RTR3PTR)-1, PAGE_SIZE, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf());
+ if (RT_SUCCESS(rc))
+ {
+ PIOMMMIOSTATSENTRY pMmioStats = (PIOMMMIOSTATSENTRY)RTR0MemObjAddress(hMemObj);
+
+ /*
+ * Anything to copy over and free up?
+ */
+ if (pGVM->iomr0.s.paMmioStats)
+ memcpy(pMmioStats, pGVM->iomr0.s.paMmioStats, cOldEntries * sizeof(IOMMMIOSTATSENTRY));
+
+ /*
+ * Switch the memory handles.
+ */
+ RTR0MEMOBJ hTmp = pGVM->iomr0.s.hMmioStatsMapObj;
+ pGVM->iomr0.s.hMmioStatsMapObj = hMapObj;
+ hMapObj = hTmp;
+
+ hTmp = pGVM->iomr0.s.hMmioStatsMemObj;
+ pGVM->iomr0.s.hMmioStatsMemObj = hMemObj;
+ hMemObj = hTmp;
+
+ /*
+ * Update the variables.
+ */
+ pGVM->iomr0.s.paMmioStats = pMmioStats;
+ pGVM->iom.s.paMmioStats = RTR0MemObjAddressR3(pGVM->iomr0.s.hMmioStatsMapObj);
+ pGVM->iom.s.cMmioStatsAllocation = cNewEntries;
+ pGVM->iomr0.s.cMmioStatsAllocation = cNewEntries;
+
+ /*
+ * Free the old allocation.
+ */
+ RTR0MemObjFree(hMapObj, true /*fFreeMappings*/);
+ }
+ RTR0MemObjFree(hMemObj, true /*fFreeMappings*/);
+ }
+ return rc;
+#endif /* VBOX_WITH_STATISTICS */
+}
+
+
+/**
+ * Called after all devices have been instantiated to copy over the statistics
+ * indices to the ring-0 MMIO registration table.
+ *
+ * This simplifies keeping statistics for MMIO ranges that are ring-3 only.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @thread EMT(0)
+ * @note Only callable at VM creation time.
+ */
+VMMR0_INT_DECL(int) IOMR0MmioSyncStatisticsIndices(PGVM pGVM)
+{
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
+
+#ifdef VBOX_WITH_STATISTICS
+ /*
+ * First, freeze the statistics array:
+ */
+ pGVM->iomr0.s.fMmioStatsFrozen = true;
+
+ /*
+ * Second, synchronize the indices:
+ */
+ uint32_t const cRegs = RT_MIN(pGVM->iom.s.cMmioRegs, pGVM->iomr0.s.cMmioAlloc);
+ uint32_t const cStatsAlloc = pGVM->iomr0.s.cMmioStatsAllocation;
+ PIOMMMIOENTRYR0 paMmioRegs = pGVM->iomr0.s.paMmioRegs;
+ IOMMMIOENTRYR3 const *paMmioRegsR3 = pGVM->iomr0.s.paMmioRing3Regs;
+ AssertReturn((paMmioRegs && paMmioRegsR3) || cRegs == 0, VERR_IOM_MMIO_IPE_3);
+
+ for (uint32_t i = 0 ; i < cRegs; i++)
+ {
+ uint16_t idxStats = paMmioRegsR3[i].idxStats;
+ paMmioRegs[i].idxStats = idxStats < cStatsAlloc ? idxStats : UINT16_MAX;
+ }
+
+#else
+ RT_NOREF(pGVM);
+#endif
+ return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/VMM/VMMR0/Makefile.kup b/src/VBox/VMM/VMMR0/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/Makefile.kup
diff --git a/src/VBox/VMM/VMMR0/NEMR0Native-win.cpp b/src/VBox/VMM/VMMR0/NEMR0Native-win.cpp
new file mode 100644
index 00000000..2a858f15
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/NEMR0Native-win.cpp
@@ -0,0 +1,2616 @@
+/* $Id: NEMR0Native-win.cpp $ */
+/** @file
+ * NEM - Native execution manager, native ring-0 Windows backend.
+ */
+
+/*
+ * Copyright (C) 2018-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_NEM
+#define VMCPU_INCL_CPUM_GST_CTX
+#include <iprt/nt/nt.h>
+#include <iprt/nt/hyperv.h>
+#include <iprt/nt/vid.h>
+#include <winerror.h>
+
+#include <VBox/vmm/nem.h>
+#include <VBox/vmm/iem.h>
+#include <VBox/vmm/em.h>
+#include <VBox/vmm/apic.h>
+#include <VBox/vmm/pdm.h>
+#include <VBox/vmm/dbgftrace.h>
+#include "NEMInternal.h"
+#include <VBox/vmm/gvm.h>
+#include <VBox/vmm/vmcc.h>
+#include <VBox/vmm/gvmm.h>
+#include <VBox/param.h>
+
+#include <iprt/dbg.h>
+#include <iprt/memobj.h>
+#include <iprt/string.h>
+#include <iprt/time.h>
+
+
+/* Assert compile context sanity. */
+#ifndef RT_OS_WINDOWS
+# error "Windows only file!"
+#endif
+#ifndef RT_ARCH_AMD64
+# error "AMD64 only file!"
+#endif
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+typedef uint32_t DWORD; /* for winerror.h constants */
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+static uint64_t (*g_pfnHvlInvokeHypercall)(uint64_t uCallInfo, uint64_t HCPhysInput, uint64_t HCPhysOutput);
+
+/**
+ * WinHvr.sys!WinHvDepositMemory
+ *
+ * This API will try to allocate cPages on IdealNode and deposit them to the
+ * hypervisor for use with the given partition. The memory will be freed when
+ * VID.SYS calls WinHvWithdrawAllMemory as the partition is cleaned up.
+ *
+ * Apparently node numbers above 64 have a different meaning.
+ */
+static NTSTATUS (*g_pfnWinHvDepositMemory)(uintptr_t idPartition, size_t cPages, uintptr_t IdealNode, size_t *pcActuallyAdded);
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+NEM_TMPL_STATIC int nemR0WinMapPages(PGVM pGVM, PGVMCPU pGVCpu, RTGCPHYS GCPhysSrc, RTGCPHYS GCPhysDst,
+ uint32_t cPages, uint32_t fFlags);
+NEM_TMPL_STATIC int nemR0WinUnmapPages(PGVM pGVM, PGVMCPU pGVCpu, RTGCPHYS GCPhys, uint32_t cPages);
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+NEM_TMPL_STATIC int nemR0WinExportState(PGVM pGVM, PGVMCPU pGVCpu, PCPUMCTX pCtx);
+NEM_TMPL_STATIC int nemR0WinImportState(PGVM pGVM, PGVMCPU pGVCpu, PCPUMCTX pCtx, uint64_t fWhat, bool fCanUpdateCr3);
+NEM_TMPL_STATIC int nemR0WinQueryCpuTick(PGVM pGVM, PGVMCPU pGVCpu, uint64_t *pcTicks, uint32_t *pcAux);
+NEM_TMPL_STATIC int nemR0WinResumeCpuTickOnAll(PGVM pGVM, PGVMCPU pGVCpu, uint64_t uPausedTscValue);
+#endif
+DECLINLINE(NTSTATUS) nemR0NtPerformIoControl(PGVM pGVM, PGVMCPU pGVCpu, uint32_t uFunction, void *pvInput, uint32_t cbInput,
+ void *pvOutput, uint32_t cbOutput);
+
+
+/*
+ * Instantiate the code we share with ring-3.
+ */
+#ifdef NEM_WIN_WITH_RING0_RUNLOOP
+# define NEM_WIN_TEMPLATE_MODE_OWN_RUN_API
+#else
+# undef NEM_WIN_TEMPLATE_MODE_OWN_RUN_API
+#endif
+#include "../VMMAll/NEMAllNativeTemplate-win.cpp.h"
+
+
+
+/**
+ * Worker for NEMR0InitVM that allocates a hypercall page.
+ *
+ * @returns VBox status code.
+ * @param pHypercallData The hypercall data page to initialize.
+ */
+static int nemR0InitHypercallData(PNEMR0HYPERCALLDATA pHypercallData)
+{
+ int rc = RTR0MemObjAllocPage(&pHypercallData->hMemObj, PAGE_SIZE, false /*fExecutable*/);
+ if (RT_SUCCESS(rc))
+ {
+ pHypercallData->HCPhysPage = RTR0MemObjGetPagePhysAddr(pHypercallData->hMemObj, 0 /*iPage*/);
+ AssertStmt(pHypercallData->HCPhysPage != NIL_RTHCPHYS, rc = VERR_INTERNAL_ERROR_3);
+ pHypercallData->pbPage = (uint8_t *)RTR0MemObjAddress(pHypercallData->hMemObj);
+ AssertStmt(pHypercallData->pbPage, rc = VERR_INTERNAL_ERROR_3);
+ if (RT_SUCCESS(rc))
+ return VINF_SUCCESS;
+
+ /* bail out */
+ RTR0MemObjFree(pHypercallData->hMemObj, true /*fFreeMappings*/);
+ }
+ pHypercallData->hMemObj = NIL_RTR0MEMOBJ;
+ pHypercallData->HCPhysPage = NIL_RTHCPHYS;
+ pHypercallData->pbPage = NULL;
+ return rc;
+}
+
+/**
+ * Worker for NEMR0CleanupVM and NEMR0InitVM that cleans up a hypercall page.
+ *
+ * @param pHypercallData The hypercall data page to uninitialize.
+ */
+static void nemR0DeleteHypercallData(PNEMR0HYPERCALLDATA pHypercallData)
+{
+    /* Check pbPage here since it's NULL when nothing has been allocated, whereas
+       the hMemObj can be either NIL_RTR0MEMOBJ or 0 (they aren't necessarily the same). */
+ if (pHypercallData->pbPage != NULL)
+ {
+ RTR0MemObjFree(pHypercallData->hMemObj, true /*fFreeMappings*/);
+ pHypercallData->pbPage = NULL;
+ }
+ pHypercallData->hMemObj = NIL_RTR0MEMOBJ;
+ pHypercallData->HCPhysPage = NIL_RTHCPHYS;
+}
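The reason a hypercall data page keeps both pbPage and HCPhysPage around is that the input block is composed through the ring-0 virtual mapping while the hypervisor is handed the physical address. A minimal sketch of the pattern, mirroring the unmap worker further down (error handling and most input fields omitted):

    HV_INPUT_UNMAP_GPA_PAGES *pIn = (HV_INPUT_UNMAP_GPA_PAGES *)pGVCpu->nemr0.s.HypercallData.pbPage;
    pIn->TargetPartitionId = pGVM->nemr0.s.idHvPartition;   /* compose the input via the ring-0 mapping */
    pIn->TargetGpaBase     = GCPhys >> X86_PAGE_SHIFT;
    /* ...then hand the page's physical address to the hypervisor: */
    uint64_t uResult = g_pfnHvlInvokeHypercall(HvCallUnmapGpaPages | ((uint64_t)cPages << 32),
                                               pGVCpu->nemr0.s.HypercallData.HCPhysPage, 0 /* no output page */);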
+
+
+/**
+ * Called by NEMR3Init to make sure we've got what we need.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @thread EMT(0)
+ */
+VMMR0_INT_DECL(int) NEMR0InitVM(PGVM pGVM)
+{
+ AssertCompile(sizeof(pGVM->nemr0.s) <= sizeof(pGVM->nemr0.padding));
+ AssertCompile(sizeof(pGVM->aCpus[0].nemr0.s) <= sizeof(pGVM->aCpus[0].nemr0.padding));
+
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, 0);
+ AssertRCReturn(rc, rc);
+
+ /*
+     * We want to perform hypercalls here.  The NT kernel started to expose a very
+     * low-level interface for doing this somewhere between builds 14271 and 16299.
+     * Since we need build 17134 to get anywhere at all, the exact build is not
+     * relevant here.
+     *
+     * We also need to deposit memory to the hypervisor for use by the partition
+     * (page mapping structures and the like).
+ */
+ RTDBGKRNLINFO hKrnlInfo;
+ rc = RTR0DbgKrnlInfoOpen(&hKrnlInfo, 0);
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTR0DbgKrnlInfoQuerySymbol(hKrnlInfo, NULL, "HvlInvokeHypercall", (void **)&g_pfnHvlInvokeHypercall);
+ if (RT_FAILURE(rc))
+ rc = VERR_NEM_MISSING_KERNEL_API_1;
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTR0DbgKrnlInfoQuerySymbol(hKrnlInfo, "winhvr.sys", "WinHvDepositMemory", (void **)&g_pfnWinHvDepositMemory);
+ if (RT_FAILURE(rc))
+ rc = rc == VERR_MODULE_NOT_FOUND ? VERR_NEM_MISSING_KERNEL_API_2 : VERR_NEM_MISSING_KERNEL_API_3;
+ }
+ RTR0DbgKrnlInfoRelease(hKrnlInfo);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Allocate a page for non-EMT threads to use for hypercalls (update
+ * statistics and such) and a critical section protecting it.
+ */
+ rc = RTCritSectInit(&pGVM->nemr0.s.HypercallDataCritSect);
+ if (RT_SUCCESS(rc))
+ {
+ rc = nemR0InitHypercallData(&pGVM->nemr0.s.HypercallData);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Allocate a page for each VCPU to place hypercall data on.
+ */
+ for (VMCPUID i = 0; i < pGVM->cCpus; i++)
+ {
+ rc = nemR0InitHypercallData(&pGVM->aCpus[i].nemr0.s.HypercallData);
+ if (RT_FAILURE(rc))
+ {
+ while (i-- > 0)
+ nemR0DeleteHypercallData(&pGVM->aCpus[i].nemr0.s.HypercallData);
+ break;
+ }
+ }
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * So far, so good.
+ */
+ return rc;
+ }
+
+ /*
+ * Bail out.
+ */
+ nemR0DeleteHypercallData(&pGVM->nemr0.s.HypercallData);
+ }
+ RTCritSectDelete(&pGVM->nemr0.s.HypercallDataCritSect);
+ }
+ }
+ }
+
+ return rc;
+}
+
+
+/**
+ * Perform an I/O control operation on the partition handle (VID.SYS).
+ *
+ * @returns NT status code.
+ * @param pGVM The ring-0 VM structure.
+ * @param pGVCpu The global (ring-0) CPU structure of the calling EMT.
+ * @param uFunction The function to perform.
+ * @param pvInput The input buffer. This must point within the VM
+ * structure so we can easily convert to a ring-3
+ * pointer if necessary.
+ * @param cbInput The size of the input. @a pvInput must be NULL when
+ * zero.
+ * @param pvOutput The output buffer. This must also point within the
+ * VM structure for ring-3 pointer magic.
+ * @param cbOutput The size of the output. @a pvOutput must be NULL
+ * when zero.
+ * @thread EMT(pGVCpu)
+ */
+DECLINLINE(NTSTATUS) nemR0NtPerformIoControl(PGVM pGVM, PGVMCPU pGVCpu, uint32_t uFunction, void *pvInput, uint32_t cbInput,
+ void *pvOutput, uint32_t cbOutput)
+{
+#ifdef RT_STRICT
+ /*
+ * Input and output parameters are part of the VM CPU structure.
+ */
+ VMCPU_ASSERT_EMT(pGVCpu);
+ if (pvInput)
+ AssertReturn(((uintptr_t)pvInput + cbInput) - (uintptr_t)pGVCpu <= sizeof(*pGVCpu), VERR_INVALID_PARAMETER);
+ if (pvOutput)
+ AssertReturn(((uintptr_t)pvOutput + cbOutput) - (uintptr_t)pGVCpu <= sizeof(*pGVCpu), VERR_INVALID_PARAMETER);
+#endif
+
+ int32_t rcNt = STATUS_UNSUCCESSFUL;
+ int rc = SUPR0IoCtlPerform(pGVM->nemr0.s.pIoCtlCtx, uFunction,
+ pvInput,
+ pvInput ? (uintptr_t)pvInput + pGVCpu->nemr0.s.offRing3ConversionDelta : NIL_RTR3PTR,
+ cbInput,
+ pvOutput,
+ pvOutput ? (uintptr_t)pvOutput + pGVCpu->nemr0.s.offRing3ConversionDelta : NIL_RTR3PTR,
+ cbOutput,
+ &rcNt);
+ if (RT_SUCCESS(rc) || !NT_SUCCESS((NTSTATUS)rcNt))
+ return (NTSTATUS)rcNt;
+ return STATUS_UNSUCCESSFUL;
+}
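The ring-3 pointer conversion required by the pvInput/pvOutput contract works because both buffers live inside the VMCPU structure, which ring-3 sees at a different address, so a single per-VCPU delta suffices. A minimal sketch, assuming the ring-3 alias pVCpuR3 is the same GVMCPU field used when the delta is set up in NEMR0InitVMPart2() below:

    /* Computed once per VCPU during init (see NEMR0InitVMPart2): */
    uintptr_t const offDelta = (uintptr_t)pGVCpu->pVCpuR3 - (uintptr_t)pGVCpu;

    /* Any ring-0 pointer into the VMCPU structure can then be rebased for VID.SYS: */
    void    *pvInputR0 = &pGVCpu->nem.s.uIoCtlBuf;                     /* ring-0 view of the buffer */
    RTR3PTR  pvInputR3 = (RTR3PTR)((uintptr_t)pvInputR0 + offDelta);   /* ring-3 view of the same bytes */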
+
+
+/**
+ * 2nd part of the initialization, after we've got a partition handle.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @thread EMT(0)
+ */
+VMMR0_INT_DECL(int) NEMR0InitVMPart2(PGVM pGVM)
+{
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, 0);
+ AssertRCReturn(rc, rc);
+ SUPR0Printf("NEMR0InitVMPart2\n"); LogRel(("2: NEMR0InitVMPart2\n"));
+ Assert(pGVM->nemr0.s.fMayUseRing0Runloop == false);
+
+ /*
+ * Copy and validate the I/O control information from ring-3.
+ */
+ NEMWINIOCTL Copy = pGVM->nem.s.IoCtlGetHvPartitionId;
+ AssertLogRelReturn(Copy.uFunction != 0, VERR_NEM_INIT_FAILED);
+ AssertLogRelReturn(Copy.cbInput == 0, VERR_NEM_INIT_FAILED);
+ AssertLogRelReturn(Copy.cbOutput == sizeof(HV_PARTITION_ID), VERR_NEM_INIT_FAILED);
+ pGVM->nemr0.s.IoCtlGetHvPartitionId = Copy;
+
+ pGVM->nemr0.s.fMayUseRing0Runloop = pGVM->nem.s.fUseRing0Runloop;
+
+ Copy = pGVM->nem.s.IoCtlStartVirtualProcessor;
+ AssertLogRelStmt(Copy.uFunction != 0, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.cbInput == sizeof(HV_VP_INDEX), rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.cbOutput == 0, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.uFunction != pGVM->nemr0.s.IoCtlGetHvPartitionId.uFunction, rc = VERR_NEM_INIT_FAILED);
+ if (RT_SUCCESS(rc))
+ pGVM->nemr0.s.IoCtlStartVirtualProcessor = Copy;
+
+ Copy = pGVM->nem.s.IoCtlStopVirtualProcessor;
+ AssertLogRelStmt(Copy.uFunction != 0, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.cbInput == sizeof(HV_VP_INDEX), rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.cbOutput == 0, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.uFunction != pGVM->nemr0.s.IoCtlGetHvPartitionId.uFunction, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.uFunction != pGVM->nemr0.s.IoCtlStartVirtualProcessor.uFunction, rc = VERR_NEM_INIT_FAILED);
+ if (RT_SUCCESS(rc))
+ pGVM->nemr0.s.IoCtlStopVirtualProcessor = Copy;
+
+ Copy = pGVM->nem.s.IoCtlMessageSlotHandleAndGetNext;
+ AssertLogRelStmt(Copy.uFunction != 0, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt( Copy.cbInput == sizeof(VID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT)
+ || Copy.cbInput == RT_OFFSETOF(VID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT, cMillies),
+ rc = VERR_NEM_INIT_FAILED);
+    AssertLogRelStmt(Copy.cbOutput == 0, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.uFunction != pGVM->nemr0.s.IoCtlGetHvPartitionId.uFunction, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.uFunction != pGVM->nemr0.s.IoCtlStartVirtualProcessor.uFunction, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.uFunction != pGVM->nemr0.s.IoCtlStopVirtualProcessor.uFunction, rc = VERR_NEM_INIT_FAILED);
+ if (RT_SUCCESS(rc))
+ pGVM->nemr0.s.IoCtlMessageSlotHandleAndGetNext = Copy;
+
+ if ( RT_SUCCESS(rc)
+ || !pGVM->nem.s.fUseRing0Runloop)
+ {
+ /*
+ * Setup of an I/O control context for the partition handle for later use.
+ */
+ rc = SUPR0IoCtlSetupForHandle(pGVM->pSession, pGVM->nem.s.hPartitionDevice, 0, &pGVM->nemr0.s.pIoCtlCtx);
+ AssertLogRelRCReturn(rc, rc);
+ for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
+ {
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ pGVCpu->nemr0.s.offRing3ConversionDelta = (uintptr_t)pGVM->aCpus[idCpu].pVCpuR3 - (uintptr_t)pGVCpu;
+ }
+
+ /*
+ * Get the partition ID.
+ */
+ PVMCPUCC pVCpu0 = &pGVM->aCpus[0];
+ NTSTATUS rcNt = nemR0NtPerformIoControl(pGVM, pVCpu0, pGVM->nemr0.s.IoCtlGetHvPartitionId.uFunction, NULL, 0,
+ &pVCpu0->nem.s.uIoCtlBuf.idPartition, sizeof(pVCpu0->nem.s.uIoCtlBuf.idPartition));
+ AssertLogRelMsgReturn(NT_SUCCESS(rcNt), ("IoCtlGetHvPartitionId failed: %#x\n", rcNt), VERR_NEM_INIT_FAILED);
+ pGVM->nemr0.s.idHvPartition = pVCpu0->nem.s.uIoCtlBuf.idPartition;
+ AssertLogRelMsgReturn(pGVM->nemr0.s.idHvPartition == pGVM->nem.s.idHvPartition,
+ ("idHvPartition mismatch: r0=%#RX64, r3=%#RX64\n", pGVM->nemr0.s.idHvPartition, pGVM->nem.s.idHvPartition),
+ VERR_NEM_INIT_FAILED);
+ }
+
+ return rc;
+}
+
+
+/**
+ * Cleanup the NEM parts of the VM in ring-0.
+ *
+ * This is always called and must deal with the state regardless of whether
+ * NEMR0InitVM() was called or not. So, take care here.
+ *
+ * @param pGVM The ring-0 VM handle.
+ */
+VMMR0_INT_DECL(void) NEMR0CleanupVM(PGVM pGVM)
+{
+ pGVM->nemr0.s.idHvPartition = HV_PARTITION_ID_INVALID;
+
+ /* Clean up I/O control context. */
+ if (pGVM->nemr0.s.pIoCtlCtx)
+ {
+ int rc = SUPR0IoCtlCleanup(pGVM->nemr0.s.pIoCtlCtx);
+ AssertRC(rc);
+ pGVM->nemr0.s.pIoCtlCtx = NULL;
+ }
+
+ /* Free the hypercall pages. */
+ VMCPUID i = pGVM->cCpus;
+ while (i-- > 0)
+ nemR0DeleteHypercallData(&pGVM->aCpus[i].nemr0.s.HypercallData);
+
+ /* The non-EMT one too. */
+ if (RTCritSectIsInitialized(&pGVM->nemr0.s.HypercallDataCritSect))
+ RTCritSectDelete(&pGVM->nemr0.s.HypercallDataCritSect);
+ nemR0DeleteHypercallData(&pGVM->nemr0.s.HypercallData);
+}
+
+
+#if 0 /* for debugging GPA unmapping. */
+static int nemR3WinDummyReadGpa(PGVM pGVM, PGVMCPU pGVCpu, RTGCPHYS GCPhys)
+{
+ PHV_INPUT_READ_GPA pIn = (PHV_INPUT_READ_GPA)pGVCpu->nemr0.s.pbHypercallData;
+ PHV_OUTPUT_READ_GPA pOut = (PHV_OUTPUT_READ_GPA)(pIn + 1);
+ pIn->PartitionId = pGVM->nemr0.s.idHvPartition;
+ pIn->VpIndex = pGVCpu->idCpu;
+ pIn->ByteCount = 0x10;
+ pIn->BaseGpa = GCPhys;
+ pIn->ControlFlags.AsUINT64 = 0;
+ pIn->ControlFlags.CacheType = HvCacheTypeX64WriteCombining;
+ memset(pOut, 0xfe, sizeof(*pOut));
+ uint64_t volatile uResult = g_pfnHvlInvokeHypercall(HvCallReadGpa, pGVCpu->nemr0.s.HCPhysHypercallData,
+ pGVCpu->nemr0.s.HCPhysHypercallData + sizeof(*pIn));
+ LogRel(("nemR3WinDummyReadGpa: %RGp -> %#RX64; code=%u rsvd=%u abData=%.16Rhxs\n",
+ GCPhys, uResult, pOut->AccessResult.ResultCode, pOut->AccessResult.Reserved, pOut->Data));
+ __debugbreak();
+
+ return uResult != 0 ? VERR_READ_ERROR : VINF_SUCCESS;
+}
+#endif
+
+
+/**
+ * Worker for NEMR0MapPages and others.
+ */
+NEM_TMPL_STATIC int nemR0WinMapPages(PGVM pGVM, PGVMCPU pGVCpu, RTGCPHYS GCPhysSrc, RTGCPHYS GCPhysDst,
+ uint32_t cPages, uint32_t fFlags)
+{
+ /*
+ * Validate.
+ */
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API_1);
+
+ AssertReturn(cPages > 0, VERR_OUT_OF_RANGE);
+ AssertReturn(cPages <= NEM_MAX_MAP_PAGES, VERR_OUT_OF_RANGE);
+ AssertReturn(!(fFlags & ~(HV_MAP_GPA_MAYBE_ACCESS_MASK & ~HV_MAP_GPA_DUNNO_ACCESS)), VERR_INVALID_FLAGS);
+ AssertMsgReturn(!(GCPhysDst & X86_PAGE_OFFSET_MASK), ("GCPhysDst=%RGp\n", GCPhysDst), VERR_OUT_OF_RANGE);
+ AssertReturn(GCPhysDst < _1E, VERR_OUT_OF_RANGE);
+ if (GCPhysSrc != GCPhysDst)
+ {
+ AssertMsgReturn(!(GCPhysSrc & X86_PAGE_OFFSET_MASK), ("GCPhysSrc=%RGp\n", GCPhysSrc), VERR_OUT_OF_RANGE);
+ AssertReturn(GCPhysSrc < _1E, VERR_OUT_OF_RANGE);
+ }
+
+ /*
+ * Compose and make the hypercall.
+ * Ring-3 is not allowed to fill in the host physical addresses of the call.
+ */
+ for (uint32_t iTries = 0;; iTries++)
+ {
+ RTGCPHYS GCPhysSrcTmp = GCPhysSrc;
+ HV_INPUT_MAP_GPA_PAGES *pMapPages = (HV_INPUT_MAP_GPA_PAGES *)pGVCpu->nemr0.s.HypercallData.pbPage;
+ AssertPtrReturn(pMapPages, VERR_INTERNAL_ERROR_3);
+ pMapPages->TargetPartitionId = pGVM->nemr0.s.idHvPartition;
+ pMapPages->TargetGpaBase = GCPhysDst >> X86_PAGE_SHIFT;
+ pMapPages->MapFlags = fFlags;
+ pMapPages->u32ExplicitPadding = 0;
+
+ for (uint32_t iPage = 0; iPage < cPages; iPage++, GCPhysSrcTmp += X86_PAGE_SIZE)
+ {
+ RTHCPHYS HCPhys = NIL_RTGCPHYS;
+ int rc = PGMPhysGCPhys2HCPhys(pGVM, GCPhysSrcTmp, &HCPhys);
+ AssertRCReturn(rc, rc);
+ pMapPages->PageList[iPage] = HCPhys >> X86_PAGE_SHIFT;
+ }
+
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HvCallMapGpaPages | ((uint64_t)cPages << 32),
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage, 0);
+ Log6(("NEMR0MapPages: %RGp/%RGp L %u prot %#x -> %#RX64\n",
+ GCPhysDst, GCPhysSrcTmp - cPages * X86_PAGE_SIZE, cPages, fFlags, uResult));
+ if (uResult == ((uint64_t)cPages << 32))
+ return VINF_SUCCESS;
+
+ /*
+ * If the partition is out of memory, try donate another 512 pages to
+ * it (2MB). VID.SYS does multiples of 512 pages, nothing smaller.
+ */
+ if ( uResult != HV_STATUS_INSUFFICIENT_MEMORY
+ || iTries > 16
+ || g_pfnWinHvDepositMemory == NULL)
+ {
+ LogRel(("g_pfnHvlInvokeHypercall/MapGpaPages -> %#RX64\n", uResult));
+ return VERR_NEM_MAP_PAGES_FAILED;
+ }
+
+ size_t cPagesAdded = 0;
+ NTSTATUS rcNt = g_pfnWinHvDepositMemory(pGVM->nemr0.s.idHvPartition, 512, 0, &cPagesAdded);
+ if (!cPagesAdded)
+ {
+ LogRel(("g_pfnWinHvDepositMemory -> %#x / %#RX64\n", rcNt, uResult));
+ return VERR_NEM_MAP_PAGES_FAILED;
+ }
+ }
+}
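A quick worked note on the retry budget implied by the deposit-and-retry loop above:

    /* Each deposit adds 512 pages (2 MiB) and the loop gives up once iTries exceeds 16,
     * so at most 17 deposits (roughly 34 MiB) are made on behalf of a single map request
     * before VERR_NEM_MAP_PAGES_FAILED is returned. */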
+
+
+/**
+ * Maps pages into the guest physical address space.
+ *
+ * Generally the caller will be under the PGM lock already, so no extra effort
+ * is needed to make sure all changes happen under it.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param idCpu The calling EMT. Necessary for getting the
+ * hypercall page and arguments.
+ * @thread EMT(idCpu)
+ */
+VMMR0_INT_DECL(int) NEMR0MapPages(PGVM pGVM, VMCPUID idCpu)
+{
+ /*
+ * Unpack the call.
+ */
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+
+ RTGCPHYS const GCPhysSrc = pGVCpu->nem.s.Hypercall.MapPages.GCPhysSrc;
+ RTGCPHYS const GCPhysDst = pGVCpu->nem.s.Hypercall.MapPages.GCPhysDst;
+ uint32_t const cPages = pGVCpu->nem.s.Hypercall.MapPages.cPages;
+ HV_MAP_GPA_FLAGS const fFlags = pGVCpu->nem.s.Hypercall.MapPages.fFlags;
+
+ /*
+ * Do the work.
+ */
+ rc = nemR0WinMapPages(pGVM, pGVCpu, GCPhysSrc, GCPhysDst, cPages, fFlags);
+ }
+ return rc;
+}
+
+
+/**
+ * Worker for NEMR0UnmapPages and others.
+ */
+NEM_TMPL_STATIC int nemR0WinUnmapPages(PGVM pGVM, PGVMCPU pGVCpu, RTGCPHYS GCPhys, uint32_t cPages)
+{
+ /*
+ * Validate input.
+ */
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API_1);
+
+ AssertReturn(cPages > 0, VERR_OUT_OF_RANGE);
+ AssertReturn(cPages <= NEM_MAX_UNMAP_PAGES, VERR_OUT_OF_RANGE);
+ AssertMsgReturn(!(GCPhys & X86_PAGE_OFFSET_MASK), ("%RGp\n", GCPhys), VERR_OUT_OF_RANGE);
+ AssertReturn(GCPhys < _1E, VERR_OUT_OF_RANGE);
+
+ /*
+ * Compose and make the hypercall.
+ */
+ HV_INPUT_UNMAP_GPA_PAGES *pUnmapPages = (HV_INPUT_UNMAP_GPA_PAGES *)pGVCpu->nemr0.s.HypercallData.pbPage;
+ AssertPtrReturn(pUnmapPages, VERR_INTERNAL_ERROR_3);
+ pUnmapPages->TargetPartitionId = pGVM->nemr0.s.idHvPartition;
+ pUnmapPages->TargetGpaBase = GCPhys >> X86_PAGE_SHIFT;
+ pUnmapPages->fFlags = 0;
+
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HvCallUnmapGpaPages | ((uint64_t)cPages << 32),
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage, 0);
+ Log6(("NEMR0UnmapPages: %RGp L %u -> %#RX64\n", GCPhys, cPages, uResult));
+ if (uResult == ((uint64_t)cPages << 32))
+ {
+#if 1 /* Do we need to do this? Hopefully not... */
+ uint64_t volatile uR = g_pfnHvlInvokeHypercall(HvCallUncommitGpaPages | ((uint64_t)cPages << 32),
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage, 0);
+ AssertMsg(uR == ((uint64_t)cPages << 32), ("uR=%#RX64\n", uR)); NOREF(uR);
+#endif
+ return VINF_SUCCESS;
+ }
+
+ LogRel(("g_pfnHvlInvokeHypercall/UnmapGpaPages -> %#RX64\n", uResult));
+ return VERR_NEM_UNMAP_PAGES_FAILED;
+}
+
+
+/**
+ * Unmaps pages from the guest physical address space.
+ *
+ * Generally the caller will be under the PGM lock already, so no extra effort
+ * is needed to make sure all changes happen under it.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param idCpu The calling EMT. Necessary for getting the
+ * hypercall page and arguments.
+ * @thread EMT(idCpu)
+ */
+VMMR0_INT_DECL(int) NEMR0UnmapPages(PGVM pGVM, VMCPUID idCpu)
+{
+ /*
+ * Unpack the call.
+ */
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+
+ RTGCPHYS const GCPhys = pGVCpu->nem.s.Hypercall.UnmapPages.GCPhys;
+ uint32_t const cPages = pGVCpu->nem.s.Hypercall.UnmapPages.cPages;
+
+ /*
+ * Do the work.
+ */
+ rc = nemR0WinUnmapPages(pGVM, pGVCpu, GCPhys, cPages);
+ }
+ return rc;
+}
+
+
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+/**
+ * Worker for NEMR0ExportState.
+ *
+ * The intention is to use it internally later.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param pGVCpu The ring-0 VCPU handle.
+ * @param   pCtx        The CPU context structure to export from.
+ */
+NEM_TMPL_STATIC int nemR0WinExportState(PGVM pGVM, PGVMCPU pGVCpu, PCPUMCTX pCtx)
+{
+ HV_INPUT_SET_VP_REGISTERS *pInput = (HV_INPUT_SET_VP_REGISTERS *)pGVCpu->nemr0.s.HypercallData.pbPage;
+ AssertPtrReturn(pInput, VERR_INTERNAL_ERROR_3);
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API_1);
+
+ pInput->PartitionId = pGVM->nemr0.s.idHvPartition;
+ pInput->VpIndex = pGVCpu->idCpu;
+ pInput->RsvdZ = 0;
+
+ uint64_t const fWhat = ~pCtx->fExtrn & (CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_NEM_WIN_MASK);
+ if ( !fWhat
+ && pGVCpu->nem.s.fCurrentInterruptWindows == pGVCpu->nem.s.fDesiredInterruptWindows)
+ return VINF_SUCCESS;
+ uintptr_t iReg = 0;
+
+ /* GPRs */
+ if (fWhat & CPUMCTX_EXTRN_GPRS_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_RAX)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRax;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rax;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RCX)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRcx;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rcx;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RDX)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRdx;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rdx;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RBX)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRbx;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rbx;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RSP)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRsp;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rsp;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RBP)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRbp;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rbp;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RSI)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRsi;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rsi;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RDI)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRdi;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rdi;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_R8_R15)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR8;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r8;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR9;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r9;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR10;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r10;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR11;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r11;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR12;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r12;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR13;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r13;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR14;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r14;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR15;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r15;
+ iReg++;
+ }
+ }
+
+ /* RIP & Flags */
+ if (fWhat & CPUMCTX_EXTRN_RIP)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRip;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rip;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RFLAGS)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRflags;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rflags.u;
+ iReg++;
+ }
+
+ /* Segments */
+# define COPY_OUT_SEG(a_idx, a_enmName, a_SReg) \
+ do { \
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[a_idx]); \
+ pInput->Elements[a_idx].Name = a_enmName; \
+ pInput->Elements[a_idx].Value.Segment.Base = (a_SReg).u64Base; \
+ pInput->Elements[a_idx].Value.Segment.Limit = (a_SReg).u32Limit; \
+ pInput->Elements[a_idx].Value.Segment.Selector = (a_SReg).Sel; \
+ pInput->Elements[a_idx].Value.Segment.Attributes = (a_SReg).Attr.u; \
+ } while (0)
+ if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CS)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterCs, pCtx->cs);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_ES)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterEs, pCtx->es);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_SS)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterSs, pCtx->ss);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DS)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterDs, pCtx->ds);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_FS)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterFs, pCtx->fs);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_GS)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterGs, pCtx->gs);
+ iReg++;
+ }
+ }
+
+ /* Descriptor tables & task segment. */
+ if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_LDTR)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterLdtr, pCtx->ldtr);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_TR)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterTr, pCtx->tr);
+ iReg++;
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_IDTR)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Value.Table.Pad[0] = 0;
+ pInput->Elements[iReg].Value.Table.Pad[1] = 0;
+ pInput->Elements[iReg].Value.Table.Pad[2] = 0;
+ pInput->Elements[iReg].Name = HvX64RegisterIdtr;
+ pInput->Elements[iReg].Value.Table.Limit = pCtx->idtr.cbIdt;
+ pInput->Elements[iReg].Value.Table.Base = pCtx->idtr.pIdt;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_GDTR)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Value.Table.Pad[0] = 0;
+ pInput->Elements[iReg].Value.Table.Pad[1] = 0;
+ pInput->Elements[iReg].Value.Table.Pad[2] = 0;
+ pInput->Elements[iReg].Name = HvX64RegisterGdtr;
+ pInput->Elements[iReg].Value.Table.Limit = pCtx->gdtr.cbGdt;
+ pInput->Elements[iReg].Value.Table.Base = pCtx->gdtr.pGdt;
+ iReg++;
+ }
+ }
+
+ /* Control registers. */
+ if (fWhat & CPUMCTX_EXTRN_CR_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CR0)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterCr0;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->cr0;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_CR2)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterCr2;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->cr2;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_CR3)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterCr3;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->cr3;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_CR4)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterCr4;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->cr4;
+ iReg++;
+ }
+ }
+ if (fWhat & CPUMCTX_EXTRN_APIC_TPR)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterCr8;
+ pInput->Elements[iReg].Value.Reg64 = CPUMGetGuestCR8(pGVCpu);
+ iReg++;
+ }
+
+    /** @todo does HvX64RegisterXfem mean XCR0? What about the related MSR? */
+
+ /* Debug registers. */
+/** @todo fixme. Figure out what the hyper-v version of KVM_SET_GUEST_DEBUG would be. */
+ if (fWhat & CPUMCTX_EXTRN_DR0_DR3)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterDr0;
+ //pInput->Elements[iReg].Value.Reg64 = CPUMGetHyperDR0(pGVCpu);
+ pInput->Elements[iReg].Value.Reg64 = pCtx->dr[0];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterDr1;
+ //pInput->Elements[iReg].Value.Reg64 = CPUMGetHyperDR1(pGVCpu);
+ pInput->Elements[iReg].Value.Reg64 = pCtx->dr[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterDr2;
+ //pInput->Elements[iReg].Value.Reg64 = CPUMGetHyperDR2(pGVCpu);
+ pInput->Elements[iReg].Value.Reg64 = pCtx->dr[2];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterDr3;
+ //pInput->Elements[iReg].Value.Reg64 = CPUMGetHyperDR3(pGVCpu);
+ pInput->Elements[iReg].Value.Reg64 = pCtx->dr[3];
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DR6)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterDr6;
+ //pInput->Elements[iReg].Value.Reg64 = CPUMGetHyperDR6(pGVCpu);
+ pInput->Elements[iReg].Value.Reg64 = pCtx->dr[6];
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DR7)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterDr7;
+ //pInput->Elements[iReg].Value.Reg64 = CPUMGetHyperDR7(pGVCpu);
+ pInput->Elements[iReg].Value.Reg64 = pCtx->dr[7];
+ iReg++;
+ }
+
+ /* Floating point state. */
+ if (fWhat & CPUMCTX_EXTRN_X87)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx0;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[0].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[0].au64[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx1;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[1].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[1].au64[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx2;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[2].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[2].au64[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx3;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[3].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[3].au64[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx4;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[4].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[4].au64[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx5;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[5].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[5].au64[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx6;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[6].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[6].au64[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx7;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[7].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[7].au64[1];
+ iReg++;
+
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpControlStatus;
+ pInput->Elements[iReg].Value.FpControlStatus.FpControl = pCtx->pXStateR0->x87.FCW;
+ pInput->Elements[iReg].Value.FpControlStatus.FpStatus = pCtx->pXStateR0->x87.FSW;
+ pInput->Elements[iReg].Value.FpControlStatus.FpTag = pCtx->pXStateR0->x87.FTW;
+ pInput->Elements[iReg].Value.FpControlStatus.Reserved = pCtx->pXStateR0->x87.FTW >> 8;
+ pInput->Elements[iReg].Value.FpControlStatus.LastFpOp = pCtx->pXStateR0->x87.FOP;
+ pInput->Elements[iReg].Value.FpControlStatus.LastFpRip = (pCtx->pXStateR0->x87.FPUIP)
+ | ((uint64_t)pCtx->pXStateR0->x87.CS << 32)
+ | ((uint64_t)pCtx->pXStateR0->x87.Rsrvd1 << 48);
+ iReg++;
+/** @todo we've got trouble if we try to write just SSE w/o X87. */
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmmControlStatus;
+ pInput->Elements[iReg].Value.XmmControlStatus.LastFpRdp = (pCtx->pXStateR0->x87.FPUDP)
+ | ((uint64_t)pCtx->pXStateR0->x87.DS << 32)
+ | ((uint64_t)pCtx->pXStateR0->x87.Rsrvd2 << 48);
+ pInput->Elements[iReg].Value.XmmControlStatus.XmmStatusControl = pCtx->pXStateR0->x87.MXCSR;
+ pInput->Elements[iReg].Value.XmmControlStatus.XmmStatusControlMask = pCtx->pXStateR0->x87.MXCSR_MASK; /** @todo ??? (Isn't this an output field?) */
+ iReg++;
+ }
+
+ /* Vector state. */
+ if (fWhat & CPUMCTX_EXTRN_SSE_AVX)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm0;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[0].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[0].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm1;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[1].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[1].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm2;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[2].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[2].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm3;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[3].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[3].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm4;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[4].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[4].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm5;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[5].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[5].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm6;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[6].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[6].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm7;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[7].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[7].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm8;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[8].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[8].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm9;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[9].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[9].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm10;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[10].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[10].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm11;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[11].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[11].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm12;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[12].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[12].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm13;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[13].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[13].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm14;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[14].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[14].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm15;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[15].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[15].uXmm.s.Hi;
+ iReg++;
+ }
+
+ /* MSRs */
+ // HvX64RegisterTsc - don't touch
+ if (fWhat & CPUMCTX_EXTRN_EFER)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterEfer;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->msrEFER;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterKernelGsBase;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->msrKERNELGSBASE;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterSysenterCs;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->SysEnter.cs;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterSysenterEip;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->SysEnter.eip;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterSysenterEsp;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->SysEnter.esp;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterStar;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->msrSTAR;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterLstar;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->msrLSTAR;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterCstar;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->msrCSTAR;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterSfmask;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->msrSFMASK;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_OTHER_MSRS)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterApicBase;
+ pInput->Elements[iReg].Value.Reg64 = APICGetBaseMsrNoCheck(pGVCpu);
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterPat;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->msrPAT;
+ iReg++;
+# if 0 /** @todo HvX64RegisterMtrrCap is read only? Seems it's not even readable. */
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrCap;
+ pInput->Elements[iReg].Value.Reg64 = CPUMGetGuestIa32MtrrCap(pGVCpu);
+ iReg++;
+# endif
+
+ PCPUMCTXMSRS pCtxMsrs = CPUMQueryGuestCtxMsrsPtr(pGVCpu);
+
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrDefType;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrDefType;
+ iReg++;
+
+        /** @todo we don't keep state for HvX64RegisterMtrrPhysBaseX and HvX64RegisterMtrrPhysMaskX. */
+
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix64k00000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix64K_00000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix16k80000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix16K_80000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix16kA0000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix16K_A0000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kC0000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_C0000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kC8000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_C8000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kD0000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_D0000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kD8000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_D8000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kE0000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_E0000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kE8000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_E8000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kF0000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_F0000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kF8000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_F8000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterTscAux;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.TscAux;
+ iReg++;
+
+# if 0 /** @todo Why can't we write these on Intel systems? Not that we really care... */
+ const CPUMCPUVENDOR enmCpuVendor = CPUMGetHostCpuVendor(pGVM);
+ if (enmCpuVendor != CPUMCPUVENDOR_AMD)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterIa32MiscEnable;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MiscEnable;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterIa32FeatureControl;
+ pInput->Elements[iReg].Value.Reg64 = CPUMGetGuestIa32FeatureControl(pGVCpu);
+ iReg++;
+ }
+# endif
+ }
+
+ /* event injection (clear it). */
+ if (fWhat & CPUMCTX_EXTRN_NEM_WIN_EVENT_INJECT)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvRegisterPendingInterruption;
+ pInput->Elements[iReg].Value.Reg64 = 0;
+ iReg++;
+ }
+
+ /* Interruptibility state. This can get a little complicated since we get
+ half of the state via HV_X64_VP_EXECUTION_STATE. */
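+    /* Note: HvRegisterInterruptState only carries the InterruptShadow and NmiMasked bits,
+       so the register is written wholesale only when both inhibit flags are known (the
+       combined-mask case right below); the INT-only case has to be more conservative. */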
+ if ( (fWhat & (CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI))
+ == (CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI) )
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvRegisterInterruptState;
+ pInput->Elements[iReg].Value.Reg64 = 0;
+ if ( VMCPU_FF_IS_SET(pGVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
+ && EMGetInhibitInterruptsPC(pGVCpu) == pCtx->rip)
+ pInput->Elements[iReg].Value.InterruptState.InterruptShadow = 1;
+ if (VMCPU_FF_IS_SET(pGVCpu, VMCPU_FF_BLOCK_NMIS))
+ pInput->Elements[iReg].Value.InterruptState.NmiMasked = 1;
+ iReg++;
+ }
+ else if (fWhat & CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT)
+ {
+ if ( pGVCpu->nem.s.fLastInterruptShadow
+ || ( VMCPU_FF_IS_SET(pGVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
+ && EMGetInhibitInterruptsPC(pGVCpu) == pCtx->rip))
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvRegisterInterruptState;
+ pInput->Elements[iReg].Value.Reg64 = 0;
+ if ( VMCPU_FF_IS_SET(pGVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
+ && EMGetInhibitInterruptsPC(pGVCpu) == pCtx->rip)
+ pInput->Elements[iReg].Value.InterruptState.InterruptShadow = 1;
+ /** @todo Retrieve NMI state, currently assuming it's zero. (yes this may happen on I/O) */
+ //if (VMCPU_FF_IS_ANY_SET(pGVCpu, VMCPU_FF_BLOCK_NMIS))
+ // pInput->Elements[iReg].Value.InterruptState.NmiMasked = 1;
+ iReg++;
+ }
+ }
+ else
+ Assert(!(fWhat & CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI));
+
+ /* Interrupt windows. Always set if active as Hyper-V seems to be forgetful. */
+ uint8_t const fDesiredIntWin = pGVCpu->nem.s.fDesiredInterruptWindows;
+ if ( fDesiredIntWin
+ || pGVCpu->nem.s.fCurrentInterruptWindows != fDesiredIntWin)
+ {
+ pGVCpu->nem.s.fCurrentInterruptWindows = pGVCpu->nem.s.fDesiredInterruptWindows;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterDeliverabilityNotifications;
+ pInput->Elements[iReg].Value.DeliverabilityNotifications.AsUINT64 = fDesiredIntWin;
+ Assert(pInput->Elements[iReg].Value.DeliverabilityNotifications.NmiNotification == RT_BOOL(fDesiredIntWin & NEM_WIN_INTW_F_NMI));
+ Assert(pInput->Elements[iReg].Value.DeliverabilityNotifications.InterruptNotification == RT_BOOL(fDesiredIntWin & NEM_WIN_INTW_F_REGULAR));
+ Assert(pInput->Elements[iReg].Value.DeliverabilityNotifications.InterruptPriority == (fDesiredIntWin & NEM_WIN_INTW_F_PRIO_MASK) >> NEM_WIN_INTW_F_PRIO_SHIFT);
+ iReg++;
+ }
+
+ /// @todo HvRegisterPendingEvent0
+ /// @todo HvRegisterPendingEvent1
+
+ /*
+ * Set the registers.
+ */
+ Assert((uintptr_t)&pInput->Elements[iReg] - (uintptr_t)pGVCpu->nemr0.s.HypercallData.pbPage < PAGE_SIZE); /* max is 127 */
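+    /* (The whole request must fit on the single 4KB hypercall page: a small fixed header
+       followed by 32-byte HV_REGISTER_ASSOC elements, which is where the 127 element
+       ceiling asserted above comes from.) */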
+
+ /*
+ * Make the hypercall.
+ */
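+    /* HvCallSetVpRegisters is a repeated ("rep") hypercall: HV_MAKE_CALL_INFO encodes the
+       element count in the upper bits of the call code, and on success the hypervisor
+       reports the number of completed repetitions the same way, which is what the
+       HV_MAKE_CALL_REP_RET comparison below checks. */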
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallSetVpRegisters, iReg),
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage, 0 /*GCPhysOutput*/);
+ AssertLogRelMsgReturn(uResult == HV_MAKE_CALL_REP_RET(iReg),
+ ("uResult=%RX64 iRegs=%#x\n", uResult, iReg),
+ VERR_NEM_SET_REGISTERS_FAILED);
+ //LogFlow(("nemR0WinExportState: uResult=%#RX64 iReg=%zu fWhat=%#018RX64 fExtrn=%#018RX64 -> %#018RX64\n", uResult, iReg, fWhat, pCtx->fExtrn,
+ // pCtx->fExtrn | CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_NEM_WIN_MASK | CPUMCTX_EXTRN_KEEPER_NEM ));
+ pCtx->fExtrn |= CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_NEM_WIN_MASK | CPUMCTX_EXTRN_KEEPER_NEM;
+ return VINF_SUCCESS;
+}
+#endif /* NEM_WIN_WITH_RING0_RUNLOOP || NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS */
+
+
+/**
+ * Export the state to the native API (out of CPUMCTX).
+ *
+ * @returns VBox status code
+ * @param pGVM The ring-0 VM handle.
+ * @param idCpu The calling EMT. Necessary for getting the
+ * hypercall page and arguments.
+ */
+VMMR0_INT_DECL(int) NEMR0ExportState(PGVM pGVM, VMCPUID idCpu)
+{
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+ /*
+ * Validate the call.
+ */
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API_1);
+
+ /*
+ * Call worker.
+ */
+ rc = nemR0WinExportState(pGVM, pGVCpu, &pGVCpu->cpum.GstCtx);
+ }
+ return rc;
+#else
+ RT_NOREF(pGVM, idCpu);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+/**
+ * Worker for NEMR0ImportState.
+ *
+ * The intention is to use it internally later.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param pGVCpu The ring-0 VCPU handle.
+ * @param pCtx The CPU context structure to import into.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
+ * @param fCanUpdateCr3 Whether it's safe to update CR3 or not.
+ */
+NEM_TMPL_STATIC int nemR0WinImportState(PGVM pGVM, PGVMCPU pGVCpu, PCPUMCTX pCtx, uint64_t fWhat, bool fCanUpdateCr3)
+{
+ HV_INPUT_GET_VP_REGISTERS *pInput = (HV_INPUT_GET_VP_REGISTERS *)pGVCpu->nemr0.s.HypercallData.pbPage;
+ AssertPtrReturn(pInput, VERR_INTERNAL_ERROR_3);
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API_1);
+ Assert(pCtx == &pGVCpu->cpum.GstCtx);
+
+ fWhat &= pCtx->fExtrn;
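+    /* Only bits still marked external in fExtrn need fetching from Hyper-V; state that
+       has already been imported into CPUMCTX is skipped. */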
+
+ pInput->PartitionId = pGVM->nemr0.s.idHvPartition;
+ pInput->VpIndex = pGVCpu->idCpu;
+ pInput->fFlags = 0;
+
+ /* GPRs */
+ uintptr_t iReg = 0;
+ if (fWhat & CPUMCTX_EXTRN_GPRS_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_RAX)
+ pInput->Names[iReg++] = HvX64RegisterRax;
+ if (fWhat & CPUMCTX_EXTRN_RCX)
+ pInput->Names[iReg++] = HvX64RegisterRcx;
+ if (fWhat & CPUMCTX_EXTRN_RDX)
+ pInput->Names[iReg++] = HvX64RegisterRdx;
+ if (fWhat & CPUMCTX_EXTRN_RBX)
+ pInput->Names[iReg++] = HvX64RegisterRbx;
+ if (fWhat & CPUMCTX_EXTRN_RSP)
+ pInput->Names[iReg++] = HvX64RegisterRsp;
+ if (fWhat & CPUMCTX_EXTRN_RBP)
+ pInput->Names[iReg++] = HvX64RegisterRbp;
+ if (fWhat & CPUMCTX_EXTRN_RSI)
+ pInput->Names[iReg++] = HvX64RegisterRsi;
+ if (fWhat & CPUMCTX_EXTRN_RDI)
+ pInput->Names[iReg++] = HvX64RegisterRdi;
+ if (fWhat & CPUMCTX_EXTRN_R8_R15)
+ {
+ pInput->Names[iReg++] = HvX64RegisterR8;
+ pInput->Names[iReg++] = HvX64RegisterR9;
+ pInput->Names[iReg++] = HvX64RegisterR10;
+ pInput->Names[iReg++] = HvX64RegisterR11;
+ pInput->Names[iReg++] = HvX64RegisterR12;
+ pInput->Names[iReg++] = HvX64RegisterR13;
+ pInput->Names[iReg++] = HvX64RegisterR14;
+ pInput->Names[iReg++] = HvX64RegisterR15;
+ }
+ }
+
+ /* RIP & Flags */
+ if (fWhat & CPUMCTX_EXTRN_RIP)
+ pInput->Names[iReg++] = HvX64RegisterRip;
+ if (fWhat & CPUMCTX_EXTRN_RFLAGS)
+ pInput->Names[iReg++] = HvX64RegisterRflags;
+
+ /* Segments */
+ if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CS)
+ pInput->Names[iReg++] = HvX64RegisterCs;
+ if (fWhat & CPUMCTX_EXTRN_ES)
+ pInput->Names[iReg++] = HvX64RegisterEs;
+ if (fWhat & CPUMCTX_EXTRN_SS)
+ pInput->Names[iReg++] = HvX64RegisterSs;
+ if (fWhat & CPUMCTX_EXTRN_DS)
+ pInput->Names[iReg++] = HvX64RegisterDs;
+ if (fWhat & CPUMCTX_EXTRN_FS)
+ pInput->Names[iReg++] = HvX64RegisterFs;
+ if (fWhat & CPUMCTX_EXTRN_GS)
+ pInput->Names[iReg++] = HvX64RegisterGs;
+ }
+
+ /* Descriptor tables and the task segment. */
+ if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_LDTR)
+ pInput->Names[iReg++] = HvX64RegisterLdtr;
+ if (fWhat & CPUMCTX_EXTRN_TR)
+ pInput->Names[iReg++] = HvX64RegisterTr;
+ if (fWhat & CPUMCTX_EXTRN_IDTR)
+ pInput->Names[iReg++] = HvX64RegisterIdtr;
+ if (fWhat & CPUMCTX_EXTRN_GDTR)
+ pInput->Names[iReg++] = HvX64RegisterGdtr;
+ }
+
+ /* Control registers. */
+ if (fWhat & CPUMCTX_EXTRN_CR_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CR0)
+ pInput->Names[iReg++] = HvX64RegisterCr0;
+ if (fWhat & CPUMCTX_EXTRN_CR2)
+ pInput->Names[iReg++] = HvX64RegisterCr2;
+ if (fWhat & CPUMCTX_EXTRN_CR3)
+ pInput->Names[iReg++] = HvX64RegisterCr3;
+ if (fWhat & CPUMCTX_EXTRN_CR4)
+ pInput->Names[iReg++] = HvX64RegisterCr4;
+ }
+ if (fWhat & CPUMCTX_EXTRN_APIC_TPR)
+ pInput->Names[iReg++] = HvX64RegisterCr8;
+
+ /* Debug registers. */
+ if (fWhat & CPUMCTX_EXTRN_DR7)
+ pInput->Names[iReg++] = HvX64RegisterDr7;
+ if (fWhat & CPUMCTX_EXTRN_DR0_DR3)
+ {
+ if (!(fWhat & CPUMCTX_EXTRN_DR7) && (pCtx->fExtrn & CPUMCTX_EXTRN_DR7))
+ {
+ fWhat |= CPUMCTX_EXTRN_DR7;
+ pInput->Names[iReg++] = HvX64RegisterDr7;
+ }
+ pInput->Names[iReg++] = HvX64RegisterDr0;
+ pInput->Names[iReg++] = HvX64RegisterDr1;
+ pInput->Names[iReg++] = HvX64RegisterDr2;
+ pInput->Names[iReg++] = HvX64RegisterDr3;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DR6)
+ pInput->Names[iReg++] = HvX64RegisterDr6;
+
+ /* Floating point state. */
+ if (fWhat & CPUMCTX_EXTRN_X87)
+ {
+ pInput->Names[iReg++] = HvX64RegisterFpMmx0;
+ pInput->Names[iReg++] = HvX64RegisterFpMmx1;
+ pInput->Names[iReg++] = HvX64RegisterFpMmx2;
+ pInput->Names[iReg++] = HvX64RegisterFpMmx3;
+ pInput->Names[iReg++] = HvX64RegisterFpMmx4;
+ pInput->Names[iReg++] = HvX64RegisterFpMmx5;
+ pInput->Names[iReg++] = HvX64RegisterFpMmx6;
+ pInput->Names[iReg++] = HvX64RegisterFpMmx7;
+ pInput->Names[iReg++] = HvX64RegisterFpControlStatus;
+ }
+ if (fWhat & (CPUMCTX_EXTRN_X87 | CPUMCTX_EXTRN_SSE_AVX))
+ pInput->Names[iReg++] = HvX64RegisterXmmControlStatus;
+
+ /* Vector state. */
+ if (fWhat & CPUMCTX_EXTRN_SSE_AVX)
+ {
+ pInput->Names[iReg++] = HvX64RegisterXmm0;
+ pInput->Names[iReg++] = HvX64RegisterXmm1;
+ pInput->Names[iReg++] = HvX64RegisterXmm2;
+ pInput->Names[iReg++] = HvX64RegisterXmm3;
+ pInput->Names[iReg++] = HvX64RegisterXmm4;
+ pInput->Names[iReg++] = HvX64RegisterXmm5;
+ pInput->Names[iReg++] = HvX64RegisterXmm6;
+ pInput->Names[iReg++] = HvX64RegisterXmm7;
+ pInput->Names[iReg++] = HvX64RegisterXmm8;
+ pInput->Names[iReg++] = HvX64RegisterXmm9;
+ pInput->Names[iReg++] = HvX64RegisterXmm10;
+ pInput->Names[iReg++] = HvX64RegisterXmm11;
+ pInput->Names[iReg++] = HvX64RegisterXmm12;
+ pInput->Names[iReg++] = HvX64RegisterXmm13;
+ pInput->Names[iReg++] = HvX64RegisterXmm14;
+ pInput->Names[iReg++] = HvX64RegisterXmm15;
+ }
+
+ /* MSRs */
+ // HvX64RegisterTsc - don't touch
+ if (fWhat & CPUMCTX_EXTRN_EFER)
+ pInput->Names[iReg++] = HvX64RegisterEfer;
+ if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
+ pInput->Names[iReg++] = HvX64RegisterKernelGsBase;
+ if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
+ {
+ pInput->Names[iReg++] = HvX64RegisterSysenterCs;
+ pInput->Names[iReg++] = HvX64RegisterSysenterEip;
+ pInput->Names[iReg++] = HvX64RegisterSysenterEsp;
+ }
+ if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
+ {
+ pInput->Names[iReg++] = HvX64RegisterStar;
+ pInput->Names[iReg++] = HvX64RegisterLstar;
+ pInput->Names[iReg++] = HvX64RegisterCstar;
+ pInput->Names[iReg++] = HvX64RegisterSfmask;
+ }
+
+# ifdef LOG_ENABLED
+ const CPUMCPUVENDOR enmCpuVendor = CPUMGetHostCpuVendor(pGVM);
+# endif
+ if (fWhat & CPUMCTX_EXTRN_OTHER_MSRS)
+ {
+ pInput->Names[iReg++] = HvX64RegisterApicBase; /// @todo APIC BASE
+ pInput->Names[iReg++] = HvX64RegisterPat;
+# if 0 /*def LOG_ENABLED*/ /** @todo something's wrong with HvX64RegisterMtrrCap? (AMD) */
+ pInput->Names[iReg++] = HvX64RegisterMtrrCap;
+# endif
+ pInput->Names[iReg++] = HvX64RegisterMtrrDefType;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix64k00000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix16k80000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix16kA0000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kC0000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kC8000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kD0000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kD8000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kE0000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kE8000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kF0000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kF8000;
+ pInput->Names[iReg++] = HvX64RegisterTscAux;
+# if 0 /** @todo why can't we read HvX64RegisterIa32MiscEnable? */
+ if (enmCpuVendor != CPUMCPUVENDOR_AMD)
+ pInput->Names[iReg++] = HvX64RegisterIa32MiscEnable;
+# endif
+# ifdef LOG_ENABLED
+ if (enmCpuVendor != CPUMCPUVENDOR_AMD && enmCpuVendor != CPUMCPUVENDOR_HYGON)
+ pInput->Names[iReg++] = HvX64RegisterIa32FeatureControl;
+# endif
+ }
+
+ /* Interruptibility. */
+ if (fWhat & (CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI))
+ {
+ pInput->Names[iReg++] = HvRegisterInterruptState;
+ pInput->Names[iReg++] = HvX64RegisterRip;
+ }
+
+ /* event injection */
+ pInput->Names[iReg++] = HvRegisterPendingInterruption;
+ pInput->Names[iReg++] = HvRegisterPendingEvent0;
+ pInput->Names[iReg++] = HvRegisterPendingEvent1;
+ size_t const cRegs = iReg;
+ size_t const cbInput = RT_ALIGN_Z(RT_UOFFSETOF_DYN(HV_INPUT_GET_VP_REGISTERS, Names[cRegs]), 32);
+
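+    /* Layout on the single hypercall page: the input (partition id, VP index and the
+       register name array) comes first, padded up to a 32 byte boundary, and the output
+       value array follows immediately after it -- hence GCPhysOutput below being
+       HCPhysPage + cbInput. */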
+ HV_REGISTER_VALUE *paValues = (HV_REGISTER_VALUE *)((uint8_t *)pInput + cbInput);
+ Assert((uintptr_t)&paValues[cRegs] - (uintptr_t)pGVCpu->nemr0.s.HypercallData.pbPage < PAGE_SIZE); /* (max is around 168 registers) */
+ RT_BZERO(paValues, cRegs * sizeof(paValues[0]));
+
+ /*
+ * Make the hypercall.
+ */
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallGetVpRegisters, cRegs),
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage,
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage + cbInput);
+ AssertLogRelMsgReturn(uResult == HV_MAKE_CALL_REP_RET(cRegs),
+ ("uResult=%RX64 cRegs=%#x\n", uResult, cRegs),
+ VERR_NEM_GET_REGISTERS_FAILED);
+ //LogFlow(("nemR0WinImportState: uResult=%#RX64 iReg=%zu fWhat=%#018RX64 fExtr=%#018RX64\n", uResult, cRegs, fWhat, pCtx->fExtrn));
+
+ /*
+ * Copy information to the CPUM context.
+ */
+ iReg = 0;
+
+ /* GPRs */
+ if (fWhat & CPUMCTX_EXTRN_GPRS_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_RAX)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRax);
+ pCtx->rax = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RCX)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRcx);
+ pCtx->rcx = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RDX)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRdx);
+ pCtx->rdx = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RBX)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRbx);
+ pCtx->rbx = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RSP)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRsp);
+ pCtx->rsp = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RBP)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRbp);
+ pCtx->rbp = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RSI)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRsi);
+ pCtx->rsi = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RDI)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRdi);
+ pCtx->rdi = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_R8_R15)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterR8);
+ Assert(pInput->Names[iReg + 7] == HvX64RegisterR15);
+ pCtx->r8 = paValues[iReg++].Reg64;
+ pCtx->r9 = paValues[iReg++].Reg64;
+ pCtx->r10 = paValues[iReg++].Reg64;
+ pCtx->r11 = paValues[iReg++].Reg64;
+ pCtx->r12 = paValues[iReg++].Reg64;
+ pCtx->r13 = paValues[iReg++].Reg64;
+ pCtx->r14 = paValues[iReg++].Reg64;
+ pCtx->r15 = paValues[iReg++].Reg64;
+ }
+ }
+
+ /* RIP & Flags */
+ if (fWhat & CPUMCTX_EXTRN_RIP)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRip);
+ pCtx->rip = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RFLAGS)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRflags);
+ pCtx->rflags.u = paValues[iReg++].Reg64;
+ }
+
+ /* Segments */
+# define COPY_BACK_SEG(a_idx, a_enmName, a_SReg) \
+ do { \
+ Assert(pInput->Names[a_idx] == a_enmName); \
+ (a_SReg).u64Base = paValues[a_idx].Segment.Base; \
+ (a_SReg).u32Limit = paValues[a_idx].Segment.Limit; \
+ (a_SReg).ValidSel = (a_SReg).Sel = paValues[a_idx].Segment.Selector; \
+ (a_SReg).Attr.u = paValues[a_idx].Segment.Attributes; \
+ (a_SReg).fFlags = CPUMSELREG_FLAGS_VALID; \
+ } while (0)
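+    /* COPY_BACK_SEG takes over the complete segment descriptor from Hyper-V, including the
+       hidden base/limit/attributes, and marks it valid so the hidden parts are treated as
+       up to date by the rest of CPUM. */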
+ if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CS)
+ {
+ COPY_BACK_SEG(iReg, HvX64RegisterCs, pCtx->cs);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_ES)
+ {
+ COPY_BACK_SEG(iReg, HvX64RegisterEs, pCtx->es);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_SS)
+ {
+ COPY_BACK_SEG(iReg, HvX64RegisterSs, pCtx->ss);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DS)
+ {
+ COPY_BACK_SEG(iReg, HvX64RegisterDs, pCtx->ds);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_FS)
+ {
+ COPY_BACK_SEG(iReg, HvX64RegisterFs, pCtx->fs);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_GS)
+ {
+ COPY_BACK_SEG(iReg, HvX64RegisterGs, pCtx->gs);
+ iReg++;
+ }
+ }
+ /* Descriptor tables and the task segment. */
+ if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_LDTR)
+ {
+ COPY_BACK_SEG(iReg, HvX64RegisterLdtr, pCtx->ldtr);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_TR)
+ {
+            /* AMD-V likes loading TR in the AVAIL state, whereas Intel insists on BUSY. So,
+               to avoid triggering sanity assertions elsewhere in the code, always fix this up. */
+ COPY_BACK_SEG(iReg, HvX64RegisterTr, pCtx->tr);
+ switch (pCtx->tr.Attr.n.u4Type)
+ {
+ case X86_SEL_TYPE_SYS_386_TSS_BUSY:
+ case X86_SEL_TYPE_SYS_286_TSS_BUSY:
+ break;
+ case X86_SEL_TYPE_SYS_386_TSS_AVAIL:
+ pCtx->tr.Attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
+ break;
+ case X86_SEL_TYPE_SYS_286_TSS_AVAIL:
+ pCtx->tr.Attr.n.u4Type = X86_SEL_TYPE_SYS_286_TSS_BUSY;
+ break;
+ }
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_IDTR)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterIdtr);
+ pCtx->idtr.cbIdt = paValues[iReg].Table.Limit;
+ pCtx->idtr.pIdt = paValues[iReg].Table.Base;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_GDTR)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterGdtr);
+ pCtx->gdtr.cbGdt = paValues[iReg].Table.Limit;
+ pCtx->gdtr.pGdt = paValues[iReg].Table.Base;
+ iReg++;
+ }
+ }
+
+ /* Control registers. */
+ bool fMaybeChangedMode = false;
+ bool fUpdateCr3 = false;
+ if (fWhat & CPUMCTX_EXTRN_CR_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CR0)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterCr0);
+ if (pCtx->cr0 != paValues[iReg].Reg64)
+ {
+ CPUMSetGuestCR0(pGVCpu, paValues[iReg].Reg64);
+ fMaybeChangedMode = true;
+ }
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_CR2)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterCr2);
+ pCtx->cr2 = paValues[iReg].Reg64;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_CR3)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterCr3);
+ if (pCtx->cr3 != paValues[iReg].Reg64)
+ {
+ CPUMSetGuestCR3(pGVCpu, paValues[iReg].Reg64);
+ fUpdateCr3 = true;
+ }
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_CR4)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterCr4);
+ if (pCtx->cr4 != paValues[iReg].Reg64)
+ {
+ CPUMSetGuestCR4(pGVCpu, paValues[iReg].Reg64);
+ fMaybeChangedMode = true;
+ }
+ iReg++;
+ }
+ }
+ if (fWhat & CPUMCTX_EXTRN_APIC_TPR)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterCr8);
+ APICSetTpr(pGVCpu, (uint8_t)paValues[iReg].Reg64 << 4);
+ iReg++;
+ }
+
+ /* Debug registers. */
+ if (fWhat & CPUMCTX_EXTRN_DR7)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterDr7);
+ if (pCtx->dr[7] != paValues[iReg].Reg64)
+ CPUMSetGuestDR7(pGVCpu, paValues[iReg].Reg64);
+ pCtx->fExtrn &= ~CPUMCTX_EXTRN_DR7; /* Hack alert! Avoids asserting when processing CPUMCTX_EXTRN_DR0_DR3. */
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DR0_DR3)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterDr0);
+ Assert(pInput->Names[iReg+3] == HvX64RegisterDr3);
+ if (pCtx->dr[0] != paValues[iReg].Reg64)
+ CPUMSetGuestDR0(pGVCpu, paValues[iReg].Reg64);
+ iReg++;
+ if (pCtx->dr[1] != paValues[iReg].Reg64)
+ CPUMSetGuestDR1(pGVCpu, paValues[iReg].Reg64);
+ iReg++;
+ if (pCtx->dr[2] != paValues[iReg].Reg64)
+ CPUMSetGuestDR2(pGVCpu, paValues[iReg].Reg64);
+ iReg++;
+ if (pCtx->dr[3] != paValues[iReg].Reg64)
+ CPUMSetGuestDR3(pGVCpu, paValues[iReg].Reg64);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DR6)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterDr6);
+ if (pCtx->dr[6] != paValues[iReg].Reg64)
+ CPUMSetGuestDR6(pGVCpu, paValues[iReg].Reg64);
+ iReg++;
+ }
+
+ /* Floating point state. */
+ if (fWhat & CPUMCTX_EXTRN_X87)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterFpMmx0);
+ Assert(pInput->Names[iReg + 7] == HvX64RegisterFpMmx7);
+ pCtx->pXStateR0->x87.aRegs[0].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[0].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aRegs[1].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[1].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aRegs[2].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[2].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aRegs[3].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[3].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aRegs[4].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[4].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aRegs[5].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[5].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aRegs[6].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[6].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aRegs[7].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[7].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterFpControlStatus);
+ pCtx->pXStateR0->x87.FCW = paValues[iReg].FpControlStatus.FpControl;
+ pCtx->pXStateR0->x87.FSW = paValues[iReg].FpControlStatus.FpStatus;
+ pCtx->pXStateR0->x87.FTW = paValues[iReg].FpControlStatus.FpTag
+ /*| (paValues[iReg].FpControlStatus.Reserved << 8)*/;
+ pCtx->pXStateR0->x87.FOP = paValues[iReg].FpControlStatus.LastFpOp;
+ pCtx->pXStateR0->x87.FPUIP = (uint32_t)paValues[iReg].FpControlStatus.LastFpRip;
+ pCtx->pXStateR0->x87.CS = (uint16_t)(paValues[iReg].FpControlStatus.LastFpRip >> 32);
+ pCtx->pXStateR0->x87.Rsrvd1 = (uint16_t)(paValues[iReg].FpControlStatus.LastFpRip >> 48);
+ iReg++;
+ }
+
+ if (fWhat & (CPUMCTX_EXTRN_X87 | CPUMCTX_EXTRN_SSE_AVX))
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterXmmControlStatus);
+ if (fWhat & CPUMCTX_EXTRN_X87)
+ {
+ pCtx->pXStateR0->x87.FPUDP = (uint32_t)paValues[iReg].XmmControlStatus.LastFpRdp;
+ pCtx->pXStateR0->x87.DS = (uint16_t)(paValues[iReg].XmmControlStatus.LastFpRdp >> 32);
+ pCtx->pXStateR0->x87.Rsrvd2 = (uint16_t)(paValues[iReg].XmmControlStatus.LastFpRdp >> 48);
+ }
+ pCtx->pXStateR0->x87.MXCSR = paValues[iReg].XmmControlStatus.XmmStatusControl;
+ pCtx->pXStateR0->x87.MXCSR_MASK = paValues[iReg].XmmControlStatus.XmmStatusControlMask; /** @todo ??? (Isn't this an output field?) */
+ iReg++;
+ }
+
+ /* Vector state. */
+ if (fWhat & CPUMCTX_EXTRN_SSE_AVX)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterXmm0);
+ Assert(pInput->Names[iReg+15] == HvX64RegisterXmm15);
+ pCtx->pXStateR0->x87.aXMM[0].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[0].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[1].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[1].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[2].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[2].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[3].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[3].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[4].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[4].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[5].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[5].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[6].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[6].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[7].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[7].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[8].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[8].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[9].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[9].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[10].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[10].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[11].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[11].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[12].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[12].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[13].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[13].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[14].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[14].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[15].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[15].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ }
+
+
+ /* MSRs */
+ // HvX64RegisterTsc - don't touch
+ if (fWhat & CPUMCTX_EXTRN_EFER)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterEfer);
+ if (paValues[iReg].Reg64 != pCtx->msrEFER)
+ {
+ Log7(("NEM/%u: MSR EFER changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtx->msrEFER, paValues[iReg].Reg64));
+ if ((paValues[iReg].Reg64 ^ pCtx->msrEFER) & MSR_K6_EFER_NXE)
+ PGMNotifyNxeChanged(pGVCpu, RT_BOOL(paValues[iReg].Reg64 & MSR_K6_EFER_NXE));
+ pCtx->msrEFER = paValues[iReg].Reg64;
+ fMaybeChangedMode = true;
+ }
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterKernelGsBase);
+ if (pCtx->msrKERNELGSBASE != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR KERNELGSBASE changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtx->msrKERNELGSBASE, paValues[iReg].Reg64));
+ pCtx->msrKERNELGSBASE = paValues[iReg].Reg64;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterSysenterCs);
+ if (pCtx->SysEnter.cs != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR SYSENTER.CS changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtx->SysEnter.cs, paValues[iReg].Reg64));
+ pCtx->SysEnter.cs = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterSysenterEip);
+ if (pCtx->SysEnter.eip != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR SYSENTER.EIP changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtx->SysEnter.eip, paValues[iReg].Reg64));
+ pCtx->SysEnter.eip = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterSysenterEsp);
+ if (pCtx->SysEnter.esp != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR SYSENTER.ESP changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtx->SysEnter.esp, paValues[iReg].Reg64));
+ pCtx->SysEnter.esp = paValues[iReg].Reg64;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterStar);
+ if (pCtx->msrSTAR != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR STAR changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtx->msrSTAR, paValues[iReg].Reg64));
+ pCtx->msrSTAR = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterLstar);
+ if (pCtx->msrLSTAR != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR LSTAR changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtx->msrLSTAR, paValues[iReg].Reg64));
+ pCtx->msrLSTAR = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterCstar);
+ if (pCtx->msrCSTAR != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR CSTAR changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtx->msrCSTAR, paValues[iReg].Reg64));
+ pCtx->msrCSTAR = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterSfmask);
+ if (pCtx->msrSFMASK != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR SFMASK changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtx->msrSFMASK, paValues[iReg].Reg64));
+ pCtx->msrSFMASK = paValues[iReg].Reg64;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_OTHER_MSRS)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterApicBase);
+ const uint64_t uOldBase = APICGetBaseMsrNoCheck(pGVCpu);
+ if (paValues[iReg].Reg64 != uOldBase)
+ {
+ Log7(("NEM/%u: MSR APICBase changed %RX64 -> %RX64 (%RX64)\n",
+ pGVCpu->idCpu, uOldBase, paValues[iReg].Reg64, paValues[iReg].Reg64 ^ uOldBase));
+ int rc2 = APICSetBaseMsr(pGVCpu, paValues[iReg].Reg64);
+ AssertLogRelMsg(rc2 == VINF_SUCCESS, ("rc2=%Rrc [%#RX64]\n", rc2, paValues[iReg].Reg64));
+ }
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterPat);
+ if (pCtx->msrPAT != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR PAT changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtx->msrPAT, paValues[iReg].Reg64));
+ pCtx->msrPAT = paValues[iReg].Reg64;
+ iReg++;
+
+# if 0 /*def LOG_ENABLED*/ /** @todo something's wrong with HvX64RegisterMtrrCap? (AMD) */
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrCap);
+ if (paValues[iReg].Reg64 != CPUMGetGuestIa32MtrrCap(pGVCpu))
+ Log7(("NEM/%u: MSR MTRR_CAP changed %RX64 -> %RX64 (!!)\n", pGVCpu->idCpu, CPUMGetGuestIa32MtrrCap(pGVCpu), paValues[iReg].Reg64));
+ iReg++;
+# endif
+
+ PCPUMCTXMSRS pCtxMsrs = CPUMQueryGuestCtxMsrsPtr(pGVCpu);
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrDefType);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrDefType )
+ Log7(("NEM/%u: MSR MTRR_DEF_TYPE changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtxMsrs->msr.MtrrDefType, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrDefType = paValues[iReg].Reg64;
+ iReg++;
+
+            /** @todo we don't keep state for HvX64RegisterMtrrPhysBaseX and HvX64RegisterMtrrPhysMaskX. */
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix64k00000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix64K_00000 )
+            Log7(("NEM/%u: MSR MTRR_FIX64K_00000 changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtxMsrs->msr.MtrrFix64K_00000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix64K_00000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix16k80000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix16K_80000 )
+ Log7(("NEM/%u: MSR MTRR_FIX16K_80000 changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtxMsrs->msr.MtrrFix16K_80000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix16K_80000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix16kA0000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix16K_A0000 )
+ Log7(("NEM/%u: MSR MTRR_FIX16K_A0000 changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtxMsrs->msr.MtrrFix16K_A0000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix16K_A0000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kC0000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_C0000 )
+            Log7(("NEM/%u: MSR MTRR_FIX4K_C0000 changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_C0000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_C0000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kC8000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_C8000 )
+            Log7(("NEM/%u: MSR MTRR_FIX4K_C8000 changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_C8000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_C8000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kD0000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_D0000 )
+            Log7(("NEM/%u: MSR MTRR_FIX4K_D0000 changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_D0000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_D0000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kD8000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_D8000 )
+            Log7(("NEM/%u: MSR MTRR_FIX4K_D8000 changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_D8000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_D8000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kE0000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_E0000 )
+            Log7(("NEM/%u: MSR MTRR_FIX4K_E0000 changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_E0000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_E0000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kE8000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_E8000 )
+            Log7(("NEM/%u: MSR MTRR_FIX4K_E8000 changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_E8000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_E8000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kF0000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_F0000 )
+            Log7(("NEM/%u: MSR MTRR_FIX4K_F0000 changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_F0000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_F0000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kF8000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_F8000 )
+            Log7(("NEM/%u: MSR MTRR_FIX4K_F8000 changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_F8000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_F8000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterTscAux);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.TscAux )
+ Log7(("NEM/%u: MSR TSC_AUX changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtxMsrs->msr.TscAux, paValues[iReg].Reg64));
+ pCtxMsrs->msr.TscAux = paValues[iReg].Reg64;
+ iReg++;
+
+# if 0 /** @todo why can't we even read HvX64RegisterIa32MiscEnable? */
+ if (enmCpuVendor != CPUMCPUVENDOR_AMD)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterIa32MiscEnable);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MiscEnable)
+ Log7(("NEM/%u: MSR MISC_ENABLE changed %RX64 -> %RX64\n", pGVCpu->idCpu, pCtxMsrs->msr.MiscEnable, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MiscEnable = paValues[iReg].Reg64;
+ iReg++;
+ }
+# endif
+# ifdef LOG_ENABLED
+ if (enmCpuVendor != CPUMCPUVENDOR_AMD && enmCpuVendor != CPUMCPUVENDOR_HYGON)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterIa32FeatureControl);
+ if (paValues[iReg].Reg64 != pCtx->hwvirt.vmx.Msrs.u64FeatCtrl)
+ Log7(("NEM/%u: MSR FEATURE_CONTROL changed %RX64 -> %RX64 (!!)\n", pGVCpu->idCpu, pCtx->hwvirt.vmx.Msrs.u64FeatCtrl, paValues[iReg].Reg64));
+ iReg++;
+ }
+# endif
+ }
+
+ /* Interruptibility. */
+ if (fWhat & (CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI))
+ {
+ Assert(pInput->Names[iReg] == HvRegisterInterruptState);
+ Assert(pInput->Names[iReg + 1] == HvX64RegisterRip);
+
+ if (!(pCtx->fExtrn & CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT))
+ {
+ pGVCpu->nem.s.fLastInterruptShadow = paValues[iReg].InterruptState.InterruptShadow;
+ if (paValues[iReg].InterruptState.InterruptShadow)
+ EMSetInhibitInterruptsPC(pGVCpu, paValues[iReg + 1].Reg64);
+ else
+ VMCPU_FF_CLEAR(pGVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+ }
+
+ if (!(pCtx->fExtrn & CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI))
+ {
+ if (paValues[iReg].InterruptState.NmiMasked)
+ VMCPU_FF_SET(pGVCpu, VMCPU_FF_BLOCK_NMIS);
+ else
+ VMCPU_FF_CLEAR(pGVCpu, VMCPU_FF_BLOCK_NMIS);
+ }
+
+ fWhat |= CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI;
+ iReg += 2;
+ }
+
+ /* Event injection. */
+ /// @todo HvRegisterPendingInterruption
+ Assert(pInput->Names[iReg] == HvRegisterPendingInterruption);
+ if (paValues[iReg].PendingInterruption.InterruptionPending)
+ {
+ Log7(("PendingInterruption: type=%u vector=%#x errcd=%RTbool/%#x instr-len=%u nested=%u\n",
+ paValues[iReg].PendingInterruption.InterruptionType, paValues[iReg].PendingInterruption.InterruptionVector,
+ paValues[iReg].PendingInterruption.DeliverErrorCode, paValues[iReg].PendingInterruption.ErrorCode,
+ paValues[iReg].PendingInterruption.InstructionLength, paValues[iReg].PendingInterruption.NestedEvent));
+ AssertMsg((paValues[iReg].PendingInterruption.AsUINT64 & UINT64_C(0xfc00)) == 0,
+ ("%#RX64\n", paValues[iReg].PendingInterruption.AsUINT64));
+ }
+
+ /// @todo HvRegisterPendingEvent0
+ /// @todo HvRegisterPendingEvent1
+
+ /* Almost done, just update extrn flags and maybe change PGM mode. */
+ pCtx->fExtrn &= ~fWhat;
+ if (!(pCtx->fExtrn & (CPUMCTX_EXTRN_ALL | (CPUMCTX_EXTRN_NEM_WIN_MASK & ~CPUMCTX_EXTRN_NEM_WIN_EVENT_INJECT))))
+ pCtx->fExtrn = 0;
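+    /* If nothing but the keeper value and the event-injection flag remains, zero fExtrn
+       completely so the context counts as fully imported. */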
+
+ /* Typical. */
+ if (!fMaybeChangedMode && !fUpdateCr3)
+ return VINF_SUCCESS;
+
+ /*
+ * Slow.
+ */
+ int rc = VINF_SUCCESS;
+ if (fMaybeChangedMode)
+ {
+ rc = PGMChangeMode(pGVCpu, pCtx->cr0, pCtx->cr4, pCtx->msrEFER);
+ AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_NEM_IPE_1);
+ }
+
+ if (fUpdateCr3)
+ {
+ if (fCanUpdateCr3)
+ {
+ LogFlow(("nemR0WinImportState: -> PGMUpdateCR3!\n"));
+ rc = PGMUpdateCR3(pGVCpu, pCtx->cr3);
+ AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_NEM_IPE_2);
+ }
+ else
+ {
+ LogFlow(("nemR0WinImportState: -> VERR_NEM_FLUSH_TLB!\n"));
+ rc = VERR_NEM_FLUSH_TLB; /* Calling PGMFlushTLB w/o long jump setup doesn't work, ring-3 does it. */
+ }
+ }
+
+ return rc;
+}
+#endif /* NEM_WIN_WITH_RING0_RUNLOOP || NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS */
+
+
+/**
+ * Import the state from the native API (back to CPUMCTX).
+ *
+ * @returns VBox status code
+ * @param pGVM The ring-0 VM handle.
+ * @param idCpu The calling EMT. Necessary for getting the
+ * hypercall page and arguments.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX. Set
+ *                      CPUMCTX_EXTRN_ALL for everything.
+ */
+VMMR0_INT_DECL(int) NEMR0ImportState(PGVM pGVM, VMCPUID idCpu, uint64_t fWhat)
+{
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+ /*
+ * Validate the call.
+ */
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API_1);
+
+ /*
+ * Call worker.
+ */
+ rc = nemR0WinImportState(pGVM, pGVCpu, &pGVCpu->cpum.GstCtx, fWhat, false /*fCanUpdateCr3*/);
+ }
+ return rc;
+#else
+ RT_NOREF(pGVM, idCpu, fWhat);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+/**
+ * Worker for NEMR0QueryCpuTick and the ring-0 NEMHCQueryCpuTick.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param pGVCpu The ring-0 VCPU handle.
+ * @param pcTicks Where to return the current CPU tick count.
+ * @param pcAux Where to return the hyper-V TSC_AUX value. Optional.
+ */
+NEM_TMPL_STATIC int nemR0WinQueryCpuTick(PGVM pGVM, PGVMCPU pGVCpu, uint64_t *pcTicks, uint32_t *pcAux)
+{
+ /*
+ * Hypercall parameters.
+ */
+ HV_INPUT_GET_VP_REGISTERS *pInput = (HV_INPUT_GET_VP_REGISTERS *)pGVCpu->nemr0.s.HypercallData.pbPage;
+ AssertPtrReturn(pInput, VERR_INTERNAL_ERROR_3);
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API_1);
+
+ pInput->PartitionId = pGVM->nemr0.s.idHvPartition;
+ pInput->VpIndex = pGVCpu->idCpu;
+ pInput->fFlags = 0;
+ pInput->Names[0] = HvX64RegisterTsc;
+ pInput->Names[1] = HvX64RegisterTscAux;
+
+ size_t const cbInput = RT_ALIGN_Z(RT_UOFFSETOF(HV_INPUT_GET_VP_REGISTERS, Names[2]), 32);
+ HV_REGISTER_VALUE *paValues = (HV_REGISTER_VALUE *)((uint8_t *)pInput + cbInput);
+ RT_BZERO(paValues, sizeof(paValues[0]) * 2);
+
+ /*
+ * Make the hypercall.
+ */
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallGetVpRegisters, 2),
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage,
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage + cbInput);
+ AssertLogRelMsgReturn(uResult == HV_MAKE_CALL_REP_RET(2), ("uResult=%RX64 cRegs=%#x\n", uResult, 2),
+ VERR_NEM_GET_REGISTERS_FAILED);
+
+ /*
+ * Get results.
+ */
+ *pcTicks = paValues[0].Reg64;
+ if (pcAux)
+        *pcAux = paValues[1].Reg32; /* TSC_AUX was requested as Names[1] above. */
+ return VINF_SUCCESS;
+}
+#endif /* NEM_WIN_WITH_RING0_RUNLOOP || NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS */
+
+
+/**
+ * Queries the TSC and TSC_AUX values, putting the results in the per-VCPU
+ * hypercall area (nem.s.Hypercall.QueryCpuTick).
+ *
+ * @returns VBox status code
+ * @param pGVM The ring-0 VM handle.
+ * @param idCpu The calling EMT. Necessary for getting the
+ * hypercall page and arguments.
+ */
+VMMR0_INT_DECL(int) NEMR0QueryCpuTick(PGVM pGVM, VMCPUID idCpu)
+{
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+ /*
+ * Validate the call.
+ */
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API_1);
+
+ /*
+ * Call worker.
+ */
+ pGVCpu->nem.s.Hypercall.QueryCpuTick.cTicks = 0;
+ pGVCpu->nem.s.Hypercall.QueryCpuTick.uAux = 0;
+ rc = nemR0WinQueryCpuTick(pGVM, pGVCpu, &pGVCpu->nem.s.Hypercall.QueryCpuTick.cTicks,
+ &pGVCpu->nem.s.Hypercall.QueryCpuTick.uAux);
+ }
+ return rc;
+#else
+ RT_NOREF(pGVM, idCpu);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+/**
+ * Worker for NEMR0ResumeCpuTickOnAll and the ring-0 NEMHCResumeCpuTickOnAll.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param pGVCpu The ring-0 VCPU handle.
+ * @param uPausedTscValue The TSC value at the time of pausing.
+ */
+NEM_TMPL_STATIC int nemR0WinResumeCpuTickOnAll(PGVM pGVM, PGVMCPU pGVCpu, uint64_t uPausedTscValue)
+{
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API_1);
+
+ /*
+ * Set up the hypercall parameters.
+ */
+ HV_INPUT_SET_VP_REGISTERS *pInput = (HV_INPUT_SET_VP_REGISTERS *)pGVCpu->nemr0.s.HypercallData.pbPage;
+ AssertPtrReturn(pInput, VERR_INTERNAL_ERROR_3);
+
+ pInput->PartitionId = pGVM->nemr0.s.idHvPartition;
+ pInput->VpIndex = 0;
+ pInput->RsvdZ = 0;
+ pInput->Elements[0].Name = HvX64RegisterTsc;
+ pInput->Elements[0].Pad0 = 0;
+ pInput->Elements[0].Pad1 = 0;
+ pInput->Elements[0].Value.Reg128.High64 = 0;
+ pInput->Elements[0].Value.Reg64 = uPausedTscValue;
+
+ /*
+ * Disable interrupts and do the first virtual CPU.
+ */
+ RTCCINTREG const fSavedFlags = ASMIntDisableFlags();
+ uint64_t const uFirstTsc = ASMReadTSC();
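+    /* With interrupts disabled, the TSC snapshot above and the per-CPU deltas computed in
+       the loop below stay small, keeping the virtual TSCs of all VCPUs closely in sync. */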
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallSetVpRegisters, 1),
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage, 0 /* no output */);
+ AssertLogRelMsgReturnStmt(uResult == HV_MAKE_CALL_REP_RET(1), ("uResult=%RX64 uTsc=%#RX64\n", uResult, uPausedTscValue),
+ ASMSetFlags(fSavedFlags), VERR_NEM_SET_TSC);
+
+ /*
+     * Do the secondary processors, adjusting for the elapsed TSC and keeping our fingers crossed
+ * that we don't introduce too much drift here.
+ */
+ for (VMCPUID iCpu = 1; iCpu < pGVM->cCpus; iCpu++)
+ {
+ Assert(pInput->PartitionId == pGVM->nemr0.s.idHvPartition);
+ Assert(pInput->RsvdZ == 0);
+ Assert(pInput->Elements[0].Name == HvX64RegisterTsc);
+ Assert(pInput->Elements[0].Pad0 == 0);
+ Assert(pInput->Elements[0].Pad1 == 0);
+ Assert(pInput->Elements[0].Value.Reg128.High64 == 0);
+
+ pInput->VpIndex = iCpu;
+ const uint64_t offDelta = (ASMReadTSC() - uFirstTsc);
+ pInput->Elements[0].Value.Reg64 = uPausedTscValue + offDelta;
+
+ uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallSetVpRegisters, 1),
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage, 0 /* no output */);
+ AssertLogRelMsgReturnStmt(uResult == HV_MAKE_CALL_REP_RET(1),
+ ("uResult=%RX64 uTsc=%#RX64 + %#RX64\n", uResult, uPausedTscValue, offDelta),
+ ASMSetFlags(fSavedFlags), VERR_NEM_SET_TSC);
+ }
+
+ /*
+ * Done.
+ */
+ ASMSetFlags(fSavedFlags);
+ return VINF_SUCCESS;
+}
+#endif /* NEM_WIN_WITH_RING0_RUNLOOP || NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS */
+
+
+/**
+ * Sets the TSC register to @a uPausedTscValue on all CPUs.
+ *
+ * @returns VBox status code
+ * @param pGVM The ring-0 VM handle.
+ * @param idCpu The calling EMT. Necessary for getting the
+ * hypercall page and arguments.
+ * @param uPausedTscValue The TSC value at the time of pausing.
+ */
+VMMR0_INT_DECL(int) NEMR0ResumeCpuTickOnAll(PGVM pGVM, VMCPUID idCpu, uint64_t uPausedTscValue)
+{
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+ /*
+ * Validate the call.
+ */
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API_1);
+
+ /*
+ * Call worker.
+ */
+ pGVCpu->nem.s.Hypercall.QueryCpuTick.cTicks = 0;
+ pGVCpu->nem.s.Hypercall.QueryCpuTick.uAux = 0;
+ rc = nemR0WinResumeCpuTickOnAll(pGVM, pGVCpu, uPausedTscValue);
+ }
+ return rc;
+#else
+ RT_NOREF(pGVM, idCpu, uPausedTscValue);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+VMMR0_INT_DECL(VBOXSTRICTRC) NEMR0RunGuestCode(PGVM pGVM, VMCPUID idCpu)
+{
+#ifdef NEM_WIN_WITH_RING0_RUNLOOP
+ if (pGVM->nemr0.s.fMayUseRing0Runloop)
+ return nemHCWinRunGC(pGVM, &pGVM->aCpus[idCpu]);
+ return VERR_NEM_RING3_ONLY;
+#else
+ RT_NOREF(pGVM, idCpu);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+/**
+ * Updates statistics in the VM structure.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param idCpu The calling EMT, or NIL. Necessary for getting the hypercall
+ * page and arguments.
+ */
+VMMR0_INT_DECL(int) NEMR0UpdateStatistics(PGVM pGVM, VMCPUID idCpu)
+{
+ /*
+ * Validate the call.
+ */
+ int rc;
+ if (idCpu == NIL_VMCPUID)
+ rc = GVMMR0ValidateGVM(pGVM);
+ else
+ rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API_1);
+
+ PNEMR0HYPERCALLDATA pHypercallData = idCpu != NIL_VMCPUID
+ ? &pGVM->aCpus[idCpu].nemr0.s.HypercallData
+ : &pGVM->nemr0.s.HypercallData;
+ if ( RT_VALID_PTR(pHypercallData->pbPage)
+ && pHypercallData->HCPhysPage != NIL_RTHCPHYS)
+ {
+ if (idCpu == NIL_VMCPUID)
+ rc = RTCritSectEnter(&pGVM->nemr0.s.HypercallDataCritSect);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Query the memory statistics for the partition.
+ */
+ HV_INPUT_GET_MEMORY_BALANCE *pInput = (HV_INPUT_GET_MEMORY_BALANCE *)pHypercallData->pbPage;
+ pInput->TargetPartitionId = pGVM->nemr0.s.idHvPartition;
+ pInput->ProximityDomainInfo.Flags.ProximityPreferred = 0;
+ pInput->ProximityDomainInfo.Flags.ProxyimityInfoValid = 0;
+ pInput->ProximityDomainInfo.Flags.Reserved = 0;
+ pInput->ProximityDomainInfo.Id = 0;
+
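+                /* As with the register hypercalls, input and output share the single
+                   hypercall page: the output structure sits right after the input, so its
+                   guest physical address is HCPhysPage + sizeof(*pInput). */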
+ HV_OUTPUT_GET_MEMORY_BALANCE *pOutput = (HV_OUTPUT_GET_MEMORY_BALANCE *)(pInput + 1);
+ RT_ZERO(*pOutput);
+
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HvCallGetMemoryBalance,
+ pHypercallData->HCPhysPage,
+ pHypercallData->HCPhysPage + sizeof(*pInput));
+ if (uResult == HV_STATUS_SUCCESS)
+ {
+ pGVM->nem.s.R0Stats.cPagesAvailable = pOutput->PagesAvailable;
+ pGVM->nem.s.R0Stats.cPagesInUse = pOutput->PagesInUse;
+ rc = VINF_SUCCESS;
+ }
+ else
+ {
+ LogRel(("HvCallGetMemoryBalance -> %#RX64 (%#RX64 %#RX64)!!\n",
+ uResult, pOutput->PagesAvailable, pOutput->PagesInUse));
+ rc = VERR_NEM_IPE_0;
+ }
+
+ if (idCpu == NIL_VMCPUID)
+ RTCritSectLeave(&pGVM->nemr0.s.HypercallDataCritSect);
+ }
+ }
+ else
+ rc = VERR_WRONG_ORDER;
+ }
+ return rc;
+}
+
+
+#if 1 && defined(DEBUG_bird)
+/**
+ * Debug only interface for poking around and exploring Hyper-V stuff.
+ *
+ * @param pGVM The ring-0 VM handle.
+ * @param idCpu The calling EMT.
+ * @param u64Arg What to query. 0 == registers.
+ */
+VMMR0_INT_DECL(int) NEMR0DoExperiment(PGVM pGVM, VMCPUID idCpu, uint64_t u64Arg)
+{
+ /*
+ * Resolve CPU structures.
+ */
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API_1);
+
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ if (u64Arg == 0)
+ {
+ /*
+ * Query register.
+ */
+ HV_INPUT_GET_VP_REGISTERS *pInput = (HV_INPUT_GET_VP_REGISTERS *)pGVCpu->nemr0.s.HypercallData.pbPage;
+ AssertPtrReturn(pInput, VERR_INTERNAL_ERROR_3);
+
+ size_t const cbInput = RT_ALIGN_Z(RT_UOFFSETOF(HV_INPUT_GET_VP_REGISTERS, Names[1]), 32);
+ HV_REGISTER_VALUE *paValues = (HV_REGISTER_VALUE *)((uint8_t *)pInput + cbInput);
+ RT_BZERO(paValues, sizeof(paValues[0]) * 1);
+
+ pInput->PartitionId = pGVM->nemr0.s.idHvPartition;
+ pInput->VpIndex = pGVCpu->idCpu;
+ pInput->fFlags = 0;
+ pInput->Names[0] = (HV_REGISTER_NAME)pGVCpu->nem.s.Hypercall.Experiment.uItem;
+
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallGetVpRegisters, 1),
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage,
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage + cbInput);
+ pGVCpu->nem.s.Hypercall.Experiment.fSuccess = uResult == HV_MAKE_CALL_REP_RET(1);
+ pGVCpu->nem.s.Hypercall.Experiment.uStatus = uResult;
+ pGVCpu->nem.s.Hypercall.Experiment.uLoValue = paValues[0].Reg128.Low64;
+ pGVCpu->nem.s.Hypercall.Experiment.uHiValue = paValues[0].Reg128.High64;
+ rc = VINF_SUCCESS;
+ }
+ else if (u64Arg == 1)
+ {
+ /*
+ * Query partition property.
+ */
+ HV_INPUT_GET_PARTITION_PROPERTY *pInput = (HV_INPUT_GET_PARTITION_PROPERTY *)pGVCpu->nemr0.s.HypercallData.pbPage;
+ AssertPtrReturn(pInput, VERR_INTERNAL_ERROR_3);
+
+ size_t const cbInput = RT_ALIGN_Z(sizeof(*pInput), 32);
+ HV_OUTPUT_GET_PARTITION_PROPERTY *pOutput = (HV_OUTPUT_GET_PARTITION_PROPERTY *)((uint8_t *)pInput + cbInput);
+ pOutput->PropertyValue = 0;
+
+ pInput->PartitionId = pGVM->nemr0.s.idHvPartition;
+ pInput->PropertyCode = (HV_PARTITION_PROPERTY_CODE)pGVCpu->nem.s.Hypercall.Experiment.uItem;
+ pInput->uPadding = 0;
+
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HvCallGetPartitionProperty,
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage,
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage + cbInput);
+ pGVCpu->nem.s.Hypercall.Experiment.fSuccess = uResult == HV_STATUS_SUCCESS;
+ pGVCpu->nem.s.Hypercall.Experiment.uStatus = uResult;
+ pGVCpu->nem.s.Hypercall.Experiment.uLoValue = pOutput->PropertyValue;
+ pGVCpu->nem.s.Hypercall.Experiment.uHiValue = 0;
+ rc = VINF_SUCCESS;
+ }
+ else if (u64Arg == 2)
+ {
+ /*
+ * Set register.
+ */
+ HV_INPUT_SET_VP_REGISTERS *pInput = (HV_INPUT_SET_VP_REGISTERS *)pGVCpu->nemr0.s.HypercallData.pbPage;
+ AssertPtrReturn(pInput, VERR_INTERNAL_ERROR_3);
+ RT_BZERO(pInput, RT_UOFFSETOF(HV_INPUT_SET_VP_REGISTERS, Elements[1]));
+
+ pInput->PartitionId = pGVM->nemr0.s.idHvPartition;
+ pInput->VpIndex = pGVCpu->idCpu;
+ pInput->RsvdZ = 0;
+ pInput->Elements[0].Name = (HV_REGISTER_NAME)pGVCpu->nem.s.Hypercall.Experiment.uItem;
+ pInput->Elements[0].Value.Reg128.High64 = pGVCpu->nem.s.Hypercall.Experiment.uHiValue;
+ pInput->Elements[0].Value.Reg128.Low64 = pGVCpu->nem.s.Hypercall.Experiment.uLoValue;
+
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallSetVpRegisters, 1),
+ pGVCpu->nemr0.s.HypercallData.HCPhysPage, 0);
+ pGVCpu->nem.s.Hypercall.Experiment.fSuccess = uResult == HV_MAKE_CALL_REP_RET(1);
+ pGVCpu->nem.s.Hypercall.Experiment.uStatus = uResult;
+ rc = VINF_SUCCESS;
+ }
+ else
+ rc = VERR_INVALID_FUNCTION;
+ }
+ return rc;
+}
+#endif /* DEBUG_bird */
+
diff --git a/src/VBox/VMM/VMMR0/PDMR0DevHlp.cpp b/src/VBox/VMM/VMMR0/PDMR0DevHlp.cpp
new file mode 100644
index 00000000..a9790717
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/PDMR0DevHlp.cpp
@@ -0,0 +1,1558 @@
+/* $Id: PDMR0DevHlp.cpp $ */
+/** @file
+ * PDM - Pluggable Device and Driver Manager, R0 Device Helper parts.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_PDM_DEVICE
+#define PDMPCIDEV_INCLUDE_PRIVATE /* Hack to get pdmpcidevint.h included at the right point. */
+#include "PDMInternal.h"
+#include <VBox/vmm/pdm.h>
+#include <VBox/vmm/apic.h>
+#include <VBox/vmm/mm.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/gvm.h>
+#include <VBox/vmm/vmm.h>
+#include <VBox/vmm/vmcc.h>
+#include <VBox/vmm/gvmm.h>
+
+#include <VBox/log.h>
+#include <VBox/err.h>
+#include <VBox/sup.h>
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/ctype.h>
+#include <iprt/string.h>
+
+#include "dtrace/VBoxVMM.h"
+#include "PDMInline.h"
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+RT_C_DECLS_BEGIN
+extern DECLEXPORT(const PDMDEVHLPR0) g_pdmR0DevHlp;
+extern DECLEXPORT(const PDMPICHLP) g_pdmR0PicHlp;
+extern DECLEXPORT(const PDMIOAPICHLP) g_pdmR0IoApicHlp;
+extern DECLEXPORT(const PDMPCIHLPR0) g_pdmR0PciHlp;
+extern DECLEXPORT(const PDMHPETHLPR0) g_pdmR0HpetHlp;
+extern DECLEXPORT(const PDMPCIRAWHLPR0) g_pdmR0PciRawHlp;
+RT_C_DECLS_END
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static bool pdmR0IsaSetIrq(PGVM pGVM, int iIrq, int iLevel, uint32_t uTagSrc);
+
+
+/** @name Ring-0 Device Helpers
+ * @{
+ */
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnIoPortSetUpContextEx} */
+static DECLCALLBACK(int) pdmR0DevHlp_IoPortSetUpContextEx(PPDMDEVINS pDevIns, IOMIOPORTHANDLE hIoPorts,
+ PFNIOMIOPORTNEWOUT pfnOut, PFNIOMIOPORTNEWIN pfnIn,
+ PFNIOMIOPORTNEWOUTSTRING pfnOutStr, PFNIOMIOPORTNEWINSTRING pfnInStr,
+ void *pvUser)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_IoPortSetUpContextEx: caller='%s'/%d: hIoPorts=%#x pfnOut=%p pfnIn=%p pfnOutStr=%p pfnInStr=%p pvUser=%p\n",
+ pDevIns->pReg->szName, pDevIns->iInstance, hIoPorts, pfnOut, pfnIn, pfnOutStr, pfnInStr, pvUser));
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
+
+ int rc = IOMR0IoPortSetUpContext(pGVM, pDevIns, hIoPorts, pfnOut, pfnIn, pfnOutStr, pfnInStr, pvUser);
+
+ LogFlow(("pdmR0DevHlp_IoPortSetUpContextEx: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc));
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnMmioSetUpContextEx} */
+static DECLCALLBACK(int) pdmR0DevHlp_MmioSetUpContextEx(PPDMDEVINS pDevIns, IOMMMIOHANDLE hRegion, PFNIOMMMIONEWWRITE pfnWrite,
+ PFNIOMMMIONEWREAD pfnRead, PFNIOMMMIONEWFILL pfnFill, void *pvUser)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_MmioSetUpContextEx: caller='%s'/%d: hRegion=%#x pfnWrite=%p pfnRead=%p pfnFill=%p pvUser=%p\n",
+ pDevIns->pReg->szName, pDevIns->iInstance, hRegion, pfnWrite, pfnRead, pfnFill, pvUser));
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
+
+ int rc = IOMR0MmioSetUpContext(pGVM, pDevIns, hRegion, pfnWrite, pfnRead, pfnFill, pvUser);
+
+ LogFlow(("pdmR0DevHlp_MmioSetUpContextEx: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc));
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnMmio2SetUpContext} */
+static DECLCALLBACK(int) pdmR0DevHlp_Mmio2SetUpContext(PPDMDEVINS pDevIns, PGMMMIO2HANDLE hRegion,
+ size_t offSub, size_t cbSub, void **ppvMapping)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_Mmio2SetUpContext: caller='%s'/%d: hRegion=%#x offSub=%#zx cbSub=%#zx ppvMapping=%p\n",
+ pDevIns->pReg->szName, pDevIns->iInstance, hRegion, offSub, cbSub, ppvMapping));
+ *ppvMapping = NULL;
+
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_VM_INVALID_VM_STATE);
+
+ int rc = PGMR0PhysMMIO2MapKernel(pGVM, pDevIns, hRegion, offSub, cbSub, ppvMapping);
+
+ LogFlow(("pdmR0DevHlp_Mmio2SetUpContext: caller='%s'/%d: returns %Rrc (%p)\n", pDevIns->pReg->szName, pDevIns->iInstance, rc, *ppvMapping));
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnPCIPhysRead} */
+static DECLCALLBACK(int) pdmR0DevHlp_PCIPhysRead(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, RTGCPHYS GCPhys,
+ void *pvBuf, size_t cbRead)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ if (!pPciDev) /* NULL is an alias for the default PCI device. */
+ pPciDev = pDevIns->apPciDevs[0];
+ AssertReturn(pPciDev, VERR_PDM_NOT_PCI_DEVICE);
+ PDMPCIDEV_ASSERT_VALID_AND_REGISTERED(pDevIns, pPciDev);
+
+#ifndef PDM_DO_NOT_RESPECT_PCI_BM_BIT
+ /*
+ * Just check the busmaster setting here and forward the request to the generic read helper.
+ */
+ if (PCIDevIsBusmaster(pPciDev))
+ { /* likely */ }
+ else
+ {
+ Log(("pdmRCDevHlp_PCIPhysRead: caller=%p/%d: returns %Rrc - Not bus master! GCPhys=%RGp cbRead=%#zx\n",
+ pDevIns, pDevIns->iInstance, VERR_PDM_NOT_PCI_BUS_MASTER, GCPhys, cbRead));
+ memset(pvBuf, 0xff, cbRead);
+ return VERR_PDM_NOT_PCI_BUS_MASTER;
+ }
+#endif
+
+ return pDevIns->pHlpR0->pfnPhysRead(pDevIns, GCPhys, pvBuf, cbRead);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnPCIPhysWrite} */
+static DECLCALLBACK(int) pdmR0DevHlp_PCIPhysWrite(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, RTGCPHYS GCPhys,
+ const void *pvBuf, size_t cbWrite)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ if (!pPciDev) /* NULL is an alias for the default PCI device. */
+ pPciDev = pDevIns->apPciDevs[0];
+ AssertReturn(pPciDev, VERR_PDM_NOT_PCI_DEVICE);
+ PDMPCIDEV_ASSERT_VALID_AND_REGISTERED(pDevIns, pPciDev);
+
+#ifndef PDM_DO_NOT_RESPECT_PCI_BM_BIT
+ /*
+     * Just check the busmaster setting here and forward the request to the generic write helper.
+ */
+ if (PCIDevIsBusmaster(pPciDev))
+ { /* likely */ }
+ else
+ {
+ Log(("pdmRCDevHlp_PCIPhysWrite: caller=%p/%d: returns %Rrc - Not bus master! GCPhys=%RGp cbWrite=%#zx\n",
+ pDevIns, pDevIns->iInstance, VERR_PDM_NOT_PCI_BUS_MASTER, GCPhys, cbWrite));
+ return VERR_PDM_NOT_PCI_BUS_MASTER;
+ }
+#endif
+
+ return pDevIns->pHlpR0->pfnPhysWrite(pDevIns, GCPhys, pvBuf, cbWrite);
+}
+
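A minimal caller-side sketch of the two helpers above; it assumes GCPhysDesc is a guest-physical address the device obtained from its own descriptor rings, and it relies on the fact (shown above) that the read helper fills the buffer with 0xff and returns VERR_PDM_NOT_PCI_BUS_MASTER when the guest has cleared the bus-master bit:

    /* Illustrative DMA read through the default PCI device (pPciDev = NULL). */
    uint8_t abDesc[16];
    int rc = pDevIns->pHlpR0->pfnPCIPhysRead(pDevIns, NULL /* default PCI device */,
                                             GCPhysDesc, abDesc, sizeof(abDesc));
    if (rc == VERR_PDM_NOT_PCI_BUS_MASTER)
    {
        /* Bus mastering is off; abDesc already contains 0xff bytes, so just bail out. */
    }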
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnPCISetIrq} */
+static DECLCALLBACK(void) pdmR0DevHlp_PCISetIrq(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int iIrq, int iLevel)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ if (!pPciDev) /* NULL is an alias for the default PCI device. */
+ pPciDev = pDevIns->apPciDevs[0];
+ AssertReturnVoid(pPciDev);
+ LogFlow(("pdmR0DevHlp_PCISetIrq: caller=%p/%d: pPciDev=%p:{%#x} iIrq=%d iLevel=%d\n",
+ pDevIns, pDevIns->iInstance, pPciDev, pPciDev->uDevFn, iIrq, iLevel));
+ PDMPCIDEV_ASSERT_VALID_AND_REGISTERED(pDevIns, pPciDev);
+
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+ size_t const idxBus = pPciDev->Int.s.idxPdmBus;
+ AssertReturnVoid(idxBus < RT_ELEMENTS(pGVM->pdmr0.s.aPciBuses));
+ PPDMPCIBUSR0 pPciBusR0 = &pGVM->pdmr0.s.aPciBuses[idxBus];
+
+ pdmLock(pGVM);
+
+ uint32_t uTagSrc;
+ if (iLevel & PDM_IRQ_LEVEL_HIGH)
+ {
+ pDevIns->Internal.s.pIntR3R0->uLastIrqTag = uTagSrc = pdmCalcIrqTag(pGVM, pDevIns->Internal.s.pInsR3R0->idTracing);
+ if (iLevel == PDM_IRQ_LEVEL_HIGH)
+ VBOXVMM_PDM_IRQ_HIGH(VMMGetCpu(pGVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc));
+ else
+ VBOXVMM_PDM_IRQ_HILO(VMMGetCpu(pGVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc));
+ }
+ else
+ uTagSrc = pDevIns->Internal.s.pIntR3R0->uLastIrqTag;
+
+ if (pPciBusR0->pDevInsR0)
+ {
+ pPciBusR0->pfnSetIrqR0(pPciBusR0->pDevInsR0, pPciDev, iIrq, iLevel, uTagSrc);
+
+ pdmUnlock(pGVM);
+
+ if (iLevel == PDM_IRQ_LEVEL_LOW)
+ VBOXVMM_PDM_IRQ_LOW(VMMGetCpu(pGVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc));
+ }
+ else
+ {
+ pdmUnlock(pGVM);
+
+ /* queue for ring-3 execution. */
+ PPDMDEVHLPTASK pTask = (PPDMDEVHLPTASK)PDMQueueAlloc(pGVM->pdm.s.pDevHlpQueueR0);
+ AssertReturnVoid(pTask);
+
+ pTask->enmOp = PDMDEVHLPTASKOP_PCI_SET_IRQ;
+ pTask->pDevInsR3 = PDMDEVINS_2_R3PTR(pDevIns);
+ pTask->u.PciSetIRQ.iIrq = iIrq;
+ pTask->u.PciSetIRQ.iLevel = iLevel;
+ pTask->u.PciSetIRQ.uTagSrc = uTagSrc;
+ pTask->u.PciSetIRQ.pPciDevR3 = MMHyperR0ToR3(pGVM, pPciDev);
+
+ PDMQueueInsertEx(pGVM->pdm.s.pDevHlpQueueR0, &pTask->Core, 0);
+ }
+
+ LogFlow(("pdmR0DevHlp_PCISetIrq: caller=%p/%d: returns void; uTagSrc=%#x\n", pDevIns, pDevIns->iInstance, uTagSrc));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnISASetIrq} */
+static DECLCALLBACK(void) pdmR0DevHlp_ISASetIrq(PPDMDEVINS pDevIns, int iIrq, int iLevel)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_ISASetIrq: caller=%p/%d: iIrq=%d iLevel=%d\n", pDevIns, pDevIns->iInstance, iIrq, iLevel));
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+
+ pdmLock(pGVM);
+ uint32_t uTagSrc;
+ if (iLevel & PDM_IRQ_LEVEL_HIGH)
+ {
+ pDevIns->Internal.s.pIntR3R0->uLastIrqTag = uTagSrc = pdmCalcIrqTag(pGVM, pDevIns->Internal.s.pInsR3R0->idTracing);
+ if (iLevel == PDM_IRQ_LEVEL_HIGH)
+ VBOXVMM_PDM_IRQ_HIGH(VMMGetCpu(pGVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc));
+ else
+ VBOXVMM_PDM_IRQ_HILO(VMMGetCpu(pGVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc));
+ }
+ else
+ uTagSrc = pDevIns->Internal.s.pIntR3R0->uLastIrqTag;
+
+ bool fRc = pdmR0IsaSetIrq(pGVM, iIrq, iLevel, uTagSrc);
+
+ if (iLevel == PDM_IRQ_LEVEL_LOW && fRc)
+ VBOXVMM_PDM_IRQ_LOW(VMMGetCpu(pGVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc));
+ pdmUnlock(pGVM);
+ LogFlow(("pdmR0DevHlp_ISASetIrq: caller=%p/%d: returns void; uTagSrc=%#x\n", pDevIns, pDevIns->iInstance, uTagSrc));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnIoApicSendMsi} */
+static DECLCALLBACK(void) pdmR0DevHlp_IoApicSendMsi(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, uint32_t uValue)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_IoApicSendMsi: caller=%p/%d: GCPhys=%RGp uValue=%#x\n", pDevIns, pDevIns->iInstance, GCPhys, uValue));
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+
+ uint32_t uTagSrc;
+ pDevIns->Internal.s.pIntR3R0->uLastIrqTag = uTagSrc = pdmCalcIrqTag(pGVM, pDevIns->Internal.s.pInsR3R0->idTracing);
+ VBOXVMM_PDM_IRQ_HILO(VMMGetCpu(pGVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc));
+
+ if (pGVM->pdm.s.IoApic.pDevInsR0)
+ pGVM->pdm.s.IoApic.pfnSendMsiR0(pGVM->pdm.s.IoApic.pDevInsR0, GCPhys, uValue, uTagSrc);
+ else
+ AssertFatalMsgFailed(("Lazy bastards!"));
+
+ LogFlow(("pdmR0DevHlp_IoApicSendMsi: caller=%p/%d: returns void; uTagSrc=%#x\n", pDevIns, pDevIns->iInstance, uTagSrc));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnPhysRead} */
+static DECLCALLBACK(int) pdmR0DevHlp_PhysRead(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, void *pvBuf, size_t cbRead)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_PhysRead: caller=%p/%d: GCPhys=%RGp pvBuf=%p cbRead=%#x\n",
+ pDevIns, pDevIns->iInstance, GCPhys, pvBuf, cbRead));
+
+ VBOXSTRICTRC rcStrict = PGMPhysRead(pDevIns->Internal.s.pGVM, GCPhys, pvBuf, cbRead, PGMACCESSORIGIN_DEVICE);
+ AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); /** @todo track down the users for this bugger. */
+
+ Log(("pdmR0DevHlp_PhysRead: caller=%p/%d: returns %Rrc\n", pDevIns, pDevIns->iInstance, VBOXSTRICTRC_VAL(rcStrict) ));
+ return VBOXSTRICTRC_VAL(rcStrict);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnPhysWrite} */
+static DECLCALLBACK(int) pdmR0DevHlp_PhysWrite(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, const void *pvBuf, size_t cbWrite)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_PhysWrite: caller=%p/%d: GCPhys=%RGp pvBuf=%p cbWrite=%#x\n",
+ pDevIns, pDevIns->iInstance, GCPhys, pvBuf, cbWrite));
+
+ VBOXSTRICTRC rcStrict = PGMPhysWrite(pDevIns->Internal.s.pGVM, GCPhys, pvBuf, cbWrite, PGMACCESSORIGIN_DEVICE);
+ AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); /** @todo track down the users for this bugger. */
+
+ Log(("pdmR0DevHlp_PhysWrite: caller=%p/%d: returns %Rrc\n", pDevIns, pDevIns->iInstance, VBOXSTRICTRC_VAL(rcStrict) ));
+ return VBOXSTRICTRC_VAL(rcStrict);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnA20IsEnabled} */
+static DECLCALLBACK(bool) pdmR0DevHlp_A20IsEnabled(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_A20IsEnabled: caller=%p/%d:\n", pDevIns, pDevIns->iInstance));
+
+ bool fEnabled = PGMPhysIsA20Enabled(VMMGetCpu(pDevIns->Internal.s.pGVM));
+
+ Log(("pdmR0DevHlp_A20IsEnabled: caller=%p/%d: returns %RTbool\n", pDevIns, pDevIns->iInstance, fEnabled));
+ return fEnabled;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnVMState} */
+static DECLCALLBACK(VMSTATE) pdmR0DevHlp_VMState(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+
+ VMSTATE enmVMState = pDevIns->Internal.s.pGVM->enmVMState;
+
+ LogFlow(("pdmR0DevHlp_VMState: caller=%p/%d: returns %d\n", pDevIns, pDevIns->iInstance, enmVMState));
+ return enmVMState;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnVMSetError} */
+static DECLCALLBACK(int) pdmR0DevHlp_VMSetError(PPDMDEVINS pDevIns, int rc, RT_SRC_POS_DECL, const char *pszFormat, ...)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ va_list args;
+ va_start(args, pszFormat);
+ int rc2 = VMSetErrorV(pDevIns->Internal.s.pGVM, rc, RT_SRC_POS_ARGS, pszFormat, args); Assert(rc2 == rc); NOREF(rc2);
+ va_end(args);
+ return rc;
+}
+
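A hedged usage sketch for the error helper; the status code, message and the cbBar variable are illustrative only, and RT_SRC_POS supplies the source-position arguments matching RT_SRC_POS_DECL above:

    /* Fail a ring-0 setup path with a user-visible error (illustrative values). */
    if (cbBar > _64K)
        return pDevIns->pHlpR0->pfnVMSetError(pDevIns, VERR_OUT_OF_RANGE, RT_SRC_POS,
                                              "Unsupported BAR size: %#x", cbBar);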
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnVMSetErrorV} */
+static DECLCALLBACK(int) pdmR0DevHlp_VMSetErrorV(PPDMDEVINS pDevIns, int rc, RT_SRC_POS_DECL, const char *pszFormat, va_list va)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ int rc2 = VMSetErrorV(pDevIns->Internal.s.pGVM, rc, RT_SRC_POS_ARGS, pszFormat, va); Assert(rc2 == rc); NOREF(rc2);
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnVMSetRuntimeError} */
+static DECLCALLBACK(int) pdmR0DevHlp_VMSetRuntimeError(PPDMDEVINS pDevIns, uint32_t fFlags, const char *pszErrorId, const char *pszFormat, ...)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ va_list va;
+ va_start(va, pszFormat);
+ int rc = VMSetRuntimeErrorV(pDevIns->Internal.s.pGVM, fFlags, pszErrorId, pszFormat, va);
+ va_end(va);
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnVMSetRuntimeErrorV} */
+static DECLCALLBACK(int) pdmR0DevHlp_VMSetRuntimeErrorV(PPDMDEVINS pDevIns, uint32_t fFlags, const char *pszErrorId, const char *pszFormat, va_list va)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ int rc = VMSetRuntimeErrorV(pDevIns->Internal.s.pGVM, fFlags, pszErrorId, pszFormat, va);
+ return rc;
+}
+
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnGetVM} */
+static DECLCALLBACK(PVMCC) pdmR0DevHlp_GetVM(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_GetVM: caller='%p'/%d\n", pDevIns, pDevIns->iInstance));
+ return pDevIns->Internal.s.pGVM;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnGetVMCPU} */
+static DECLCALLBACK(PVMCPUCC) pdmR0DevHlp_GetVMCPU(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_GetVMCPU: caller='%p'/%d\n", pDevIns, pDevIns->iInstance));
+ return VMMGetCpu(pDevIns->Internal.s.pGVM);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnGetCurrentCpuId} */
+static DECLCALLBACK(VMCPUID) pdmR0DevHlp_GetCurrentCpuId(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ VMCPUID idCpu = VMMGetCpuId(pDevIns->Internal.s.pGVM);
+ LogFlow(("pdmR0DevHlp_GetCurrentCpuId: caller='%p'/%d for CPU %u\n", pDevIns, pDevIns->iInstance, idCpu));
+ return idCpu;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerToPtr} */
+static DECLCALLBACK(PTMTIMERR0) pdmR0DevHlp_TimerToPtr(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ RT_NOREF(pDevIns);
+ return (PTMTIMERR0)MMHyperR3ToCC(pDevIns->Internal.s.pGVM, hTimer);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerFromMicro} */
+static DECLCALLBACK(uint64_t) pdmR0DevHlp_TimerFromMicro(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer, uint64_t cMicroSecs)
+{
+ return TMTimerFromMicro(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer), cMicroSecs);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerFromMilli} */
+static DECLCALLBACK(uint64_t) pdmR0DevHlp_TimerFromMilli(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer, uint64_t cMilliSecs)
+{
+ return TMTimerFromMilli(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer), cMilliSecs);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerFromNano} */
+static DECLCALLBACK(uint64_t) pdmR0DevHlp_TimerFromNano(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer, uint64_t cNanoSecs)
+{
+ return TMTimerFromNano(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer), cNanoSecs);
+}
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerGet} */
+static DECLCALLBACK(uint64_t) pdmR0DevHlp_TimerGet(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer)
+{
+ return TMTimerGet(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerGetFreq} */
+static DECLCALLBACK(uint64_t) pdmR0DevHlp_TimerGetFreq(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer)
+{
+ return TMTimerGetFreq(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerGetNano} */
+static DECLCALLBACK(uint64_t) pdmR0DevHlp_TimerGetNano(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer)
+{
+ return TMTimerGetNano(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerIsActive} */
+static DECLCALLBACK(bool) pdmR0DevHlp_TimerIsActive(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer)
+{
+ return TMTimerIsActive(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerIsLockOwner} */
+static DECLCALLBACK(bool) pdmR0DevHlp_TimerIsLockOwner(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer)
+{
+ return TMTimerIsLockOwner(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerLockClock} */
+static DECLCALLBACK(VBOXSTRICTRC) pdmR0DevHlp_TimerLockClock(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer, int rcBusy)
+{
+ return TMTimerLock(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer), rcBusy);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerLockClock2} */
+static DECLCALLBACK(VBOXSTRICTRC) pdmR0DevHlp_TimerLockClock2(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer,
+ PPDMCRITSECT pCritSect, int rcBusy)
+{
+ VBOXSTRICTRC rc = TMTimerLock(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer), rcBusy);
+ if (rc == VINF_SUCCESS)
+ {
+ rc = PDMCritSectEnter(pCritSect, rcBusy);
+ if (rc == VINF_SUCCESS)
+ return rc;
+ AssertRC(VBOXSTRICTRC_VAL(rc));
+ TMTimerUnlock(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer));
+ }
+ else
+ AssertRC(VBOXSTRICTRC_VAL(rc));
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerSet} */
+static DECLCALLBACK(int) pdmR0DevHlp_TimerSet(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer, uint64_t uExpire)
+{
+ return TMTimerSet(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer), uExpire);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerSetFrequencyHint} */
+static DECLCALLBACK(int) pdmR0DevHlp_TimerSetFrequencyHint(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer, uint32_t uHz)
+{
+ return TMTimerSetFrequencyHint(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer), uHz);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerSetMicro} */
+static DECLCALLBACK(int) pdmR0DevHlp_TimerSetMicro(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer, uint64_t cMicrosToNext)
+{
+ return TMTimerSetMicro(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer), cMicrosToNext);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerSetMillies} */
+static DECLCALLBACK(int) pdmR0DevHlp_TimerSetMillies(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer, uint64_t cMilliesToNext)
+{
+ return TMTimerSetMillies(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer), cMilliesToNext);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerSetNano} */
+static DECLCALLBACK(int) pdmR0DevHlp_TimerSetNano(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer, uint64_t cNanosToNext)
+{
+ return TMTimerSetNano(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer), cNanosToNext);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerSetRelative} */
+static DECLCALLBACK(int) pdmR0DevHlp_TimerSetRelative(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer, uint64_t cTicksToNext, uint64_t *pu64Now)
+{
+ return TMTimerSetRelative(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer), cTicksToNext, pu64Now);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerStop} */
+static DECLCALLBACK(int) pdmR0DevHlp_TimerStop(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer)
+{
+ return TMTimerStop(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerUnlockClock} */
+static DECLCALLBACK(void) pdmR0DevHlp_TimerUnlockClock(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer)
+{
+ TMTimerUnlock(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTimerUnlockClock2} */
+static DECLCALLBACK(void) pdmR0DevHlp_TimerUnlockClock2(PPDMDEVINS pDevIns, TMTIMERHANDLE hTimer, PPDMCRITSECT pCritSect)
+{
+ TMTimerUnlock(pdmR0DevHlp_TimerToPtr(pDevIns, hTimer));
+ int rc = PDMCritSectLeave(pCritSect);
+ AssertRC(rc);
+}
+
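The clock2 lock/unlock helpers above pair up around timer updates that must also hold the device critical section (clock lock first, critsect second, released in the reverse order). A minimal sketch, where hTimer, pCritSect and the rcBusy choice are assumptions of the example:

    VBOXSTRICTRC rcLock = pDevIns->pHlpR0->pfnTimerLockClock2(pDevIns, hTimer, pCritSect,
                                                              VERR_SEM_BUSY /* rcBusy */);
    if (rcLock == VINF_SUCCESS)
    {
        pDevIns->pHlpR0->pfnTimerSetMillies(pDevIns, hTimer, 10 /* ms */);
        pDevIns->pHlpR0->pfnTimerUnlockClock2(pDevIns, hTimer, pCritSect);
    }
    /* Otherwise rcLock is the rcBusy status and the work is typically deferred to ring-3. */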
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTMTimeVirtGet} */
+static DECLCALLBACK(uint64_t) pdmR0DevHlp_TMTimeVirtGet(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_TMTimeVirtGet: caller='%p'/%d\n", pDevIns, pDevIns->iInstance));
+ return TMVirtualGet(pDevIns->Internal.s.pGVM);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTMTimeVirtGetFreq} */
+static DECLCALLBACK(uint64_t) pdmR0DevHlp_TMTimeVirtGetFreq(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_TMTimeVirtGetFreq: caller='%p'/%d\n", pDevIns, pDevIns->iInstance));
+ return TMVirtualGetFreq(pDevIns->Internal.s.pGVM);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTMTimeVirtGetNano} */
+static DECLCALLBACK(uint64_t) pdmR0DevHlp_TMTimeVirtGetNano(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_TMTimeVirtGetNano: caller='%p'/%d\n", pDevIns, pDevIns->iInstance));
+ return TMVirtualToNano(pDevIns->Internal.s.pGVM, TMVirtualGet(pDevIns->Internal.s.pGVM));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnQueueToPtr} */
+static DECLCALLBACK(PPDMQUEUE) pdmR0DevHlp_QueueToPtr(PPDMDEVINS pDevIns, PDMQUEUEHANDLE hQueue)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ RT_NOREF(pDevIns);
+ return (PPDMQUEUE)MMHyperR3ToCC(pDevIns->Internal.s.pGVM, hQueue);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnQueueAlloc} */
+static DECLCALLBACK(PPDMQUEUEITEMCORE) pdmR0DevHlp_QueueAlloc(PPDMDEVINS pDevIns, PDMQUEUEHANDLE hQueue)
+{
+ return PDMQueueAlloc(pdmR0DevHlp_QueueToPtr(pDevIns, hQueue));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnQueueInsert} */
+static DECLCALLBACK(void) pdmR0DevHlp_QueueInsert(PPDMDEVINS pDevIns, PDMQUEUEHANDLE hQueue, PPDMQUEUEITEMCORE pItem)
+{
+ return PDMQueueInsert(pdmR0DevHlp_QueueToPtr(pDevIns, hQueue), pItem);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnQueueInsertEx} */
+static DECLCALLBACK(void) pdmR0DevHlp_QueueInsertEx(PPDMDEVINS pDevIns, PDMQUEUEHANDLE hQueue, PPDMQUEUEITEMCORE pItem,
+ uint64_t cNanoMaxDelay)
+{
+ return PDMQueueInsertEx(pdmR0DevHlp_QueueToPtr(pDevIns, hQueue), pItem, cNanoMaxDelay);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnQueueFlushIfNecessary} */
+static DECLCALLBACK(bool) pdmR0DevHlp_QueueFlushIfNecessary(PPDMDEVINS pDevIns, PDMQUEUEHANDLE hQueue)
+{
+ return PDMQueueFlushIfNecessary(pdmR0DevHlp_QueueToPtr(pDevIns, hQueue));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTaskTrigger} */
+static DECLCALLBACK(int) pdmR0DevHlp_TaskTrigger(PPDMDEVINS pDevIns, PDMTASKHANDLE hTask)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_TaskTrigger: caller='%s'/%d: hTask=%RU64\n", pDevIns->pReg->szName, pDevIns->iInstance, hTask));
+
+ int rc = PDMTaskTrigger(pDevIns->Internal.s.pGVM, PDMTASKTYPE_DEV, pDevIns->pDevInsForR3, hTask);
+
+ LogFlow(("pdmR0DevHlp_TaskTrigger: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc));
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnSUPSemEventSignal} */
+static DECLCALLBACK(int) pdmR0DevHlp_SUPSemEventSignal(PPDMDEVINS pDevIns, SUPSEMEVENT hEvent)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_SUPSemEventSignal: caller='%s'/%d: hEvent=%p\n", pDevIns->pReg->szName, pDevIns->iInstance, hEvent));
+
+ int rc = SUPSemEventSignal(pDevIns->Internal.s.pGVM->pSession, hEvent);
+
+ LogFlow(("pdmR0DevHlp_SUPSemEventSignal: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc));
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnSUPSemEventWaitNoResume} */
+static DECLCALLBACK(int) pdmR0DevHlp_SUPSemEventWaitNoResume(PPDMDEVINS pDevIns, SUPSEMEVENT hEvent, uint32_t cMillies)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_SUPSemEventWaitNoResume: caller='%s'/%d: hEvent=%p cNsTimeout=%RU32\n",
+ pDevIns->pReg->szName, pDevIns->iInstance, hEvent, cMillies));
+
+ int rc = SUPSemEventWaitNoResume(pDevIns->Internal.s.pGVM->pSession, hEvent, cMillies);
+
+ LogFlow(("pdmR0DevHlp_SUPSemEventWaitNoResume: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc));
+ return rc;
+}
+
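The "NoResume" wait variants return early when the calling thread is interrupted, so callers usually loop. A sketch assuming hEvent was created in ring-3 and that VERR_INTERRUPTED / VERR_TIMEOUT are the interruption and timeout statuses:

    int rc;
    do
        rc = pDevIns->pHlpR0->pfnSUPSemEventWaitNoResume(pDevIns, hEvent, 10 /* ms */);
    while (rc == VERR_INTERRUPTED);
    /* rc is now VINF_SUCCESS (signalled) or VERR_TIMEOUT. */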
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnSUPSemEventWaitNsAbsIntr} */
+static DECLCALLBACK(int) pdmR0DevHlp_SUPSemEventWaitNsAbsIntr(PPDMDEVINS pDevIns, SUPSEMEVENT hEvent, uint64_t uNsTimeout)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_SUPSemEventWaitNsAbsIntr: caller='%s'/%d: hEvent=%p uNsTimeout=%RU64\n",
+ pDevIns->pReg->szName, pDevIns->iInstance, hEvent, uNsTimeout));
+
+ int rc = SUPSemEventWaitNsAbsIntr(pDevIns->Internal.s.pGVM->pSession, hEvent, uNsTimeout);
+
+ LogFlow(("pdmR0DevHlp_SUPSemEventWaitNsAbsIntr: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc));
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnSUPSemEventWaitNsRelIntr} */
+static DECLCALLBACK(int) pdmR0DevHlp_SUPSemEventWaitNsRelIntr(PPDMDEVINS pDevIns, SUPSEMEVENT hEvent, uint64_t cNsTimeout)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_SUPSemEventWaitNsRelIntr: caller='%s'/%d: hEvent=%p cNsTimeout=%RU64\n",
+ pDevIns->pReg->szName, pDevIns->iInstance, hEvent, cNsTimeout));
+
+ int rc = SUPSemEventWaitNsRelIntr(pDevIns->Internal.s.pGVM->pSession, hEvent, cNsTimeout);
+
+ LogFlow(("pdmR0DevHlp_SUPSemEventWaitNsRelIntr: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc));
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnSUPSemEventGetResolution} */
+static DECLCALLBACK(uint32_t) pdmR0DevHlp_SUPSemEventGetResolution(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_SUPSemEventGetResolution: caller='%s'/%d:\n", pDevIns->pReg->szName, pDevIns->iInstance));
+
+ uint32_t cNsResolution = SUPSemEventGetResolution(pDevIns->Internal.s.pGVM->pSession);
+
+ LogFlow(("pdmR0DevHlp_SUPSemEventGetResolution: caller='%s'/%d: returns %u\n", pDevIns->pReg->szName, pDevIns->iInstance, cNsResolution));
+ return cNsResolution;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnSUPSemEventMultiSignal} */
+static DECLCALLBACK(int) pdmR0DevHlp_SUPSemEventMultiSignal(PPDMDEVINS pDevIns, SUPSEMEVENTMULTI hEventMulti)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_SUPSemEventMultiSignal: caller='%s'/%d: hEventMulti=%p\n", pDevIns->pReg->szName, pDevIns->iInstance, hEventMulti));
+
+ int rc = SUPSemEventMultiSignal(pDevIns->Internal.s.pGVM->pSession, hEventMulti);
+
+ LogFlow(("pdmR0DevHlp_SUPSemEventMultiSignal: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc));
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnSUPSemEventMultiReset} */
+static DECLCALLBACK(int) pdmR0DevHlp_SUPSemEventMultiReset(PPDMDEVINS pDevIns, SUPSEMEVENTMULTI hEventMulti)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_SUPSemEventMultiReset: caller='%s'/%d: hEventMulti=%p\n", pDevIns->pReg->szName, pDevIns->iInstance, hEventMulti));
+
+ int rc = SUPSemEventMultiReset(pDevIns->Internal.s.pGVM->pSession, hEventMulti);
+
+ LogFlow(("pdmR0DevHlp_SUPSemEventMultiReset: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc));
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnSUPSemEventMultiWaitNoResume} */
+static DECLCALLBACK(int) pdmR0DevHlp_SUPSemEventMultiWaitNoResume(PPDMDEVINS pDevIns, SUPSEMEVENTMULTI hEventMulti,
+ uint32_t cMillies)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_SUPSemEventMultiWaitNoResume: caller='%s'/%d: hEventMulti=%p cMillies=%RU32\n",
+ pDevIns->pReg->szName, pDevIns->iInstance, hEventMulti, cMillies));
+
+ int rc = SUPSemEventMultiWaitNoResume(pDevIns->Internal.s.pGVM->pSession, hEventMulti, cMillies);
+
+ LogFlow(("pdmR0DevHlp_SUPSemEventMultiWaitNoResume: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc));
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnSUPSemEventMultiWaitNsAbsIntr} */
+static DECLCALLBACK(int) pdmR0DevHlp_SUPSemEventMultiWaitNsAbsIntr(PPDMDEVINS pDevIns, SUPSEMEVENTMULTI hEventMulti,
+ uint64_t uNsTimeout)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_SUPSemEventMultiWaitNsAbsIntr: caller='%s'/%d: hEventMulti=%p uNsTimeout=%RU64\n",
+ pDevIns->pReg->szName, pDevIns->iInstance, hEventMulti, uNsTimeout));
+
+ int rc = SUPSemEventMultiWaitNsAbsIntr(pDevIns->Internal.s.pGVM->pSession, hEventMulti, uNsTimeout);
+
+ LogFlow(("pdmR0DevHlp_SUPSemEventMultiWaitNsAbsIntr: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc));
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnSUPSemEventMultiWaitNsRelIntr} */
+static DECLCALLBACK(int) pdmR0DevHlp_SUPSemEventMultiWaitNsRelIntr(PPDMDEVINS pDevIns, SUPSEMEVENTMULTI hEventMulti,
+ uint64_t cNsTimeout)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_SUPSemEventMultiWaitNsRelIntr: caller='%s'/%d: hEventMulti=%p cNsTimeout=%RU64\n",
+ pDevIns->pReg->szName, pDevIns->iInstance, hEventMulti, cNsTimeout));
+
+ int rc = SUPSemEventMultiWaitNsRelIntr(pDevIns->Internal.s.pGVM->pSession, hEventMulti, cNsTimeout);
+
+ LogFlow(("pdmR0DevHlp_SUPSemEventMultiWaitNsRelIntr: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, rc));
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnSUPSemEventMultiGetResolution} */
+static DECLCALLBACK(uint32_t) pdmR0DevHlp_SUPSemEventMultiGetResolution(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_SUPSemEventMultiGetResolution: caller='%s'/%d:\n", pDevIns->pReg->szName, pDevIns->iInstance));
+
+ uint32_t cNsResolution = SUPSemEventMultiGetResolution(pDevIns->Internal.s.pGVM->pSession);
+
+ LogFlow(("pdmR0DevHlp_SUPSemEventMultiGetResolution: caller='%s'/%d: returns %u\n", pDevIns->pReg->szName, pDevIns->iInstance, cNsResolution));
+ return cNsResolution;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnCritSectGetNop} */
+static DECLCALLBACK(PPDMCRITSECT) pdmR0DevHlp_CritSectGetNop(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+
+ PPDMCRITSECT pCritSect = &pGVM->pdm.s.NopCritSect;
+ LogFlow(("pdmR0DevHlp_CritSectGetNop: caller='%s'/%d: return %p\n", pDevIns->pReg->szName, pDevIns->iInstance, pCritSect));
+ return pCritSect;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnSetDeviceCritSect} */
+static DECLCALLBACK(int) pdmR0DevHlp_SetDeviceCritSect(PPDMDEVINS pDevIns, PPDMCRITSECT pCritSect)
+{
+ /*
+ * Validate input.
+ *
+ * Note! We only allow the automatically created default critical section
+ * to be replaced by this API.
+ */
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ AssertPtrReturn(pCritSect, VERR_INVALID_POINTER);
+ LogFlow(("pdmR0DevHlp_SetDeviceCritSect: caller='%s'/%d: pCritSect=%p (%s)\n",
+ pDevIns->pReg->szName, pDevIns->iInstance, pCritSect, pCritSect->s.pszName));
+ AssertReturn(PDMCritSectIsInitialized(pCritSect), VERR_INVALID_PARAMETER);
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+ AssertReturn(pCritSect->s.pVMR0 == pGVM, VERR_INVALID_PARAMETER);
+
+ VM_ASSERT_EMT(pGVM);
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_WRONG_ORDER);
+
+ /*
+ * Check that ring-3 has already done this, then effect the change.
+ */
+ AssertReturn(pDevIns->pDevInsForR3R0->Internal.s.fIntFlags & PDMDEVINSINT_FLAGS_CHANGED_CRITSECT, VERR_WRONG_ORDER);
+ pDevIns->pCritSectRoR0 = pCritSect;
+
+ LogFlow(("pdmR0DevHlp_SetDeviceCritSect: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VINF_SUCCESS));
+ return VINF_SUCCESS;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnCritSectEnter} */
+static DECLCALLBACK(int) pdmR0DevHlp_CritSectEnter(PPDMDEVINS pDevIns, PPDMCRITSECT pCritSect, int rcBusy)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ RT_NOREF(pDevIns); /** @todo pass pDevIns->Internal.s.pGVM to the crit sect code. */
+ return PDMCritSectEnter(pCritSect, rcBusy);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnCritSectEnterDebug} */
+static DECLCALLBACK(int) pdmR0DevHlp_CritSectEnterDebug(PPDMDEVINS pDevIns, PPDMCRITSECT pCritSect, int rcBusy, RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ RT_NOREF(pDevIns); /** @todo pass pDevIns->Internal.s.pGVM to the crit sect code. */
+ return PDMCritSectEnterDebug(pCritSect, rcBusy, uId, RT_SRC_POS_ARGS);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnCritSectTryEnter} */
+static DECLCALLBACK(int) pdmR0DevHlp_CritSectTryEnter(PPDMDEVINS pDevIns, PPDMCRITSECT pCritSect)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ RT_NOREF(pDevIns); /** @todo pass pDevIns->Internal.s.pGVM to the crit sect code. */
+ return PDMCritSectTryEnter(pCritSect);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnCritSectTryEnterDebug} */
+static DECLCALLBACK(int) pdmR0DevHlp_CritSectTryEnterDebug(PPDMDEVINS pDevIns, PPDMCRITSECT pCritSect, RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ RT_NOREF(pDevIns); /** @todo pass pDevIns->Internal.s.pGVM to the crit sect code. */
+ return PDMCritSectTryEnterDebug(pCritSect, uId, RT_SRC_POS_ARGS);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnCritSectLeave} */
+static DECLCALLBACK(int) pdmR0DevHlp_CritSectLeave(PPDMDEVINS pDevIns, PPDMCRITSECT pCritSect)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ RT_NOREF(pDevIns); /** @todo pass pDevIns->Internal.s.pGVM to the crit sect code. */
+ return PDMCritSectLeave(pCritSect);
+}
+
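A minimal enter/leave sketch for the critical-section helpers; pCritSect stands for the device critical section and VERR_SEM_BUSY stands in for whatever rcBusy status the device wants back when the section is contended:

    int rc = pDevIns->pHlpR0->pfnCritSectEnter(pDevIns, pCritSect, VERR_SEM_BUSY /* rcBusy */);
    if (rc == VINF_SUCCESS)
    {
        /* ... access the shared device state ... */
        pDevIns->pHlpR0->pfnCritSectLeave(pDevIns, pCritSect);
    }
    /* else: rc == rcBusy, typically causing a fallback to ring-3. */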
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnCritSectIsOwner} */
+static DECLCALLBACK(bool) pdmR0DevHlp_CritSectIsOwner(PPDMDEVINS pDevIns, PCPDMCRITSECT pCritSect)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ RT_NOREF(pDevIns); /** @todo pass pDevIns->Internal.s.pGVM to the crit sect code. */
+ return PDMCritSectIsOwner(pCritSect);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnCritSectIsInitialized} */
+static DECLCALLBACK(bool) pdmR0DevHlp_CritSectIsInitialized(PPDMDEVINS pDevIns, PCPDMCRITSECT pCritSect)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ RT_NOREF(pDevIns);
+ return PDMCritSectIsInitialized(pCritSect);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnCritSectHasWaiters} */
+static DECLCALLBACK(bool) pdmR0DevHlp_CritSectHasWaiters(PPDMDEVINS pDevIns, PCPDMCRITSECT pCritSect)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ RT_NOREF(pDevIns);
+ return PDMCritSectHasWaiters(pCritSect);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnCritSectGetRecursion} */
+static DECLCALLBACK(uint32_t) pdmR0DevHlp_CritSectGetRecursion(PPDMDEVINS pDevIns, PCPDMCRITSECT pCritSect)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ RT_NOREF(pDevIns);
+ return PDMCritSectGetRecursion(pCritSect);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnCritSectScheduleExitEvent} */
+static DECLCALLBACK(int) pdmR0DevHlp_CritSectScheduleExitEvent(PPDMDEVINS pDevIns, PPDMCRITSECT pCritSect,
+ SUPSEMEVENT hEventToSignal)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ RT_NOREF(pDevIns);
+ return PDMHCCritSectScheduleExitEvent(pCritSect, hEventToSignal);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnDBGFTraceBuf} */
+static DECLCALLBACK(RTTRACEBUF) pdmR0DevHlp_DBGFTraceBuf(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ RTTRACEBUF hTraceBuf = pDevIns->Internal.s.pGVM->hTraceBufR0;
+ LogFlow(("pdmR0DevHlp_DBGFTraceBuf: caller='%p'/%d: returns %p\n", pDevIns, pDevIns->iInstance, hTraceBuf));
+ return hTraceBuf;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnPCIBusSetUpContext} */
+static DECLCALLBACK(int) pdmR0DevHlp_PCIBusSetUpContext(PPDMDEVINS pDevIns, PPDMPCIBUSREGR0 pPciBusReg, PCPDMPCIHLPR0 *ppPciHlp)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_PCIBusSetUpContext: caller='%p'/%d: pPciBusReg=%p{.u32Version=%#x, .iBus=%#u, .pfnSetIrq=%p, u32EnvVersion=%#x} ppPciHlp=%p\n",
+ pDevIns, pDevIns->iInstance, pPciBusReg, pPciBusReg->u32Version, pPciBusReg->iBus, pPciBusReg->pfnSetIrq,
+ pPciBusReg->u32EndVersion, ppPciHlp));
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+
+ /*
+ * Validate input.
+ */
+ AssertPtrReturn(pPciBusReg, VERR_INVALID_POINTER);
+ AssertLogRelMsgReturn(pPciBusReg->u32Version == PDM_PCIBUSREGCC_VERSION,
+ ("%#x vs %#x\n", pPciBusReg->u32Version, PDM_PCIBUSREGCC_VERSION), VERR_VERSION_MISMATCH);
+ AssertPtrReturn(pPciBusReg->pfnSetIrq, VERR_INVALID_POINTER);
+ AssertLogRelMsgReturn(pPciBusReg->u32EndVersion == PDM_PCIBUSREGCC_VERSION,
+ ("%#x vs %#x\n", pPciBusReg->u32EndVersion, PDM_PCIBUSREGCC_VERSION), VERR_VERSION_MISMATCH);
+
+ AssertPtrReturn(ppPciHlp, VERR_INVALID_POINTER);
+
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_WRONG_ORDER);
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+
+ /* Check the shared bus data (registered earlier from ring-3): */
+ uint32_t iBus = pPciBusReg->iBus;
+ ASMCompilerBarrier();
+ AssertLogRelMsgReturn(iBus < RT_ELEMENTS(pGVM->pdm.s.aPciBuses), ("iBus=%#x\n", iBus), VERR_OUT_OF_RANGE);
+ PPDMPCIBUS pPciBusShared = &pGVM->pdm.s.aPciBuses[iBus];
+ AssertLogRelMsgReturn(pPciBusShared->iBus == iBus, ("%u vs %u\n", pPciBusShared->iBus, iBus), VERR_INVALID_PARAMETER);
+ AssertLogRelMsgReturn(pPciBusShared->pDevInsR3 == pDevIns->pDevInsForR3,
+ ("%p vs %p (iBus=%u)\n", pPciBusShared->pDevInsR3, pDevIns->pDevInsForR3, iBus), VERR_NOT_OWNER);
+
+ /* Check that the bus isn't already registered in ring-0: */
+ AssertCompile(RT_ELEMENTS(pGVM->pdm.s.aPciBuses) == RT_ELEMENTS(pGVM->pdmr0.s.aPciBuses));
+ PPDMPCIBUSR0 pPciBusR0 = &pGVM->pdmr0.s.aPciBuses[iBus];
+ AssertLogRelMsgReturn(pPciBusR0->pDevInsR0 == NULL,
+ ("%p (caller pDevIns=%p, iBus=%u)\n", pPciBusR0->pDevInsR0, pDevIns, iBus),
+ VERR_ALREADY_EXISTS);
+
+ /*
+ * Do the registering.
+ */
+ pPciBusR0->iBus = iBus;
+ pPciBusR0->uPadding0 = 0xbeefbeef;
+ pPciBusR0->pfnSetIrqR0 = pPciBusReg->pfnSetIrq;
+ pPciBusR0->pDevInsR0 = pDevIns;
+
+ *ppPciHlp = &g_pdmR0PciHlp;
+
+ LogFlow(("pdmR0DevHlp_PCIBusSetUpContext: caller='%p'/%d: returns VINF_SUCCESS\n", pDevIns, pDevIns->iInstance));
+ return VINF_SUCCESS;
+}
+
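Pieced together from the validation above, the matching call from a PCI bus device's ring-0 code might look as follows. pThis->iBus and devPciSetIrqR0 are hypothetical names for the bus instance data and the bus's own ring-0 set-IRQ callback; its shape is inferred from how pfnSetIrqR0 is invoked earlier in this file:

    static DECLCALLBACK(void) devPciSetIrqR0(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev,
                                             int iIrq, int iLevel, uint32_t uTagSrc);

    PDMPCIBUSREGR0 PciBusReg;
    PciBusReg.u32Version    = PDM_PCIBUSREGCC_VERSION;
    PciBusReg.iBus          = pThis->iBus;          /* must match the ring-3 registration */
    PciBusReg.pfnSetIrq     = devPciSetIrqR0;
    PciBusReg.u32EndVersion = PDM_PCIBUSREGCC_VERSION;
    PCPDMPCIHLPR0 pPciHlpR0 = NULL;
    int rc = pDevIns->pHlpR0->pfnPCIBusSetUpContext(pDevIns, &PciBusReg, &pPciHlpR0);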
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnPICSetUpContext} */
+static DECLCALLBACK(int) pdmR0DevHlp_PICSetUpContext(PPDMDEVINS pDevIns, PPDMPICREG pPicReg, PCPDMPICHLP *ppPicHlp)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_PICSetUpContext: caller='%s'/%d: pPicReg=%p:{.u32Version=%#x, .pfnSetIrq=%p, .pfnGetInterrupt=%p, .u32TheEnd=%#x } ppPicHlp=%p\n",
+ pDevIns->pReg->szName, pDevIns->iInstance, pPicReg, pPicReg->u32Version, pPicReg->pfnSetIrq, pPicReg->pfnGetInterrupt, pPicReg->u32TheEnd, ppPicHlp));
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+
+ /*
+ * Validate input.
+ */
+ AssertMsgReturn(pPicReg->u32Version == PDM_PICREG_VERSION,
+ ("%s/%d: u32Version=%#x expected %#x\n", pDevIns->pReg->szName, pDevIns->iInstance, pPicReg->u32Version, PDM_PICREG_VERSION),
+ VERR_VERSION_MISMATCH);
+ AssertPtrReturn(pPicReg->pfnSetIrq, VERR_INVALID_POINTER);
+ AssertPtrReturn(pPicReg->pfnGetInterrupt, VERR_INVALID_POINTER);
+ AssertMsgReturn(pPicReg->u32TheEnd == PDM_PICREG_VERSION,
+ ("%s/%d: u32TheEnd=%#x expected %#x\n", pDevIns->pReg->szName, pDevIns->iInstance, pPicReg->u32TheEnd, PDM_PICREG_VERSION),
+ VERR_VERSION_MISMATCH);
+ AssertPtrReturn(ppPicHlp, VERR_INVALID_POINTER);
+
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_WRONG_ORDER);
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+
+ /* Check that it's the same device as made the ring-3 registrations: */
+ AssertLogRelMsgReturn(pGVM->pdm.s.Pic.pDevInsR3 == pDevIns->pDevInsForR3,
+ ("%p vs %p\n", pGVM->pdm.s.Pic.pDevInsR3, pDevIns->pDevInsForR3), VERR_NOT_OWNER);
+
+ /* Check that it isn't already registered in ring-0: */
+ AssertLogRelMsgReturn(pGVM->pdm.s.Pic.pDevInsR0 == NULL, ("%p (caller pDevIns=%p)\n", pGVM->pdm.s.Pic.pDevInsR0, pDevIns),
+ VERR_ALREADY_EXISTS);
+
+ /*
+ * Take down the callbacks and instance.
+ */
+ pGVM->pdm.s.Pic.pDevInsR0 = pDevIns;
+ pGVM->pdm.s.Pic.pfnSetIrqR0 = pPicReg->pfnSetIrq;
+ pGVM->pdm.s.Pic.pfnGetInterruptR0 = pPicReg->pfnGetInterrupt;
+ Log(("PDM: Registered PIC device '%s'/%d pDevIns=%p\n", pDevIns->pReg->szName, pDevIns->iInstance, pDevIns));
+
+ /* set the helper pointer and return. */
+ *ppPicHlp = &g_pdmR0PicHlp;
+ LogFlow(("pdmR0DevHlp_PICSetUpContext: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VINF_SUCCESS));
+ return VINF_SUCCESS;
+}
+
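The corresponding registration from a PIC device's ring-0 constructor might look as follows; picSetIrqR0 and picGetInterruptR0 are hypothetical names for the device's own ring-0 callbacks (declared elsewhere), and only the PDMPICREG members validated above are shown:

    PDMPICREG PicReg;
    PicReg.u32Version      = PDM_PICREG_VERSION;
    PicReg.pfnSetIrq       = picSetIrqR0;           /* same callbacks as registered from ring-3 */
    PicReg.pfnGetInterrupt = picGetInterruptR0;
    PicReg.u32TheEnd       = PDM_PICREG_VERSION;
    PCPDMPICHLP pPicHlp    = NULL;
    int rc = pDevIns->pHlpR0->pfnPICSetUpContext(pDevIns, &PicReg, &pPicHlp);
    /* On success pPicHlp points to g_pdmR0PicHlp below (pfnSetInterruptFF, pfnLock, ...). */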
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnApicSetUpContext} */
+static DECLCALLBACK(int) pdmR0DevHlp_ApicSetUpContext(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_ApicSetUpContext: caller='%s'/%d:\n", pDevIns->pReg->szName, pDevIns->iInstance));
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+
+ /*
+ * Validate input.
+ */
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_WRONG_ORDER);
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+
+ /* Check that it's the same device as made the ring-3 registrations: */
+ AssertLogRelMsgReturn(pGVM->pdm.s.Apic.pDevInsR3 == pDevIns->pDevInsForR3,
+ ("%p vs %p\n", pGVM->pdm.s.Apic.pDevInsR3, pDevIns->pDevInsForR3), VERR_NOT_OWNER);
+
+ /* Check that it isn't already registered in ring-0: */
+ AssertLogRelMsgReturn(pGVM->pdm.s.Apic.pDevInsR0 == NULL, ("%p (caller pDevIns=%p)\n", pGVM->pdm.s.Apic.pDevInsR0, pDevIns),
+ VERR_ALREADY_EXISTS);
+
+ /*
+ * Take down the instance.
+ */
+ pGVM->pdm.s.Apic.pDevInsR0 = pDevIns;
+ Log(("PDM: Registered APIC device '%s'/%d pDevIns=%p\n", pDevIns->pReg->szName, pDevIns->iInstance, pDevIns));
+
+ /* set the helper pointer and return. */
+ LogFlow(("pdmR0DevHlp_ApicSetUpContext: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VINF_SUCCESS));
+ return VINF_SUCCESS;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnIoApicSetUpContext} */
+static DECLCALLBACK(int) pdmR0DevHlp_IoApicSetUpContext(PPDMDEVINS pDevIns, PPDMIOAPICREG pIoApicReg, PCPDMIOAPICHLP *ppIoApicHlp)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_IoApicSetUpContext: caller='%s'/%d: pIoApicReg=%p:{.u32Version=%#x, .pfnSetIrq=%p, .pfnSendMsi=%p, .pfnSetEoi=%p, .u32TheEnd=%#x } ppIoApicHlp=%p\n",
+ pDevIns->pReg->szName, pDevIns->iInstance, pIoApicReg, pIoApicReg->u32Version, pIoApicReg->pfnSetIrq, pIoApicReg->pfnSendMsi, pIoApicReg->pfnSetEoi, pIoApicReg->u32TheEnd, ppIoApicHlp));
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+
+ /*
+ * Validate input.
+ */
+ AssertMsgReturn(pIoApicReg->u32Version == PDM_IOAPICREG_VERSION,
+ ("%s/%d: u32Version=%#x expected %#x\n", pDevIns->pReg->szName, pDevIns->iInstance, pIoApicReg->u32Version, PDM_IOAPICREG_VERSION),
+ VERR_VERSION_MISMATCH);
+ AssertPtrReturn(pIoApicReg->pfnSetIrq, VERR_INVALID_POINTER);
+ AssertPtrReturn(pIoApicReg->pfnSendMsi, VERR_INVALID_POINTER);
+ AssertPtrReturn(pIoApicReg->pfnSetEoi, VERR_INVALID_POINTER);
+ AssertMsgReturn(pIoApicReg->u32TheEnd == PDM_IOAPICREG_VERSION,
+ ("%s/%d: u32TheEnd=%#x expected %#x\n", pDevIns->pReg->szName, pDevIns->iInstance, pIoApicReg->u32TheEnd, PDM_IOAPICREG_VERSION),
+ VERR_VERSION_MISMATCH);
+ AssertPtrReturn(ppIoApicHlp, VERR_INVALID_POINTER);
+
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_WRONG_ORDER);
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+
+ /* Check that it's the same device as made the ring-3 registrations: */
+ AssertLogRelMsgReturn(pGVM->pdm.s.IoApic.pDevInsR3 == pDevIns->pDevInsForR3,
+ ("%p vs %p\n", pGVM->pdm.s.IoApic.pDevInsR3, pDevIns->pDevInsForR3), VERR_NOT_OWNER);
+
+ /* Check that it isn't already registered in ring-0: */
+ AssertLogRelMsgReturn(pGVM->pdm.s.IoApic.pDevInsR0 == NULL, ("%p (caller pDevIns=%p)\n", pGVM->pdm.s.IoApic.pDevInsR0, pDevIns),
+ VERR_ALREADY_EXISTS);
+
+ /*
+ * Take down the callbacks and instance.
+ */
+ pGVM->pdm.s.IoApic.pDevInsR0 = pDevIns;
+ pGVM->pdm.s.IoApic.pfnSetIrqR0 = pIoApicReg->pfnSetIrq;
+ pGVM->pdm.s.IoApic.pfnSendMsiR0 = pIoApicReg->pfnSendMsi;
+ pGVM->pdm.s.IoApic.pfnSetEoiR0 = pIoApicReg->pfnSetEoi;
+ Log(("PDM: Registered IOAPIC device '%s'/%d pDevIns=%p\n", pDevIns->pReg->szName, pDevIns->iInstance, pDevIns));
+
+ /* set the helper pointer and return. */
+ *ppIoApicHlp = &g_pdmR0IoApicHlp;
+ LogFlow(("pdmR0DevHlp_IoApicSetUpContext: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VINF_SUCCESS));
+ return VINF_SUCCESS;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnHpetSetUpContext} */
+static DECLCALLBACK(int) pdmR0DevHlp_HpetSetUpContext(PPDMDEVINS pDevIns, PPDMHPETREG pHpetReg, PCPDMHPETHLPR0 *ppHpetHlp)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_HpetSetUpContext: caller='%s'/%d: pHpetReg=%p:{.u32Version=%#x, } ppHpetHlp=%p\n",
+ pDevIns->pReg->szName, pDevIns->iInstance, pHpetReg, pHpetReg->u32Version, ppHpetHlp));
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+
+ /*
+ * Validate input.
+ */
+ AssertMsgReturn(pHpetReg->u32Version == PDM_HPETREG_VERSION,
+ ("%s/%d: u32Version=%#x expected %#x\n", pDevIns->pReg->szName, pDevIns->iInstance, pHpetReg->u32Version, PDM_HPETREG_VERSION),
+ VERR_VERSION_MISMATCH);
+ AssertPtrReturn(ppHpetHlp, VERR_INVALID_POINTER);
+
+ VM_ASSERT_STATE_RETURN(pGVM, VMSTATE_CREATING, VERR_WRONG_ORDER);
+ VM_ASSERT_EMT0_RETURN(pGVM, VERR_VM_THREAD_NOT_EMT);
+
+ /* Check that it's the same device as made the ring-3 registrations: */
+ AssertLogRelMsgReturn(pGVM->pdm.s.pHpet == pDevIns->pDevInsForR3, ("%p vs %p\n", pGVM->pdm.s.pHpet, pDevIns->pDevInsForR3),
+ VERR_NOT_OWNER);
+
+ ///* Check that it isn't already registered in ring-0: */
+ //AssertLogRelMsgReturn(pGVM->pdm.s.Hpet.pDevInsR0 == NULL, ("%p (caller pDevIns=%p)\n", pGVM->pdm.s.Hpet.pDevInsR0, pDevIns),
+ // VERR_ALREADY_EXISTS);
+
+ /*
+ * Nothing to take down here at present.
+ */
+ Log(("PDM: Registered HPET device '%s'/%d pDevIns=%p\n", pDevIns->pReg->szName, pDevIns->iInstance, pDevIns));
+
+ /* set the helper pointer and return. */
+ *ppHpetHlp = &g_pdmR0HpetHlp;
+ LogFlow(("pdmR0DevHlp_HpetSetUpContext: caller='%s'/%d: returns %Rrc\n", pDevIns->pReg->szName, pDevIns->iInstance, VINF_SUCCESS));
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * The Ring-0 Device Helper Callbacks.
+ */
+extern DECLEXPORT(const PDMDEVHLPR0) g_pdmR0DevHlp =
+{
+ PDM_DEVHLPR0_VERSION,
+ pdmR0DevHlp_IoPortSetUpContextEx,
+ pdmR0DevHlp_MmioSetUpContextEx,
+ pdmR0DevHlp_Mmio2SetUpContext,
+ pdmR0DevHlp_PCIPhysRead,
+ pdmR0DevHlp_PCIPhysWrite,
+ pdmR0DevHlp_PCISetIrq,
+ pdmR0DevHlp_ISASetIrq,
+ pdmR0DevHlp_IoApicSendMsi,
+ pdmR0DevHlp_PhysRead,
+ pdmR0DevHlp_PhysWrite,
+ pdmR0DevHlp_A20IsEnabled,
+ pdmR0DevHlp_VMState,
+ pdmR0DevHlp_VMSetError,
+ pdmR0DevHlp_VMSetErrorV,
+ pdmR0DevHlp_VMSetRuntimeError,
+ pdmR0DevHlp_VMSetRuntimeErrorV,
+ pdmR0DevHlp_GetVM,
+ pdmR0DevHlp_GetVMCPU,
+ pdmR0DevHlp_GetCurrentCpuId,
+ pdmR0DevHlp_TimerToPtr,
+ pdmR0DevHlp_TimerFromMicro,
+ pdmR0DevHlp_TimerFromMilli,
+ pdmR0DevHlp_TimerFromNano,
+ pdmR0DevHlp_TimerGet,
+ pdmR0DevHlp_TimerGetFreq,
+ pdmR0DevHlp_TimerGetNano,
+ pdmR0DevHlp_TimerIsActive,
+ pdmR0DevHlp_TimerIsLockOwner,
+ pdmR0DevHlp_TimerLockClock,
+ pdmR0DevHlp_TimerLockClock2,
+ pdmR0DevHlp_TimerSet,
+ pdmR0DevHlp_TimerSetFrequencyHint,
+ pdmR0DevHlp_TimerSetMicro,
+ pdmR0DevHlp_TimerSetMillies,
+ pdmR0DevHlp_TimerSetNano,
+ pdmR0DevHlp_TimerSetRelative,
+ pdmR0DevHlp_TimerStop,
+ pdmR0DevHlp_TimerUnlockClock,
+ pdmR0DevHlp_TimerUnlockClock2,
+ pdmR0DevHlp_TMTimeVirtGet,
+ pdmR0DevHlp_TMTimeVirtGetFreq,
+ pdmR0DevHlp_TMTimeVirtGetNano,
+ pdmR0DevHlp_QueueToPtr,
+ pdmR0DevHlp_QueueAlloc,
+ pdmR0DevHlp_QueueInsert,
+ pdmR0DevHlp_QueueInsertEx,
+ pdmR0DevHlp_QueueFlushIfNecessary,
+ pdmR0DevHlp_TaskTrigger,
+ pdmR0DevHlp_SUPSemEventSignal,
+ pdmR0DevHlp_SUPSemEventWaitNoResume,
+ pdmR0DevHlp_SUPSemEventWaitNsAbsIntr,
+ pdmR0DevHlp_SUPSemEventWaitNsRelIntr,
+ pdmR0DevHlp_SUPSemEventGetResolution,
+ pdmR0DevHlp_SUPSemEventMultiSignal,
+ pdmR0DevHlp_SUPSemEventMultiReset,
+ pdmR0DevHlp_SUPSemEventMultiWaitNoResume,
+ pdmR0DevHlp_SUPSemEventMultiWaitNsAbsIntr,
+ pdmR0DevHlp_SUPSemEventMultiWaitNsRelIntr,
+ pdmR0DevHlp_SUPSemEventMultiGetResolution,
+ pdmR0DevHlp_CritSectGetNop,
+ pdmR0DevHlp_SetDeviceCritSect,
+ pdmR0DevHlp_CritSectEnter,
+ pdmR0DevHlp_CritSectEnterDebug,
+ pdmR0DevHlp_CritSectTryEnter,
+ pdmR0DevHlp_CritSectTryEnterDebug,
+ pdmR0DevHlp_CritSectLeave,
+ pdmR0DevHlp_CritSectIsOwner,
+ pdmR0DevHlp_CritSectIsInitialized,
+ pdmR0DevHlp_CritSectHasWaiters,
+ pdmR0DevHlp_CritSectGetRecursion,
+ pdmR0DevHlp_CritSectScheduleExitEvent,
+ pdmR0DevHlp_DBGFTraceBuf,
+ pdmR0DevHlp_PCIBusSetUpContext,
+ pdmR0DevHlp_PICSetUpContext,
+ pdmR0DevHlp_ApicSetUpContext,
+ pdmR0DevHlp_IoApicSetUpContext,
+ pdmR0DevHlp_HpetSetUpContext,
+ NULL /*pfnReserved1*/,
+ NULL /*pfnReserved2*/,
+ NULL /*pfnReserved3*/,
+ NULL /*pfnReserved4*/,
+ NULL /*pfnReserved5*/,
+ NULL /*pfnReserved6*/,
+ NULL /*pfnReserved7*/,
+ NULL /*pfnReserved8*/,
+ NULL /*pfnReserved9*/,
+ NULL /*pfnReserved10*/,
+ PDM_DEVHLPR0_VERSION
+};
+
+/** @} */
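Devices never reference g_pdmR0DevHlp directly; the table is reached through pDevIns->pHlpR0, as the PCI read/write helpers earlier in this file already do. A small sketch with an assumed timer handle hTimer (clock locking via pfnTimerLockClock elided for brevity):

    uint64_t const uNow  = pDevIns->pHlpR0->pfnTimerGet(pDevIns, hTimer);
    uint64_t const cFreq = pDevIns->pHlpR0->pfnTimerGetFreq(pDevIns, hTimer);
    pDevIns->pHlpR0->pfnTimerSet(pDevIns, hTimer, uNow + cFreq / 100); /* arm ~10 ms out */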
+
+
+/** @name PIC Ring-0 Helpers
+ * @{
+ */
+
+/** @interface_method_impl{PDMPICHLP,pfnSetInterruptFF} */
+static DECLCALLBACK(void) pdmR0PicHlp_SetInterruptFF(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ PGVM pGVM = (PGVM)pDevIns->Internal.s.pGVM;
+    PVMCPUCC pVCpu = &pGVM->aCpus[0]; /* for the PIC we always deliver to CPU 0; MP systems use the APIC */
+    /** @todo r=ramshankar: Propagate rcRZ and make all callers handle it? */
+ APICLocalInterrupt(pVCpu, 0 /* u8Pin */, 1 /* u8Level */, VINF_SUCCESS /* rcRZ */);
+}
+
+
+/** @interface_method_impl{PDMPICHLP,pfnClearInterruptFF} */
+static DECLCALLBACK(void) pdmR0PicHlp_ClearInterruptFF(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ PGVM pGVM = (PGVM)pDevIns->Internal.s.pGVM;
+    PVMCPUCC pVCpu = &pGVM->aCpus[0]; /* for the PIC we always deliver to CPU 0; MP systems use the APIC */
+    /** @todo r=ramshankar: Propagate rcRZ and make all callers handle it? */
+ APICLocalInterrupt(pVCpu, 0 /* u8Pin */, 0 /* u8Level */, VINF_SUCCESS /* rcRZ */);
+}
+
+
+/** @interface_method_impl{PDMPICHLP,pfnLock} */
+static DECLCALLBACK(int) pdmR0PicHlp_Lock(PPDMDEVINS pDevIns, int rc)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ return pdmLockEx(pDevIns->Internal.s.pGVM, rc);
+}
+
+
+/** @interface_method_impl{PDMPICHLP,pfnUnlock} */
+static DECLCALLBACK(void) pdmR0PicHlp_Unlock(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ pdmUnlock(pDevIns->Internal.s.pGVM);
+}
+
+
+/**
+ * The Ring-0 PIC Helper Callbacks.
+ */
+extern DECLEXPORT(const PDMPICHLP) g_pdmR0PicHlp =
+{
+ PDM_PICHLP_VERSION,
+ pdmR0PicHlp_SetInterruptFF,
+ pdmR0PicHlp_ClearInterruptFF,
+ pdmR0PicHlp_Lock,
+ pdmR0PicHlp_Unlock,
+ PDM_PICHLP_VERSION
+};
+
+/** @} */
+
+
+/** @name I/O APIC Ring-0 Helpers
+ * @{
+ */
+
+/** @interface_method_impl{PDMIOAPICHLP,pfnApicBusDeliver} */
+static DECLCALLBACK(int) pdmR0IoApicHlp_ApicBusDeliver(PPDMDEVINS pDevIns, uint8_t u8Dest, uint8_t u8DestMode,
+ uint8_t u8DeliveryMode, uint8_t uVector, uint8_t u8Polarity,
+ uint8_t u8TriggerMode, uint32_t uTagSrc)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+ LogFlow(("pdmR0IoApicHlp_ApicBusDeliver: caller=%p/%d: u8Dest=%RX8 u8DestMode=%RX8 u8DeliveryMode=%RX8 uVector=%RX8 u8Polarity=%RX8 u8TriggerMode=%RX8 uTagSrc=%#x\n",
+ pDevIns, pDevIns->iInstance, u8Dest, u8DestMode, u8DeliveryMode, uVector, u8Polarity, u8TriggerMode, uTagSrc));
+ return APICBusDeliver(pGVM, u8Dest, u8DestMode, u8DeliveryMode, uVector, u8Polarity, u8TriggerMode, uTagSrc);
+}
+
+
+/** @interface_method_impl{PDMIOAPICHLP,pfnLock} */
+static DECLCALLBACK(int) pdmR0IoApicHlp_Lock(PPDMDEVINS pDevIns, int rc)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ return pdmLockEx(pDevIns->Internal.s.pGVM, rc);
+}
+
+
+/** @interface_method_impl{PDMIOAPICHLP,pfnUnlock} */
+static DECLCALLBACK(void) pdmR0IoApicHlp_Unlock(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ pdmUnlock(pDevIns->Internal.s.pGVM);
+}
+
+
+/**
+ * The Ring-0 I/O APIC Helper Callbacks.
+ */
+extern DECLEXPORT(const PDMIOAPICHLP) g_pdmR0IoApicHlp =
+{
+ PDM_IOAPICHLP_VERSION,
+ pdmR0IoApicHlp_ApicBusDeliver,
+ pdmR0IoApicHlp_Lock,
+ pdmR0IoApicHlp_Unlock,
+ PDM_IOAPICHLP_VERSION
+};
+
+/** @} */
+
+
+
+
+/** @name PCI Bus Ring-0 Helpers
+ * @{
+ */
+
+/** @interface_method_impl{PDMPCIHLPR0,pfnIsaSetIrq} */
+static DECLCALLBACK(void) pdmR0PciHlp_IsaSetIrq(PPDMDEVINS pDevIns, int iIrq, int iLevel, uint32_t uTagSrc)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ Log4(("pdmR0PciHlp_IsaSetIrq: iIrq=%d iLevel=%d uTagSrc=%#x\n", iIrq, iLevel, uTagSrc));
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+
+ pdmLock(pGVM);
+ pdmR0IsaSetIrq(pGVM, iIrq, iLevel, uTagSrc);
+ pdmUnlock(pGVM);
+}
+
+
+/** @interface_method_impl{PDMPCIHLPR0,pfnIoApicSetIrq} */
+static DECLCALLBACK(void) pdmR0PciHlp_IoApicSetIrq(PPDMDEVINS pDevIns, int iIrq, int iLevel, uint32_t uTagSrc)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ Log4(("pdmR0PciHlp_IoApicSetIrq: iIrq=%d iLevel=%d uTagSrc=%#x\n", iIrq, iLevel, uTagSrc));
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+
+ if (pGVM->pdm.s.IoApic.pDevInsR0)
+ pGVM->pdm.s.IoApic.pfnSetIrqR0(pGVM->pdm.s.IoApic.pDevInsR0, iIrq, iLevel, uTagSrc);
+ else if (pGVM->pdm.s.IoApic.pDevInsR3)
+ {
+ /* queue for ring-3 execution. */
+ PPDMDEVHLPTASK pTask = (PPDMDEVHLPTASK)PDMQueueAlloc(pGVM->pdm.s.pDevHlpQueueR0);
+ if (pTask)
+ {
+ pTask->enmOp = PDMDEVHLPTASKOP_IOAPIC_SET_IRQ;
+ pTask->pDevInsR3 = NIL_RTR3PTR; /* not required */
+ pTask->u.IoApicSetIRQ.iIrq = iIrq;
+ pTask->u.IoApicSetIRQ.iLevel = iLevel;
+ pTask->u.IoApicSetIRQ.uTagSrc = uTagSrc;
+
+ PDMQueueInsertEx(pGVM->pdm.s.pDevHlpQueueR0, &pTask->Core, 0);
+ }
+ else
+ AssertMsgFailed(("We're out of devhlp queue items!!!\n"));
+ }
+}
+
+
+/** @interface_method_impl{PDMPCIHLPR0,pfnIoApicSendMsi} */
+static DECLCALLBACK(void) pdmR0PciHlp_IoApicSendMsi(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, uint32_t uValue, uint32_t uTagSrc)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ Log4(("pdmR0PciHlp_IoApicSendMsi: GCPhys=%p uValue=%d uTagSrc=%#x\n", GCPhys, uValue, uTagSrc));
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+ if (pGVM->pdm.s.IoApic.pDevInsR0)
+ pGVM->pdm.s.IoApic.pfnSendMsiR0(pGVM->pdm.s.IoApic.pDevInsR0, GCPhys, uValue, uTagSrc);
+ else
+ AssertFatalMsgFailed(("Lazy bastards!"));
+}
+
+
+/** @interface_method_impl{PDMPCIHLPR0,pfnLock} */
+static DECLCALLBACK(int) pdmR0PciHlp_Lock(PPDMDEVINS pDevIns, int rc)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ return pdmLockEx(pDevIns->Internal.s.pGVM, rc);
+}
+
+
+/** @interface_method_impl{PDMPCIHLPR0,pfnUnlock} */
+static DECLCALLBACK(void) pdmR0PciHlp_Unlock(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ pdmUnlock(pDevIns->Internal.s.pGVM);
+}
+
+
+/** @interface_method_impl{PDMPCIHLPR0,pfnGetBusByNo} */
+static DECLCALLBACK(PPDMDEVINS) pdmR0PciHlp_GetBusByNo(PPDMDEVINS pDevIns, uint32_t idxPdmBus)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ PGVM pGVM = pDevIns->Internal.s.pGVM;
+ AssertReturn(idxPdmBus < RT_ELEMENTS(pGVM->pdmr0.s.aPciBuses), NULL);
+ PPDMDEVINS pRetDevIns = pGVM->pdmr0.s.aPciBuses[idxPdmBus].pDevInsR0;
+ LogFlow(("pdmR3PciHlp_GetBusByNo: caller='%s'/%d: returns %p\n", pDevIns->pReg->szName, pDevIns->iInstance, pRetDevIns));
+ return pRetDevIns;
+}
+
+
+/**
+ * The Ring-0 PCI Bus Helper Callbacks.
+ */
+extern DECLEXPORT(const PDMPCIHLPR0) g_pdmR0PciHlp =
+{
+ PDM_PCIHLPR0_VERSION,
+ pdmR0PciHlp_IsaSetIrq,
+ pdmR0PciHlp_IoApicSetIrq,
+ pdmR0PciHlp_IoApicSendMsi,
+ pdmR0PciHlp_Lock,
+ pdmR0PciHlp_Unlock,
+ pdmR0PciHlp_GetBusByNo,
+ PDM_PCIHLPR0_VERSION, /* the end */
+};
+
+/** @} */
+
+
+/** @name HPET Ring-0 Helpers
+ * @{
+ */
+/* none */
+
+/**
+ * The Ring-0 HPET Helper Callbacks.
+ */
+extern DECLEXPORT(const PDMHPETHLPR0) g_pdmR0HpetHlp =
+{
+ PDM_HPETHLPR0_VERSION,
+ PDM_HPETHLPR0_VERSION, /* the end */
+};
+
+/** @} */
+
+
+/** @name Raw PCI Ring-0 Helpers
+ * @{
+ */
+/* none */
+
+/**
+ * The Ring-0 PCI raw Helper Callbacks.
+ */
+extern DECLEXPORT(const PDMPCIRAWHLPR0) g_pdmR0PciRawHlp =
+{
+ PDM_PCIRAWHLPR0_VERSION,
+ PDM_PCIRAWHLPR0_VERSION, /* the end */
+};
+
+/** @} */
+
+
+
+
+/**
+ * Sets an irq on the PIC and I/O APIC.
+ *
+ * @returns true if delivered, false if postponed.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param iIrq The irq.
+ * @param iLevel The new level.
+ * @param uTagSrc The IRQ tag and source.
+ *
+ * @remarks The caller holds the PDM lock.
+ */
+static bool pdmR0IsaSetIrq(PGVM pGVM, int iIrq, int iLevel, uint32_t uTagSrc)
+{
+ if (RT_LIKELY( ( pGVM->pdm.s.IoApic.pDevInsR0
+ || !pGVM->pdm.s.IoApic.pDevInsR3)
+ && ( pGVM->pdm.s.Pic.pDevInsR0
+ || !pGVM->pdm.s.Pic.pDevInsR3)))
+ {
+ if (pGVM->pdm.s.Pic.pDevInsR0)
+ pGVM->pdm.s.Pic.pfnSetIrqR0(pGVM->pdm.s.Pic.pDevInsR0, iIrq, iLevel, uTagSrc);
+ if (pGVM->pdm.s.IoApic.pDevInsR0)
+ pGVM->pdm.s.IoApic.pfnSetIrqR0(pGVM->pdm.s.IoApic.pDevInsR0, iIrq, iLevel, uTagSrc);
+ return true;
+ }
+
+ /* queue for ring-3 execution. */
+ PPDMDEVHLPTASK pTask = (PPDMDEVHLPTASK)PDMQueueAlloc(pGVM->pdm.s.pDevHlpQueueR0);
+ AssertReturn(pTask, false);
+
+ pTask->enmOp = PDMDEVHLPTASKOP_ISA_SET_IRQ;
+ pTask->pDevInsR3 = NIL_RTR3PTR; /* not required */
+ pTask->u.IsaSetIRQ.iIrq = iIrq;
+ pTask->u.IsaSetIRQ.iLevel = iLevel;
+ pTask->u.IsaSetIRQ.uTagSrc = uTagSrc;
+
+ PDMQueueInsertEx(pGVM->pdm.s.pDevHlpQueueR0, &pTask->Core, 0);
+ return false;
+}
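+
+/*
+ * Minimal usage sketch (assumption, not part of the actual sources): how a
+ * caller that already owns the PDM lock might use pdmR0IsaSetIrq() and react
+ * to the postponed case.  The wrapper function below is hypothetical.
+ */
+#if 0 /* illustrative only */
+static void pdmR0ExampleRaiseIsaIrq(PGVM pGVM, int iIrq, uint32_t uTagSrc)
+{
+    /* Assert the line (level 1); if the PIC or I/O APIC only exists in ring-3,
+       the request is queued and completed on the next trip to ring-3. */
+    if (!pdmR0IsaSetIrq(pGVM, iIrq, 1 /*iLevel*/, uTagSrc))
+        Log(("Example: IRQ %d postponed to ring-3\n", iIrq));
+}
+#endif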
+
diff --git a/src/VBox/VMM/VMMR0/PDMR0Device.cpp b/src/VBox/VMM/VMMR0/PDMR0Device.cpp
new file mode 100644
index 00000000..571d0e61
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/PDMR0Device.cpp
@@ -0,0 +1,803 @@
+/* $Id: PDMR0Device.cpp $ */
+/** @file
+ * PDM - Pluggable Device and Driver Manager, R0 Device parts.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_PDM_DEVICE
+#define PDMPCIDEV_INCLUDE_PRIVATE /* Hack to get pdmpcidevint.h included at the right point. */
+#include "PDMInternal.h"
+#include <VBox/vmm/pdm.h>
+#include <VBox/vmm/apic.h>
+#include <VBox/vmm/mm.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/gvm.h>
+#include <VBox/vmm/vmm.h>
+#include <VBox/vmm/hm.h>
+#include <VBox/vmm/vmcc.h>
+#include <VBox/vmm/gvmm.h>
+
+#include <VBox/log.h>
+#include <VBox/err.h>
+#include <VBox/msi.h>
+#include <VBox/sup.h>
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/ctype.h>
+#include <iprt/mem.h>
+#include <iprt/memobj.h>
+#include <iprt/process.h>
+#include <iprt/string.h>
+
+#include "dtrace/VBoxVMM.h"
+#include "PDMInline.h"
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+RT_C_DECLS_BEGIN
+extern DECLEXPORT(const PDMDEVHLPR0) g_pdmR0DevHlp;
+extern DECLEXPORT(const PDMPICHLP) g_pdmR0PicHlp;
+extern DECLEXPORT(const PDMIOAPICHLP) g_pdmR0IoApicHlp;
+extern DECLEXPORT(const PDMPCIHLPR0) g_pdmR0PciHlp;
+extern DECLEXPORT(const PDMHPETHLPR0) g_pdmR0HpetHlp;
+extern DECLEXPORT(const PDMPCIRAWHLPR0) g_pdmR0PciRawHlp;
+RT_C_DECLS_END
+
+/** List of PDMDEVMODREGR0 structures protected by the loader lock. */
+static RTLISTANCHOR g_PDMDevModList;
+
+
+/**
+ * Pointer to the ring-0 device registrations for VMMR0.
+ */
+static const PDMDEVREGR0 *g_apVMM0DevRegs[] =
+{
+ &g_DeviceAPIC,
+};
+
+/**
+ * Module device registration record for VMMR0.
+ */
+static PDMDEVMODREGR0 g_VBoxDDR0ModDevReg =
+{
+ /* .u32Version = */ PDM_DEVMODREGR0_VERSION,
+ /* .cDevRegs = */ RT_ELEMENTS(g_apVMM0DevRegs),
+ /* .papDevRegs = */ &g_apVMM0DevRegs[0],
+ /* .hMod = */ NULL,
+ /* .ListEntry = */ { NULL, NULL },
+};
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+
+
+/**
+ * Initializes the global ring-0 PDM data.
+ */
+VMMR0_INT_DECL(void) PDMR0Init(void *hMod)
+{
+ RTListInit(&g_PDMDevModList);
+ g_VBoxDDR0ModDevReg.hMod = hMod;
+ RTListAppend(&g_PDMDevModList, &g_VBoxDDR0ModDevReg.ListEntry);
+}
+
+
+/**
+ * Used by PDMR0CleanupVM to destroy a device instance.
+ *
+ * This is done during VM cleanup so that we're sure there are no active threads
+ * inside the device code.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pDevIns The device instance.
+ * @param idxR0Device The device instance handle.
+ */
+static int pdmR0DeviceDestroy(PGVM pGVM, PPDMDEVINSR0 pDevIns, uint32_t idxR0Device)
+{
+ /*
+ * Assert sanity.
+ */
+ Assert(idxR0Device < pGVM->pdmr0.s.cDevInstances);
+ AssertPtrReturn(pDevIns, VERR_INVALID_HANDLE);
+ Assert(pDevIns->u32Version == PDM_DEVINSR0_VERSION);
+ Assert(pDevIns->Internal.s.idxR0Device == idxR0Device);
+
+ /*
+ * Call the final destructor if there is one.
+ */
+ if (pDevIns->pReg->pfnFinalDestruct)
+ pDevIns->pReg->pfnFinalDestruct(pDevIns);
+ pDevIns->u32Version = ~PDM_DEVINSR0_VERSION;
+
+ /*
+ * Remove the device from the instance table.
+ */
+ Assert(pGVM->pdmr0.s.apDevInstances[idxR0Device] == pDevIns);
+ pGVM->pdmr0.s.apDevInstances[idxR0Device] = NULL;
+ if (idxR0Device + 1 == pGVM->pdmr0.s.cDevInstances)
+ pGVM->pdmr0.s.cDevInstances = idxR0Device;
+
+ /*
+ * Free the ring-3 mapping and instance memory.
+ */
+ RTR0MEMOBJ hMemObj = pDevIns->Internal.s.hMapObj;
+ pDevIns->Internal.s.hMapObj = NIL_RTR0MEMOBJ;
+ RTR0MemObjFree(hMemObj, true);
+
+ hMemObj = pDevIns->Internal.s.hMemObj;
+ pDevIns->Internal.s.hMemObj = NIL_RTR0MEMOBJ;
+ RTR0MemObjFree(hMemObj, true);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Initializes the per-VM data for the PDM.
+ *
+ * This is called from under the GVMM lock, so it only needs to initialize the
+ * data so PDMR0CleanupVM and others will work smoothly.
+ *
+ * @param pGVM Pointer to the global VM structure.
+ */
+VMMR0_INT_DECL(void) PDMR0InitPerVMData(PGVM pGVM)
+{
+ AssertCompile(sizeof(pGVM->pdm.s) <= sizeof(pGVM->pdm.padding));
+ AssertCompile(sizeof(pGVM->pdmr0.s) <= sizeof(pGVM->pdmr0.padding));
+
+ pGVM->pdmr0.s.cDevInstances = 0;
+}
+
+
+/**
+ * Cleans up any loose ends before the GVM structure is destroyed.
+ */
+VMMR0_INT_DECL(void) PDMR0CleanupVM(PGVM pGVM)
+{
+ uint32_t i = pGVM->pdmr0.s.cDevInstances;
+ while (i-- > 0)
+ {
+ PPDMDEVINSR0 pDevIns = pGVM->pdmr0.s.apDevInstances[i];
+ if (pDevIns)
+ pdmR0DeviceDestroy(pGVM, pDevIns, i);
+ }
+}
+
+
+/**
+ * Worker for PDMR0DeviceCreate that does the actual instantiation.
+ *
+ * Allocates a memory object and divides it up as follows:
+ * @verbatim
+ --------------------------------------
+ ring-0 devins
+ --------------------------------------
+ ring-0 instance data
+ --------------------------------------
+ ring-0 PCI device data (optional) ??
+ --------------------------------------
+ page alignment padding
+ --------------------------------------
+ ring-3 devins
+ --------------------------------------
+ ring-3 instance data
+ --------------------------------------
+ ring-3 PCI device data (optional) ??
+ --------------------------------------
+ [page alignment padding ] -
+ [--------------------------------------] \
+ [raw-mode devins ] \
+ [--------------------------------------] - Optional, only when raw-mode is enabled.
+ [raw-mode instance data ] /
+ [--------------------------------------] /
+ [raw-mode PCI device data (optional)?? ] -
+ --------------------------------------
+ shared instance data
+ --------------------------------------
+ default crit section
+ --------------------------------------
+ shared PCI device data (optional)
+ --------------------------------------
+ @endverbatim
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pDevReg The device registration structure.
+ * @param iInstance The device instance number.
+ * @param cbInstanceR3 The size of the ring-3 instance data.
+ * @param cbInstanceRC The size of the raw-mode instance data.
+ * @param   RCPtrMapping    The raw-mode context mapping address, NIL_RTRGPTR if
+ *                          not to include raw-mode.
+ * @param   hMod            The module implementing the device.  On success, the
+ *                          module reference is kept by the new device instance.
+ * @param ppDevInsR3 Where to return the ring-3 device instance address.
+ * @thread EMT(0)
+ */
+static int pdmR0DeviceCreateWorker(PGVM pGVM, PCPDMDEVREGR0 pDevReg, uint32_t iInstance, uint32_t cbInstanceR3,
+ uint32_t cbInstanceRC, RTRGPTR RCPtrMapping, void *hMod, PPDMDEVINSR3 *ppDevInsR3)
+{
+ /*
+ * Check that the instance number isn't a duplicate.
+ */
+ for (size_t i = 0; i < pGVM->pdmr0.s.cDevInstances; i++)
+ {
+ PPDMDEVINS pCur = pGVM->pdmr0.s.apDevInstances[i];
+ AssertLogRelReturn(!pCur || pCur->pReg != pDevReg || pCur->iInstance != iInstance, VERR_DUPLICATE);
+ }
+
+ /*
+ * Figure out how much memory we need and allocate it.
+ */
+ uint32_t const cbRing0 = RT_ALIGN_32(RT_UOFFSETOF(PDMDEVINSR0, achInstanceData) + pDevReg->cbInstanceCC, PAGE_SIZE);
+ uint32_t const cbRing3 = RT_ALIGN_32(RT_UOFFSETOF(PDMDEVINSR3, achInstanceData) + cbInstanceR3,
+ RCPtrMapping != NIL_RTRGPTR ? PAGE_SIZE : 64);
+ uint32_t const cbRC = RCPtrMapping != NIL_RTRGPTR ? 0
+ : RT_ALIGN_32(RT_UOFFSETOF(PDMDEVINSRC, achInstanceData) + cbInstanceRC, 64);
+ uint32_t const cbShared = RT_ALIGN_32(pDevReg->cbInstanceShared, 64);
+ uint32_t const cbCritSect = RT_ALIGN_32(sizeof(PDMCRITSECT), 64);
+ uint32_t const cbMsixState = RT_ALIGN_32(pDevReg->cMaxMsixVectors * 16 + (pDevReg->cMaxMsixVectors + 7) / 8, _4K);
+ uint32_t const cbPciDev = RT_ALIGN_32(RT_UOFFSETOF_DYN(PDMPCIDEV, abMsixState[cbMsixState]), 64);
+ uint32_t const cPciDevs = RT_MIN(pDevReg->cMaxPciDevices, 8);
+ uint32_t const cbPciDevs = cbPciDev * cPciDevs;
+ uint32_t const cbTotal = RT_ALIGN_32(cbRing0 + cbRing3 + cbRC + cbShared + cbCritSect + cbPciDevs, PAGE_SIZE);
+ AssertLogRelMsgReturn(cbTotal <= PDM_MAX_DEVICE_INSTANCE_SIZE,
+ ("Instance of '%s' is too big: cbTotal=%u, max %u\n",
+ pDevReg->szName, cbTotal, PDM_MAX_DEVICE_INSTANCE_SIZE),
+ VERR_OUT_OF_RANGE);
+
+ RTR0MEMOBJ hMemObj;
+ int rc = RTR0MemObjAllocPage(&hMemObj, cbTotal, false /*fExecutable*/);
+ if (RT_FAILURE(rc))
+ return rc;
+ RT_BZERO(RTR0MemObjAddress(hMemObj), cbTotal);
+
+ /* Map it. */
+ RTR0MEMOBJ hMapObj;
+ rc = RTR0MemObjMapUserEx(&hMapObj, hMemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, RTR0ProcHandleSelf(),
+ cbRing0, cbTotal - cbRing0);
+ if (RT_SUCCESS(rc))
+ {
+ PPDMDEVINSR0 pDevIns = (PPDMDEVINSR0)RTR0MemObjAddress(hMemObj);
+ struct PDMDEVINSR3 *pDevInsR3 = (struct PDMDEVINSR3 *)((uint8_t *)pDevIns + cbRing0);
+
+ /*
+ * Initialize the ring-0 instance.
+ */
+ pDevIns->u32Version = PDM_DEVINSR0_VERSION;
+ pDevIns->iInstance = iInstance;
+ pDevIns->pHlpR0 = &g_pdmR0DevHlp;
+ pDevIns->pvInstanceDataR0 = (uint8_t *)pDevIns + cbRing0 + cbRing3 + cbRC;
+ pDevIns->pvInstanceDataForR0 = &pDevIns->achInstanceData[0];
+ pDevIns->pCritSectRoR0 = (PPDMCRITSECT)((uint8_t *)pDevIns->pvInstanceDataR0 + cbShared);
+ pDevIns->pReg = pDevReg;
+ pDevIns->pDevInsForR3 = RTR0MemObjAddressR3(hMapObj);
+ pDevIns->pDevInsForR3R0 = pDevInsR3;
+ pDevIns->pvInstanceDataForR3R0 = &pDevInsR3->achInstanceData[0];
+ pDevIns->cbPciDev = cbPciDev;
+ pDevIns->cPciDevs = cPciDevs;
+ for (uint32_t iPciDev = 0; iPciDev < cPciDevs; iPciDev++)
+ {
+ /* Note! PDMDevice.cpp has a copy of this code. Keep in sync. */
+ PPDMPCIDEV pPciDev = (PPDMPCIDEV)((uint8_t *)pDevIns->pCritSectRoR0 + cbCritSect + cbPciDev * iPciDev);
+ if (iPciDev < RT_ELEMENTS(pDevIns->apPciDevs))
+ pDevIns->apPciDevs[iPciDev] = pPciDev;
+ pPciDev->cbConfig = _4K;
+ pPciDev->cbMsixState = cbMsixState;
+ pPciDev->idxSubDev = (uint16_t)iPciDev;
+ pPciDev->Int.s.idxSubDev = (uint16_t)iPciDev;
+ pPciDev->u32Magic = PDMPCIDEV_MAGIC;
+ }
+ pDevIns->Internal.s.pGVM = pGVM;
+ pDevIns->Internal.s.pRegR0 = pDevReg;
+ pDevIns->Internal.s.hMod = hMod;
+ pDevIns->Internal.s.hMemObj = hMemObj;
+ pDevIns->Internal.s.hMapObj = hMapObj;
+ pDevIns->Internal.s.pInsR3R0 = pDevInsR3;
+ pDevIns->Internal.s.pIntR3R0 = &pDevInsR3->Internal.s;
+
+ /*
+ * Initialize the ring-3 instance data as much as we can.
+ * Note! PDMDevice.cpp does this job for ring-3 only devices. Keep in sync.
+ */
+ pDevInsR3->u32Version = PDM_DEVINSR3_VERSION;
+ pDevInsR3->iInstance = iInstance;
+ pDevInsR3->cbRing3 = cbTotal - cbRing0;
+ pDevInsR3->fR0Enabled = true;
+ pDevInsR3->fRCEnabled = RCPtrMapping != NIL_RTRGPTR;
+ pDevInsR3->pvInstanceDataR3 = pDevIns->pDevInsForR3 + cbRing3 + cbRC;
+ pDevInsR3->pvInstanceDataForR3 = pDevIns->pDevInsForR3 + RT_UOFFSETOF(PDMDEVINSR3, achInstanceData);
+ pDevInsR3->pCritSectRoR3 = pDevIns->pDevInsForR3 + cbRing3 + cbRC + cbShared;
+ pDevInsR3->pDevInsR0RemoveMe = pDevIns;
+ pDevInsR3->pvInstanceDataR0 = pDevIns->pvInstanceDataR0;
+ pDevInsR3->pvInstanceDataRC = RCPtrMapping == NIL_RTRGPTR
+ ? NIL_RTRGPTR : pDevIns->pDevInsForRC + RT_UOFFSETOF(PDMDEVINSRC, achInstanceData);
+ pDevInsR3->pDevInsForRC = pDevIns->pDevInsForRC;
+ pDevInsR3->pDevInsForRCR3 = pDevIns->pDevInsForR3 + cbRing3;
+ pDevInsR3->pDevInsForRCR3 = pDevInsR3->pDevInsForRCR3 + RT_UOFFSETOF(PDMDEVINSRC, achInstanceData);
+ pDevInsR3->cbPciDev = cbPciDev;
+ pDevInsR3->cPciDevs = cPciDevs;
+ for (uint32_t i = 0; i < RT_MIN(cPciDevs, RT_ELEMENTS(pDevIns->apPciDevs)); i++)
+ pDevInsR3->apPciDevs[i] = pDevInsR3->pCritSectRoR3 + cbCritSect + cbPciDev * i;
+
+ pDevInsR3->Internal.s.pVMR3 = pGVM->pVMR3;
+ pDevInsR3->Internal.s.fIntFlags = RCPtrMapping == NIL_RTRGPTR ? PDMDEVINSINT_FLAGS_R0_ENABLED
+ : PDMDEVINSINT_FLAGS_R0_ENABLED | PDMDEVINSINT_FLAGS_RC_ENABLED;
+
+ /*
+ * Initialize the raw-mode instance data as much as possible.
+ */
+ if (RCPtrMapping != NIL_RTRGPTR)
+ {
+ struct PDMDEVINSRC *pDevInsRC = RCPtrMapping == NIL_RTRGPTR ? NULL
+ : (struct PDMDEVINSRC *)((uint8_t *)pDevIns + cbRing0 + cbRing3);
+
+ pDevIns->pDevInsForRC = RCPtrMapping;
+ pDevIns->pDevInsForRCR0 = pDevInsRC;
+ pDevIns->pvInstanceDataForRCR0 = &pDevInsRC->achInstanceData[0];
+
+ pDevInsRC->u32Version = PDM_DEVINSRC_VERSION;
+ pDevInsRC->iInstance = iInstance;
+ pDevInsRC->pvInstanceDataRC = pDevIns->pDevInsForRC + cbRC;
+ pDevInsRC->pvInstanceDataForRC = pDevIns->pDevInsForRC + RT_UOFFSETOF(PDMDEVINSRC, achInstanceData);
+ pDevInsRC->pCritSectRoRC = pDevIns->pDevInsForRC + cbRC + cbShared;
+ pDevInsRC->cbPciDev = cbPciDev;
+ pDevInsRC->cPciDevs = cPciDevs;
+ for (uint32_t i = 0; i < RT_MIN(cPciDevs, RT_ELEMENTS(pDevIns->apPciDevs)); i++)
+ pDevInsRC->apPciDevs[i] = pDevInsRC->pCritSectRoRC + cbCritSect + cbPciDev * i;
+
+ pDevInsRC->Internal.s.pVMRC = pGVM->pVMRC;
+ }
+
+ /*
+ * Add to the device instance array and set its handle value.
+ */
+ AssertCompile(sizeof(pGVM->pdmr0.padding) == sizeof(pGVM->pdmr0));
+ uint32_t idxR0Device = pGVM->pdmr0.s.cDevInstances;
+ if (idxR0Device < RT_ELEMENTS(pGVM->pdmr0.s.apDevInstances))
+ {
+ pGVM->pdmr0.s.apDevInstances[idxR0Device] = pDevIns;
+ pGVM->pdmr0.s.cDevInstances = idxR0Device + 1;
+ pDevIns->Internal.s.idxR0Device = idxR0Device;
+ pDevInsR3->Internal.s.idxR0Device = idxR0Device;
+
+ /*
+ * Call the early constructor if present.
+ */
+ if (pDevReg->pfnEarlyConstruct)
+ rc = pDevReg->pfnEarlyConstruct(pDevIns);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * We're done.
+ */
+ *ppDevInsR3 = RTR0MemObjAddressR3(hMapObj);
+ return rc;
+ }
+
+ /*
+ * Bail out.
+ */
+ if (pDevIns->pReg->pfnFinalDestruct)
+ pDevIns->pReg->pfnFinalDestruct(pDevIns);
+
+ pGVM->pdmr0.s.apDevInstances[idxR0Device] = NULL;
+ Assert(pGVM->pdmr0.s.cDevInstances == idxR0Device + 1);
+ pGVM->pdmr0.s.cDevInstances = idxR0Device;
+ }
+
+ RTR0MemObjFree(hMapObj, true);
+ }
+ RTR0MemObjFree(hMemObj, true);
+ return rc;
+}
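+
+/*
+ * Worked example (assumption, not from the actual sources): the offsets used
+ * above follow the layout diagram in the function comment.  The dump helper
+ * below is hypothetical; all members it reads are set by the worker.
+ */
+#if 0 /* illustrative only */
+static void pdmR0ExampleDumpLayout(PPDMDEVINSR0 pDevIns, uint32_t cbRing0, uint32_t cbRing3,
+                                   uint32_t cbRC, uint32_t cbShared, uint32_t cbCritSect)
+{
+    Log(("ring-0 devins + instance data : %p LB %#x\n", pDevIns, cbRing0));
+    Log(("ring-3 devins + instance data : %p LB %#x\n", (uint8_t *)pDevIns + cbRing0, cbRing3));
+    Log(("raw-mode devins (optional)    : %p LB %#x\n", (uint8_t *)pDevIns + cbRing0 + cbRing3, cbRC));
+    Log(("shared instance data          : %p LB %#x\n", pDevIns->pvInstanceDataR0, cbShared));
+    Log(("default critical section      : %p LB %#x\n", pDevIns->pCritSectRoR0, cbCritSect));
+    Log(("PCI device data               : %p\n", (uint8_t *)pDevIns->pCritSectRoR0 + cbCritSect));
+}
+#endif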
+
+
+/**
+ * Used by ring-3 PDM to create a device instance that operates in both ring-3
+ * and ring-0, and optionally also in the raw-mode context.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pReq Pointer to the request buffer.
+ * @thread EMT(0)
+ */
+VMMR0_INT_DECL(int) PDMR0DeviceCreateReqHandler(PGVM pGVM, PPDMDEVICECREATEREQ pReq)
+{
+ LogFlow(("PDMR0DeviceCreateReqHandler: %s in %s\n", pReq->szDevName, pReq->szModName));
+
+ /*
+ * Validate the request.
+ */
+ AssertReturn(pReq->Hdr.cbReq == sizeof(*pReq), VERR_INVALID_PARAMETER);
+ pReq->pDevInsR3 = NIL_RTR3PTR;
+
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, 0);
+ AssertRCReturn(rc, rc);
+
+ AssertReturn(pReq->fFlags != 0, VERR_INVALID_FLAGS);
+ AssertReturn(pReq->fClass != 0, VERR_WRONG_TYPE);
+ AssertReturn(pReq->uSharedVersion != 0, VERR_INVALID_PARAMETER);
+ AssertReturn(pReq->cbInstanceShared != 0, VERR_INVALID_PARAMETER);
+ size_t const cchDevName = RTStrNLen(pReq->szDevName, sizeof(pReq->szDevName));
+ AssertReturn(cchDevName < sizeof(pReq->szDevName), VERR_NO_STRING_TERMINATOR);
+ AssertReturn(cchDevName > 0, VERR_EMPTY_STRING);
+ AssertReturn(cchDevName < RT_SIZEOFMEMB(PDMDEVREG, szName), VERR_NOT_FOUND);
+
+ size_t const cchModName = RTStrNLen(pReq->szModName, sizeof(pReq->szModName));
+ AssertReturn(cchModName < sizeof(pReq->szModName), VERR_NO_STRING_TERMINATOR);
+ AssertReturn(cchModName > 0, VERR_EMPTY_STRING);
+ AssertReturn(pReq->cbInstanceShared <= PDM_MAX_DEVICE_INSTANCE_SIZE, VERR_OUT_OF_RANGE);
+ AssertReturn(pReq->cbInstanceR3 <= PDM_MAX_DEVICE_INSTANCE_SIZE, VERR_OUT_OF_RANGE);
+ AssertReturn(pReq->cbInstanceRC <= PDM_MAX_DEVICE_INSTANCE_SIZE, VERR_OUT_OF_RANGE);
+ AssertReturn(pReq->iInstance < 1024, VERR_OUT_OF_RANGE);
+ AssertReturn(pReq->iInstance < pReq->cMaxInstances, VERR_OUT_OF_RANGE);
+ AssertReturn(pReq->cMaxPciDevices <= 8, VERR_OUT_OF_RANGE);
+ AssertReturn(pReq->cMaxMsixVectors <= VBOX_MSIX_MAX_ENTRIES, VERR_OUT_OF_RANGE);
+
+ /*
+ * Reference the module.
+ */
+ void *hMod = NULL;
+ rc = SUPR0LdrModByName(pGVM->pSession, pReq->szModName, &hMod);
+ if (RT_FAILURE(rc))
+ {
+ LogRel(("PDMR0DeviceCreateReqHandler: SUPR0LdrModByName(,%s,) failed: %Rrc\n", pReq->szModName, rc));
+ return rc;
+ }
+
+ /*
+     * Look for the module and the device registration structure.
+ */
+ int rcLock = SUPR0LdrLock(pGVM->pSession);
+    AssertRC(rcLock);
+
+ rc = VERR_NOT_FOUND;
+ PPDMDEVMODREGR0 pMod;
+ RTListForEach(&g_PDMDevModList, pMod, PDMDEVMODREGR0, ListEntry)
+ {
+ if (pMod->hMod == hMod)
+ {
+ /*
+ * Found the module. We can drop the loader lock now before we
+ * search the devices it registers.
+ */
+ if (RT_SUCCESS(rcLock))
+ {
+ rcLock = SUPR0LdrUnlock(pGVM->pSession);
+ AssertRC(rcLock);
+ }
+            rcLock = VERR_ALREADY_RESET; /* Mark the lock as released so we don't unlock it again below. */
+
+ PCPDMDEVREGR0 *papDevRegs = pMod->papDevRegs;
+ size_t i = pMod->cDevRegs;
+ while (i-- > 0)
+ {
+ PCPDMDEVREGR0 pDevReg = papDevRegs[i];
+                LogFlow(("PDMR0DeviceCreateReqHandler: candidate #%u: %s %#x\n", i, pDevReg->szName, pDevReg->u32Version));
+ if ( PDM_VERSION_ARE_COMPATIBLE(pDevReg->u32Version, PDM_DEVREGR0_VERSION)
+ && pDevReg->szName[cchDevName] == '\0'
+ && memcmp(pDevReg->szName, pReq->szDevName, cchDevName) == 0)
+ {
+
+ /*
+ * Found the device, now check whether it matches the ring-3 registration.
+ */
+ if ( pReq->uSharedVersion == pDevReg->uSharedVersion
+ && pReq->cbInstanceShared == pDevReg->cbInstanceShared
+ && pReq->cbInstanceRC == pDevReg->cbInstanceRC
+ && pReq->fFlags == pDevReg->fFlags
+ && pReq->fClass == pDevReg->fClass
+ && pReq->cMaxInstances == pDevReg->cMaxInstances
+ && pReq->cMaxPciDevices == pDevReg->cMaxPciDevices
+ && pReq->cMaxMsixVectors == pDevReg->cMaxMsixVectors)
+ {
+ rc = pdmR0DeviceCreateWorker(pGVM, pDevReg, pReq->iInstance, pReq->cbInstanceR3, pReq->cbInstanceRC,
+ NIL_RTRCPTR /** @todo new raw-mode */, hMod, &pReq->pDevInsR3);
+ if (RT_SUCCESS(rc))
+ hMod = NULL; /* keep the module reference */
+ }
+ else
+ {
+ LogRel(("PDMR0DeviceCreate: Ring-3 does not match ring-0 device registration (%s):\n"
+ " uSharedVersion: %#x vs %#x\n"
+ " cbInstanceShared: %#x vs %#x\n"
+ " cbInstanceRC: %#x vs %#x\n"
+ " fFlags: %#x vs %#x\n"
+ " fClass: %#x vs %#x\n"
+ " cMaxInstances: %#x vs %#x\n"
+ " cMaxPciDevices: %#x vs %#x\n"
+ " cMaxMsixVectors: %#x vs %#x\n"
+ ,
+ pReq->szDevName,
+ pReq->uSharedVersion, pDevReg->uSharedVersion,
+ pReq->cbInstanceShared, pDevReg->cbInstanceShared,
+ pReq->cbInstanceRC, pDevReg->cbInstanceRC,
+ pReq->fFlags, pDevReg->fFlags,
+ pReq->fClass, pDevReg->fClass,
+ pReq->cMaxInstances, pDevReg->cMaxInstances,
+ pReq->cMaxPciDevices, pDevReg->cMaxPciDevices,
+ pReq->cMaxMsixVectors, pDevReg->cMaxMsixVectors));
+ rc = VERR_INCOMPATIBLE_CONFIG;
+ }
+ }
+ }
+ break;
+ }
+ }
+
+ if (RT_SUCCESS_NP(rcLock))
+ {
+ rcLock = SUPR0LdrUnlock(pGVM->pSession);
+ AssertRC(rcLock);
+ }
+ SUPR0LdrModRelease(pGVM->pSession, hMod);
+ return rc;
+}
+
+
+/**
+ * Used by ring-3 PDM to call standard ring-0 device methods.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pReq Pointer to the request buffer.
+ * @param idCpu The ID of the calling EMT.
+ * @thread EMT(0), except for PDMDEVICEGENCALL_REQUEST which can be any EMT.
+ */
+VMMR0_INT_DECL(int) PDMR0DeviceGenCallReqHandler(PGVM pGVM, PPDMDEVICEGENCALLREQ pReq, VMCPUID idCpu)
+{
+ /*
+ * Validate the request.
+ */
+ AssertReturn(pReq->Hdr.cbReq == sizeof(*pReq), VERR_INVALID_PARAMETER);
+
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ AssertRCReturn(rc, rc);
+
+ AssertReturn(pReq->idxR0Device < pGVM->pdmr0.s.cDevInstances, VERR_INVALID_HANDLE);
+ PPDMDEVINSR0 pDevIns = pGVM->pdmr0.s.apDevInstances[pReq->idxR0Device];
+ AssertPtrReturn(pDevIns, VERR_INVALID_HANDLE);
+ AssertReturn(pDevIns->pDevInsForR3 == pReq->pDevInsR3, VERR_INVALID_HANDLE);
+
+ /*
+ * Make the call.
+ */
+ rc = VINF_SUCCESS /*VINF_NOT_IMPLEMENTED*/;
+ switch (pReq->enmCall)
+ {
+ case PDMDEVICEGENCALL_CONSTRUCT:
+ AssertMsgBreakStmt(pGVM->enmVMState < VMSTATE_CREATED, ("enmVMState=%d\n", pGVM->enmVMState), rc = VERR_INVALID_STATE);
+ AssertReturn(idCpu == 0, VERR_VM_THREAD_NOT_EMT);
+ if (pDevIns->pReg->pfnConstruct)
+ rc = pDevIns->pReg->pfnConstruct(pDevIns);
+ break;
+
+ case PDMDEVICEGENCALL_DESTRUCT:
+ AssertMsgBreakStmt(pGVM->enmVMState < VMSTATE_CREATED || pGVM->enmVMState >= VMSTATE_DESTROYING,
+ ("enmVMState=%d\n", pGVM->enmVMState), rc = VERR_INVALID_STATE);
+ AssertReturn(idCpu == 0, VERR_VM_THREAD_NOT_EMT);
+ if (pDevIns->pReg->pfnDestruct)
+ {
+ pDevIns->pReg->pfnDestruct(pDevIns);
+ rc = VINF_SUCCESS;
+ }
+ break;
+
+ case PDMDEVICEGENCALL_REQUEST:
+ if (pDevIns->pReg->pfnRequest)
+ rc = pDevIns->pReg->pfnRequest(pDevIns, pReq->Params.Req.uReq, pReq->Params.Req.uArg);
+ else
+ rc = VERR_INVALID_FUNCTION;
+ break;
+
+ default:
+ AssertMsgFailed(("enmCall=%d\n", pReq->enmCall));
+ rc = VERR_INVALID_FUNCTION;
+ break;
+ }
+
+ return rc;
+}
+
+
+/**
+ * Legacy device mode compatibility.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pReq Pointer to the request buffer.
+ * @thread EMT(0)
+ */
+VMMR0_INT_DECL(int) PDMR0DeviceCompatSetCritSectReqHandler(PGVM pGVM, PPDMDEVICECOMPATSETCRITSECTREQ pReq)
+{
+ /*
+ * Validate the request.
+ */
+ AssertReturn(pReq->Hdr.cbReq == sizeof(*pReq), VERR_INVALID_PARAMETER);
+
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, 0);
+ AssertRCReturn(rc, rc);
+
+ AssertReturn(pReq->idxR0Device < pGVM->pdmr0.s.cDevInstances, VERR_INVALID_HANDLE);
+ PPDMDEVINSR0 pDevIns = pGVM->pdmr0.s.apDevInstances[pReq->idxR0Device];
+ AssertPtrReturn(pDevIns, VERR_INVALID_HANDLE);
+ AssertReturn(pDevIns->pDevInsForR3 == pReq->pDevInsR3, VERR_INVALID_HANDLE);
+
+ AssertReturn(pGVM->enmVMState == VMSTATE_CREATING, VERR_INVALID_STATE);
+
+ /*
+ * The critical section address can be in a few different places:
+ * 1. shared data.
+ * 2. nop section.
+ * 3. pdm critsect.
+ */
+ PPDMCRITSECT pCritSect;
+ if (pReq->pCritSectR3 == pGVM->pVMR3 + RT_UOFFSETOF(VM, pdm.s.NopCritSect))
+ {
+ pCritSect = &pGVM->pdm.s.NopCritSect;
+ Log(("PDMR0DeviceCompatSetCritSectReqHandler: Nop - %p %#x\n", pCritSect, pCritSect->s.Core.u32Magic));
+ }
+ else if (pReq->pCritSectR3 == pGVM->pVMR3 + RT_UOFFSETOF(VM, pdm.s.CritSect))
+ {
+ pCritSect = &pGVM->pdm.s.CritSect;
+ Log(("PDMR0DeviceCompatSetCritSectReqHandler: PDM - %p %#x\n", pCritSect, pCritSect->s.Core.u32Magic));
+ }
+ else
+ {
+ size_t offCritSect = pReq->pCritSectR3 - pDevIns->pDevInsForR3R0->pvInstanceDataR3;
+ AssertLogRelMsgReturn( offCritSect < pDevIns->pReg->cbInstanceShared
+ && offCritSect + sizeof(PDMCRITSECT) <= pDevIns->pReg->cbInstanceShared,
+ ("offCritSect=%p pCritSectR3=%p cbInstanceShared=%#x (%s)\n",
+ offCritSect, pReq->pCritSectR3, pDevIns->pReg->cbInstanceShared, pDevIns->pReg->szName),
+ VERR_INVALID_POINTER);
+ pCritSect = (PPDMCRITSECT)((uint8_t *)pDevIns->pvInstanceDataR0 + offCritSect);
+ Log(("PDMR0DeviceCompatSetCritSectReqHandler: custom - %#x/%p %#x\n", offCritSect, pCritSect, pCritSect->s.Core.u32Magic));
+ }
+ AssertLogRelMsgReturn(pCritSect->s.Core.u32Magic == RTCRITSECT_MAGIC,
+ ("cs=%p magic=%#x dev=%s\n", pCritSect, pCritSect->s.Core.u32Magic, pDevIns->pReg->szName),
+ VERR_INVALID_MAGIC);
+
+ /*
+ * Make the update.
+ */
+ pDevIns->pCritSectRoR0 = pCritSect;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Registers the device implementations living in a module.
+ *
+ * This should normally only be called during ModuleInit(). There should be a
+ * call to PDMR0DeviceDeregisterModule from the ModuleTerm() function to undo
+ * the effects of this call.
+ *
+ * @returns VBox status code.
+ * @param hMod The module handle of the module being registered.
+ * @param pModReg The module registration structure. This will be
+ * used directly so it must live as long as the module
+ * and be writable.
+ *
+ * @note Caller must own the loader lock!
+ */
+VMMR0DECL(int) PDMR0DeviceRegisterModule(void *hMod, PPDMDEVMODREGR0 pModReg)
+{
+ /*
+ * Validate the input.
+ */
+ AssertPtrReturn(hMod, VERR_INVALID_HANDLE);
+ Assert(SUPR0LdrIsLockOwnerByMod(hMod, true));
+
+ AssertPtrReturn(pModReg, VERR_INVALID_POINTER);
+ AssertLogRelMsgReturn(PDM_VERSION_ARE_COMPATIBLE(pModReg->u32Version, PDM_DEVMODREGR0_VERSION),
+ ("pModReg->u32Version=%#x vs %#x\n", pModReg->u32Version, PDM_DEVMODREGR0_VERSION),
+ VERR_VERSION_MISMATCH);
+ AssertLogRelMsgReturn(pModReg->cDevRegs <= 256 && pModReg->cDevRegs > 0, ("cDevRegs=%u\n", pModReg->cDevRegs),
+ VERR_OUT_OF_RANGE);
+ AssertLogRelMsgReturn(pModReg->hMod == NULL, ("hMod=%p\n", pModReg->hMod), VERR_INVALID_PARAMETER);
+ AssertLogRelMsgReturn(pModReg->ListEntry.pNext == NULL, ("pNext=%p\n", pModReg->ListEntry.pNext), VERR_INVALID_PARAMETER);
+ AssertLogRelMsgReturn(pModReg->ListEntry.pPrev == NULL, ("pPrev=%p\n", pModReg->ListEntry.pPrev), VERR_INVALID_PARAMETER);
+
+ for (size_t i = 0; i < pModReg->cDevRegs; i++)
+ {
+ PCPDMDEVREGR0 pDevReg = pModReg->papDevRegs[i];
+ AssertLogRelMsgReturn(RT_VALID_PTR(pDevReg), ("[%u]: %p\n", i, pDevReg), VERR_INVALID_POINTER);
+ AssertLogRelMsgReturn(PDM_VERSION_ARE_COMPATIBLE(pDevReg->u32Version, PDM_DEVREGR0_VERSION),
+                              ("pDevReg->u32Version=%#x vs %#x\n", pDevReg->u32Version, PDM_DEVREGR0_VERSION), VERR_VERSION_MISMATCH);
+ AssertLogRelMsgReturn(RT_VALID_PTR(pDevReg->pszDescription), ("[%u]: %p\n", i, pDevReg->pszDescription), VERR_INVALID_POINTER);
+ AssertLogRelMsgReturn(pDevReg->uReserved0 == 0, ("[%u]: %#x\n", i, pDevReg->uReserved0), VERR_INVALID_PARAMETER);
+ AssertLogRelMsgReturn(pDevReg->fClass != 0, ("[%u]: %#x\n", i, pDevReg->fClass), VERR_INVALID_PARAMETER);
+ AssertLogRelMsgReturn(pDevReg->fFlags != 0, ("[%u]: %#x\n", i, pDevReg->fFlags), VERR_INVALID_PARAMETER);
+ AssertLogRelMsgReturn(pDevReg->cMaxInstances > 0, ("[%u]: %#x\n", i, pDevReg->cMaxInstances), VERR_INVALID_PARAMETER);
+ AssertLogRelMsgReturn(pDevReg->cMaxPciDevices <= 8, ("[%u]: %#x\n", i, pDevReg->cMaxPciDevices), VERR_INVALID_PARAMETER);
+ AssertLogRelMsgReturn(pDevReg->cMaxMsixVectors <= VBOX_MSIX_MAX_ENTRIES,
+ ("[%u]: %#x\n", i, pDevReg->cMaxMsixVectors), VERR_INVALID_PARAMETER);
+
+ /* The name must be printable ascii and correctly terminated. */
+ for (size_t off = 0; off < RT_ELEMENTS(pDevReg->szName); off++)
+ {
+ char ch = pDevReg->szName[off];
+ AssertLogRelMsgReturn(RT_C_IS_PRINT(ch) || (ch == '\0' && off > 0),
+ ("[%u]: off=%u szName: %.*Rhxs\n", i, off, sizeof(pDevReg->szName), &pDevReg->szName[0]),
+ VERR_INVALID_NAME);
+ if (ch == '\0')
+ break;
+ }
+ }
+
+ /*
+ * Add it, assuming we're being called at ModuleInit/ModuleTerm time only, or
+ * that the caller has already taken the loader lock.
+ */
+ pModReg->hMod = hMod;
+ RTListAppend(&g_PDMDevModList, &pModReg->ListEntry);
+
+ return VINF_SUCCESS;
+}
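+
+/*
+ * Minimal sketch (assumption, not from the actual sources) of a device module
+ * registering its ring-0 devices from ModuleInit(), mirroring the built-in
+ * g_VBoxDDR0ModDevReg above.  g_DeviceExample, the table and the registration
+ * record are hypothetical.
+ */
+#if 0 /* illustrative only */
+static const PDMDEVREGR0 *g_apExampleDevRegs[] =
+{
+    &g_DeviceExample,
+};
+
+static PDMDEVMODREGR0 g_ExampleModDevReg =
+{
+    /* .u32Version = */ PDM_DEVMODREGR0_VERSION,
+    /* .cDevRegs   = */ RT_ELEMENTS(g_apExampleDevRegs),
+    /* .papDevRegs = */ &g_apExampleDevRegs[0],
+    /* .hMod       = */ NULL,
+    /* .ListEntry  = */ { NULL, NULL },
+};
+
+DECLEXPORT(int) ModuleInit(void *hMod)
+{
+    /* The loader lock is owned by the caller while ModuleInit() runs. */
+    return PDMR0DeviceRegisterModule(hMod, &g_ExampleModDevReg);
+}
+#endif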
+
+
+/**
+ * Deregisters the device implementations living in a module.
+ *
+ * This should normally only be called during ModuleTerm().
+ *
+ * @returns VBox status code.
+ * @param   hMod            The module handle of the module being deregistered.
+ * @param pModReg The module registration structure. This will be
+ * used directly so it must live as long as the module
+ * and be writable.
+ *
+ * @note Caller must own the loader lock!
+ */
+VMMR0DECL(int) PDMR0DeviceDeregisterModule(void *hMod, PPDMDEVMODREGR0 pModReg)
+{
+ /*
+ * Validate the input.
+ */
+ AssertPtrReturn(hMod, VERR_INVALID_HANDLE);
+ Assert(SUPR0LdrIsLockOwnerByMod(hMod, true));
+
+ AssertPtrReturn(pModReg, VERR_INVALID_POINTER);
+ AssertLogRelMsgReturn(PDM_VERSION_ARE_COMPATIBLE(pModReg->u32Version, PDM_DEVMODREGR0_VERSION),
+ ("pModReg->u32Version=%#x vs %#x\n", pModReg->u32Version, PDM_DEVMODREGR0_VERSION),
+ VERR_VERSION_MISMATCH);
+ AssertLogRelMsgReturn(pModReg->hMod == hMod || pModReg->hMod == NULL, ("pModReg->hMod=%p vs %p\n", pModReg->hMod, hMod),
+ VERR_INVALID_PARAMETER);
+
+ /*
+ * Unlink the registration record and return it to virgin conditions. Ignore
+ * the call if not registered.
+ */
+ if (pModReg->hMod)
+ {
+ pModReg->hMod = NULL;
+ RTListNodeRemove(&pModReg->ListEntry);
+ pModReg->ListEntry.pNext = NULL;
+ pModReg->ListEntry.pPrev = NULL;
+ return VINF_SUCCESS;
+ }
+ return VWRN_NOT_FOUND;
+}
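+
+/*
+ * Matching sketch (assumption, not from the actual sources): ModuleTerm()
+ * undoing the registration shown after PDMR0DeviceRegisterModule above.
+ */
+#if 0 /* illustrative only */
+DECLEXPORT(void) ModuleTerm(void *hMod)
+{
+    int rc = PDMR0DeviceDeregisterModule(hMod, &g_ExampleModDevReg);
+    AssertRC(rc); /* VWRN_NOT_FOUND if the module never registered. */
+}
+#endif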
+
diff --git a/src/VBox/VMM/VMMR0/PDMR0Driver.cpp b/src/VBox/VMM/VMMR0/PDMR0Driver.cpp
new file mode 100644
index 00000000..c22d9805
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/PDMR0Driver.cpp
@@ -0,0 +1,163 @@
+/* $Id: PDMR0Driver.cpp $ */
+/** @file
+ * PDM - Pluggable Device and Driver Manager, R0 Driver parts.
+ */
+
+/*
+ * Copyright (C) 2010-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_PDM_DRIVER
+#include "PDMInternal.h"
+#include <VBox/vmm/pdm.h>
+#include <VBox/vmm/vmcc.h>
+#include <VBox/vmm/gvmm.h>
+
+#include <VBox/log.h>
+#include <iprt/errcore.h>
+#include <iprt/assert.h>
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+RT_C_DECLS_BEGIN
+extern DECLEXPORT(const PDMDRVHLPR0) g_pdmR0DrvHlp;
+RT_C_DECLS_END
+
+
+/** @name Ring-0 Context Driver Helpers
+ * @{
+ */
+
+/** @interface_method_impl{PDMDRVHLPR0,pfnVMSetError} */
+static DECLCALLBACK(int) pdmR0DrvHlp_VMSetError(PPDMDRVINS pDrvIns, int rc, RT_SRC_POS_DECL, const char *pszFormat, ...)
+{
+ PDMDRV_ASSERT_DRVINS(pDrvIns);
+ va_list args;
+ va_start(args, pszFormat);
+ int rc2 = VMSetErrorV(pDrvIns->Internal.s.pVMR0, rc, RT_SRC_POS_ARGS, pszFormat, args); Assert(rc2 == rc); NOREF(rc2);
+ va_end(args);
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDRVHLPR0,pfnVMSetErrorV} */
+static DECLCALLBACK(int) pdmR0DrvHlp_VMSetErrorV(PPDMDRVINS pDrvIns, int rc, RT_SRC_POS_DECL, const char *pszFormat, va_list va)
+{
+ PDMDRV_ASSERT_DRVINS(pDrvIns);
+ int rc2 = VMSetErrorV(pDrvIns->Internal.s.pVMR0, rc, RT_SRC_POS_ARGS, pszFormat, va); Assert(rc2 == rc); NOREF(rc2);
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDRVHLPR0,pfnVMSetRuntimeError} */
+static DECLCALLBACK(int) pdmR0DrvHlp_VMSetRuntimeError(PPDMDRVINS pDrvIns, uint32_t fFlags, const char *pszErrorId,
+ const char *pszFormat, ...)
+{
+ PDMDRV_ASSERT_DRVINS(pDrvIns);
+ va_list va;
+ va_start(va, pszFormat);
+ int rc = VMSetRuntimeErrorV(pDrvIns->Internal.s.pVMR0, fFlags, pszErrorId, pszFormat, va);
+ va_end(va);
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDRVHLPR0,pfnVMSetRuntimeErrorV} */
+static DECLCALLBACK(int) pdmR0DrvHlp_VMSetRuntimeErrorV(PPDMDRVINS pDrvIns, uint32_t fFlags, const char *pszErrorId,
+ const char *pszFormat, va_list va)
+{
+ PDMDRV_ASSERT_DRVINS(pDrvIns);
+ int rc = VMSetRuntimeErrorV(pDrvIns->Internal.s.pVMR0, fFlags, pszErrorId, pszFormat, va);
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDRVHLPR0,pfnAssertEMT} */
+static DECLCALLBACK(bool) pdmR0DrvHlp_AssertEMT(PPDMDRVINS pDrvIns, const char *pszFile, unsigned iLine, const char *pszFunction)
+{
+ PDMDRV_ASSERT_DRVINS(pDrvIns);
+ if (VM_IS_EMT(pDrvIns->Internal.s.pVMR0))
+ return true;
+
+ RTAssertMsg1Weak("AssertEMT", iLine, pszFile, pszFunction);
+ RTAssertPanic();
+ return false;
+}
+
+
+/** @interface_method_impl{PDMDRVHLPR0,pfnAssertOther} */
+static DECLCALLBACK(bool) pdmR0DrvHlp_AssertOther(PPDMDRVINS pDrvIns, const char *pszFile, unsigned iLine, const char *pszFunction)
+{
+ PDMDRV_ASSERT_DRVINS(pDrvIns);
+ if (!VM_IS_EMT(pDrvIns->Internal.s.pVMR0))
+ return true;
+
+ RTAssertMsg1Weak("AssertOther", iLine, pszFile, pszFunction);
+ RTAssertPanic();
+ return false;
+}
+
+
+/**
+ * The Ring-0 Context Driver Helper Callbacks.
+ */
+extern DECLEXPORT(const PDMDRVHLPR0) g_pdmR0DrvHlp =
+{
+    PDM_DRVHLPR0_VERSION,
+ pdmR0DrvHlp_VMSetError,
+ pdmR0DrvHlp_VMSetErrorV,
+ pdmR0DrvHlp_VMSetRuntimeError,
+ pdmR0DrvHlp_VMSetRuntimeErrorV,
+ pdmR0DrvHlp_AssertEMT,
+ pdmR0DrvHlp_AssertOther,
+    PDM_DRVHLPR0_VERSION
+};
+
+/** @} */
+
+
+
+/**
+ * PDMDrvHlpCallR0 helper.
+ *
+ * @returns See PFNPDMDRVREQHANDLERR0.
+ * @param pGVM The global (ring-0) VM structure. (For validation.)
+ * @param pReq Pointer to the request buffer.
+ */
+VMMR0_INT_DECL(int) PDMR0DriverCallReqHandler(PGVM pGVM, PPDMDRIVERCALLREQHANDLERREQ pReq)
+{
+ /*
+ * Validate input and make the call.
+ */
+ int rc = GVMMR0ValidateGVM(pGVM);
+ if (RT_SUCCESS(rc))
+ {
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ PPDMDRVINS pDrvIns = pReq->pDrvInsR0;
+ AssertPtrReturn(pDrvIns, VERR_INVALID_POINTER);
+ AssertReturn(pDrvIns->Internal.s.pVMR0 == pGVM, VERR_INVALID_PARAMETER);
+
+ PFNPDMDRVREQHANDLERR0 pfnReqHandlerR0 = pDrvIns->Internal.s.pfnReqHandlerR0;
+ AssertPtrReturn(pfnReqHandlerR0, VERR_INVALID_POINTER);
+
+ rc = pfnReqHandlerR0(pDrvIns, pReq->uOperation, pReq->u64Arg);
+ }
+ return rc;
+}
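+
+/*
+ * Minimal sketch (assumption, not from the actual sources) of the kind of
+ * ring-0 request handler a driver could install in pfnReqHandlerR0 and reach
+ * via PDMDrvHlpCallR0.  The handler name and operation code are hypothetical;
+ * the parameter types mirror the call made above.
+ */
+#if 0 /* illustrative only */
+static DECLCALLBACK(int) drvExampleReqHandlerR0(PPDMDRVINS pDrvIns, uint32_t uOperation, uint64_t u64Arg)
+{
+    RT_NOREF(pDrvIns);
+    switch (uOperation)
+    {
+        case 1: /* hypothetical DRVEXAMPLE_REQ_PING */
+            Log(("drvExampleReqHandlerR0: ping arg=%#RX64\n", u64Arg));
+            return VINF_SUCCESS;
+        default:
+            return VERR_INVALID_FUNCTION;
+    }
+}
+#endif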
+
diff --git a/src/VBox/VMM/VMMR0/PGMR0.cpp b/src/VBox/VMM/VMMR0/PGMR0.cpp
new file mode 100644
index 00000000..365cd90a
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/PGMR0.cpp
@@ -0,0 +1,807 @@
+/* $Id: PGMR0.cpp $ */
+/** @file
+ * PGM - Page Manager and Monitor, Ring-0.
+ */
+
+/*
+ * Copyright (C) 2007-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_PGM
+#include <VBox/rawpci.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/gmm.h>
+#include "PGMInternal.h"
+#include <VBox/vmm/pdmdev.h>
+#include <VBox/vmm/vmcc.h>
+#include <VBox/vmm/gvm.h>
+#include "PGMInline.h"
+#include <VBox/log.h>
+#include <VBox/err.h>
+#include <iprt/assert.h>
+#include <iprt/mem.h>
+#include <iprt/memobj.h>
+
+
+/*
+ * Instantiate the ring-0 header/code templates.
+ */
+/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
+#define PGM_BTH_NAME(name) PGM_BTH_NAME_32BIT_PROT(name)
+#include "PGMR0Bth.h"
+#undef PGM_BTH_NAME
+
+#define PGM_BTH_NAME(name) PGM_BTH_NAME_PAE_PROT(name)
+#include "PGMR0Bth.h"
+#undef PGM_BTH_NAME
+
+#define PGM_BTH_NAME(name) PGM_BTH_NAME_AMD64_PROT(name)
+#include "PGMR0Bth.h"
+#undef PGM_BTH_NAME
+
+#define PGM_BTH_NAME(name) PGM_BTH_NAME_EPT_PROT(name)
+#include "PGMR0Bth.h"
+#undef PGM_BTH_NAME
+
+
+/**
+ * Initializes the per-VM data for the PGM.
+ *
+ * This is called from under the GVMM lock, so it should only initialize the
+ * data so PGMR0CleanupVM and others will work smoothly.
+ *
+ * @returns VBox status code.
+ * @param pGVM Pointer to the global VM structure.
+ */
+VMMR0_INT_DECL(int) PGMR0InitPerVMData(PGVM pGVM)
+{
+ AssertCompile(sizeof(pGVM->pgm.s) <= sizeof(pGVM->pgm.padding));
+ AssertCompile(sizeof(pGVM->pgmr0.s) <= sizeof(pGVM->pgmr0.padding));
+
+ AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMapObjs));
+ for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
+ {
+ pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
+ pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
+ }
+ return RTCritSectInit(&pGVM->pgmr0.s.PoolGrowCritSect);
+}
+
+
+/**
+ * Initialize the per-VM PGM for ring-0.
+ *
+ * @returns VBox status code.
+ * @param pGVM Pointer to the global VM structure.
+ */
+VMMR0_INT_DECL(int) PGMR0InitVM(PGVM pGVM)
+{
+ int rc = VINF_SUCCESS;
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ rc = PGMR0DynMapInitVM(pGVM);
+#endif
+ RT_NOREF(pGVM);
+ return rc;
+}
+
+
+/**
+ * Cleans up any loose ends before the GVM structure is destroyed.
+ */
+VMMR0_INT_DECL(void) PGMR0CleanupVM(PGVM pGVM)
+{
+ for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs); i++)
+ {
+ if (pGVM->pgmr0.s.ahPoolMapObjs[i] != NIL_RTR0MEMOBJ)
+ {
+ int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMapObjs[i], true /*fFreeMappings*/);
+ AssertRC(rc);
+ pGVM->pgmr0.s.ahPoolMapObjs[i] = NIL_RTR0MEMOBJ;
+ }
+
+ if (pGVM->pgmr0.s.ahPoolMemObjs[i] != NIL_RTR0MEMOBJ)
+ {
+ int rc = RTR0MemObjFree(pGVM->pgmr0.s.ahPoolMemObjs[i], true /*fFreeMappings*/);
+ AssertRC(rc);
+ pGVM->pgmr0.s.ahPoolMemObjs[i] = NIL_RTR0MEMOBJ;
+ }
+ }
+
+ if (RTCritSectIsInitialized(&pGVM->pgmr0.s.PoolGrowCritSect))
+ RTCritSectDelete(&pGVM->pgmr0.s.PoolGrowCritSect);
+}
+
+
+/**
+ * Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage.
+ *
+ * @returns The following VBox status codes.
+ * @retval VINF_SUCCESS on success. FF cleared.
+ * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The ID of the calling EMT.
+ *
+ * @thread EMT(idCpu)
+ *
+ * @remarks Must be called from within the PGM critical section. The caller
+ * must clear the new pages.
+ */
+VMMR0_INT_DECL(int) PGMR0PhysAllocateHandyPages(PGVM pGVM, VMCPUID idCpu)
+{
+ /*
+ * Validate inputs.
+ */
+ AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
+ AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
+ PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
+
+ /*
+ * Check for error injection.
+ */
+ if (RT_UNLIKELY(pGVM->pgm.s.fErrInjHandyPages))
+ return VERR_NO_MEMORY;
+
+ /*
+     * Try to allocate a full set of handy pages.
+ */
+ uint32_t iFirst = pGVM->pgm.s.cHandyPages;
+ AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
+ uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
+ if (!cPages)
+ return VINF_SUCCESS;
+ int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_STRICT
+ for (uint32_t i = 0; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
+ {
+ Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
+ Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
+ Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
+ Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
+ Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
+ }
+#endif
+
+ pGVM->pgm.s.cHandyPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages);
+ }
+ else if (rc != VERR_GMM_SEED_ME)
+ {
+ if ( ( rc == VERR_GMM_HIT_GLOBAL_LIMIT
+ || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
+ && iFirst < PGM_HANDY_PAGES_MIN)
+ {
+
+#ifdef VBOX_STRICT
+            /* We're ASSUMING that GMM has updated all the entries before failing us. */
+ uint32_t i;
+ for (i = iFirst; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
+ {
+ Assert(pGVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
+ Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
+ Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
+ }
+#endif
+
+ /*
+ * Reduce the number of pages until we hit the minimum limit.
+ */
+ do
+ {
+ cPages >>= 1;
+ if (cPages + iFirst < PGM_HANDY_PAGES_MIN)
+ cPages = PGM_HANDY_PAGES_MIN - iFirst;
+ rc = GMMR0AllocateHandyPages(pGVM, idCpu, 0, cPages, &pGVM->pgm.s.aHandyPages[iFirst]);
+ } while ( ( rc == VERR_GMM_HIT_GLOBAL_LIMIT
+ || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
+ && cPages + iFirst > PGM_HANDY_PAGES_MIN);
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_STRICT
+ i = iFirst + cPages;
+ while (i-- > 0)
+ {
+ Assert(pGVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
+ Assert(pGVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
+ Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
+ Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
+ Assert(!(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
+ }
+
+ for (i = cPages + iFirst; i < RT_ELEMENTS(pGVM->pgm.s.aHandyPages); i++)
+ {
+ Assert(pGVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
+ Assert(pGVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
+ Assert(pGVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
+ }
+#endif
+
+ pGVM->pgm.s.cHandyPages = iFirst + cPages;
+ }
+ }
+
+ if (RT_FAILURE(rc) && rc != VERR_GMM_SEED_ME)
+ {
+ LogRel(("PGMR0PhysAllocateHandyPages: rc=%Rrc iFirst=%d cPages=%d\n", rc, iFirst, cPages));
+ VM_FF_SET(pGVM, VM_FF_PGM_NO_MEMORY);
+ }
+ }
+
+
+ LogFlow(("PGMR0PhysAllocateHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
+ return rc;
+}
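+
+/*
+ * Worked example of the reduction loop above (the numbers are assumptions,
+ * not the real array sizes): with 128 handy page entries, iFirst = 8 and
+ * PGM_HANDY_PAGES_MIN = 32, the initial attempt asks for 120 pages and the
+ * retries then ask for 60, 30 and finally 24 pages (clamped to
+ * PGM_HANDY_PAGES_MIN - iFirst) before the loop gives up if GMM still
+ * reports an account or global limit error.
+ */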
+
+
+/**
+ * Flushes any changes pending in the handy page array.
+ *
+ * It is very important that this gets done when page sharing is enabled.
+ *
+ * @returns The following VBox status codes.
+ * @retval VINF_SUCCESS on success. FF cleared.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The ID of the calling EMT.
+ *
+ * @thread EMT(idCpu)
+ *
+ * @remarks Must be called from within the PGM critical section.
+ */
+VMMR0_INT_DECL(int) PGMR0PhysFlushHandyPages(PGVM pGVM, VMCPUID idCpu)
+{
+ /*
+ * Validate inputs.
+ */
+ AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
+ AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
+ PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
+
+ /*
+     * Pass the pending handy page changes on to GMM (no new allocations).
+ */
+ uint32_t iFirst = pGVM->pgm.s.cHandyPages;
+ AssertReturn(iFirst <= RT_ELEMENTS(pGVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
+ uint32_t cPages = RT_ELEMENTS(pGVM->pgm.s.aHandyPages) - iFirst;
+ if (!cPages)
+ return VINF_SUCCESS;
+ int rc = GMMR0AllocateHandyPages(pGVM, idCpu, cPages, 0, &pGVM->pgm.s.aHandyPages[iFirst]);
+
+ LogFlow(("PGMR0PhysFlushHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
+ return rc;
+}
+
+
+/**
+ * Worker function for PGMR3PhysAllocateLargeHandyPage
+ *
+ * @returns The following VBox status codes.
+ * @retval VINF_SUCCESS on success.
+ * @retval VINF_EM_NO_MEMORY if we're out of memory.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The ID of the calling EMT.
+ *
+ * @thread EMT(idCpu)
+ *
+ * @remarks Must be called from within the PGM critical section. The caller
+ * must clear the new pages.
+ */
+VMMR0_INT_DECL(int) PGMR0PhysAllocateLargeHandyPage(PGVM pGVM, VMCPUID idCpu)
+{
+ /*
+ * Validate inputs.
+ */
+ AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
+ AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
+ PGM_LOCK_ASSERT_OWNER_EX(pGVM, &pGVM->aCpus[idCpu]);
+ Assert(!pGVM->pgm.s.cLargeHandyPages);
+
+ /*
+ * Do the job.
+ */
+ int rc = GMMR0AllocateLargePage(pGVM, idCpu, _2M,
+ &pGVM->pgm.s.aLargeHandyPage[0].idPage,
+ &pGVM->pgm.s.aLargeHandyPage[0].HCPhysGCPhys);
+ if (RT_SUCCESS(rc))
+ pGVM->pgm.s.cLargeHandyPages = 1;
+
+ return rc;
+}
+
+
+/**
+ * Locate an MMIO2 range.
+ *
+ * @returns Pointer to the MMIO2 range.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pDevIns The device instance owning the region.
+ * @param hMmio2 Handle to look up.
+ */
+DECLINLINE(PPGMREGMMIO2RANGE) pgmR0PhysMMIOExFind(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
+{
+ /*
+ * We use the lookup table here as list walking is tedious in ring-0 when using
+ * ring-3 pointers and this probably will require some kind of refactoring anyway.
+ */
+ if (hMmio2 <= RT_ELEMENTS(pGVM->pgm.s.apMmio2RangesR0) && hMmio2 != 0)
+ {
+ PPGMREGMMIO2RANGE pCur = pGVM->pgm.s.apMmio2RangesR0[hMmio2 - 1];
+ if (pCur && pCur->pDevInsR3 == pDevIns->pDevInsForR3)
+ {
+ Assert(pCur->idMmio2 == hMmio2);
+ AssertReturn(pCur->fFlags & PGMREGMMIO2RANGE_F_MMIO2, NULL);
+ return pCur;
+ }
+ Assert(!pCur);
+ }
+ return NULL;
+}
+
+
+/**
+ * Worker for PDMDEVHLPR0::pfnMmio2SetUpContext.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pDevIns The device instance.
+ * @param hMmio2 The MMIO2 region to map into ring-0 address space.
+ * @param offSub The offset into the region.
+ * @param cbSub The size of the mapping, zero meaning all the rest.
+ * @param ppvMapping Where to return the ring-0 mapping address.
+ */
+VMMR0_INT_DECL(int) PGMR0PhysMMIO2MapKernel(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2,
+ size_t offSub, size_t cbSub, void **ppvMapping)
+{
+ AssertReturn(!(offSub & PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
+ AssertReturn(!(cbSub & PAGE_OFFSET_MASK), VERR_UNSUPPORTED_ALIGNMENT);
+
+ /*
+     * Translate hMmio2 into a range pointer.
+ */
+ PPGMREGMMIO2RANGE pFirstRegMmio = pgmR0PhysMMIOExFind(pGVM, pDevIns, hMmio2);
+ AssertReturn(pFirstRegMmio, VERR_NOT_FOUND);
+#if defined(VBOX_WITH_RAM_IN_KERNEL) && !defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM)
+ uint8_t * const pvR0 = (uint8_t *)pFirstRegMmio->pvR0;
+#else
+ RTR3PTR const pvR3 = pFirstRegMmio->pvR3;
+#endif
+ RTGCPHYS const cbReal = pFirstRegMmio->cbReal;
+ pFirstRegMmio = NULL;
+ ASMCompilerBarrier();
+
+ AssertReturn(offSub < cbReal, VERR_OUT_OF_RANGE);
+ if (cbSub == 0)
+ cbSub = cbReal - offSub;
+ else
+ AssertReturn(cbSub < cbReal && cbSub + offSub <= cbReal, VERR_OUT_OF_RANGE);
+
+ /*
+ * Do the mapping.
+ */
+#if defined(VBOX_WITH_RAM_IN_KERNEL) && !defined(VBOX_WITH_LINEAR_HOST_PHYS_MEM)
+ AssertPtr(pvR0);
+ *ppvMapping = pvR0 + offSub;
+ return VINF_SUCCESS;
+#else
+ return SUPR0PageMapKernel(pGVM->pSession, pvR3, (uint32_t)offSub, (uint32_t)cbSub, 0 /*fFlags*/, ppvMapping);
+#endif
+}
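+
+/*
+ * Minimal usage sketch (assumption, not from the actual sources): devices
+ * normally reach this worker through the PDMDEVHLPR0::pfnMmio2SetUpContext
+ * helper; the direct call below and the function name are hypothetical.
+ */
+#if 0 /* illustrative only */
+static int devExampleMapMmio2(PGVM pGVM, PPDMDEVINS pDevIns, PGMMMIO2HANDLE hMmio2)
+{
+    void *pvMapping = NULL;
+    int rc = PGMR0PhysMMIO2MapKernel(pGVM, pDevIns, hMmio2,
+                                     0 /*offSub*/, 0 /*cbSub: rest of the region*/, &pvMapping);
+    if (RT_SUCCESS(rc))
+        Log(("Example: MMIO2 region mapped at %p in ring-0\n", pvMapping));
+    return rc;
+}
+#endif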
+
+
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+/* Interface sketch. The interface belongs to a global PCI pass-through
+ manager. It shall use the global VM handle, not the user VM handle to
+ store the per-VM info (domain) since that is all ring-0 stuff, thus
+   passing pGVM here. I've tentatively prefixed the functions 'GPciRawR0',
+   we can discuss the PciRaw code re-organization when I'm back from
+   vacation.
+
+   I've implemented the initial IOMMU set up below. For things to work
+   reliably, we will probably need to add a whole bunch of checks and
+ GPciRawR0GuestPageUpdate call to the PGM code. For the present,
+ assuming nested paging (enforced) and prealloc (enforced), no
+ ballooning (check missing), page sharing (check missing) or live
+ migration (check missing), it might work fine. At least if some
+ VM power-off hook is present and can tear down the IOMMU page tables. */
+
+/**
+ * Tells the global PCI pass-through manager that we are about to set up the
+ * guest page to host page mappings for the specified VM.
+ *
+ * @returns VBox status code.
+ *
+ * @param pGVM The ring-0 VM structure.
+ */
+VMMR0_INT_DECL(int) GPciRawR0GuestPageBeginAssignments(PGVM pGVM)
+{
+ NOREF(pGVM);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Assigns a host page mapping for a guest page.
+ *
+ * This is only used when setting up the mappings, i.e. between
+ * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM structure.
+ * @param GCPhys The address of the guest page (page aligned).
+ * @param HCPhys The address of the host page (page aligned).
+ */
+VMMR0_INT_DECL(int) GPciRawR0GuestPageAssign(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
+{
+ AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
+ AssertReturn(!(HCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
+
+ if (pGVM->rawpci.s.pfnContigMemInfo)
+ /** @todo what do we do on failure? */
+ pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, HCPhys, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_MAP);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Indicates that the specified guest page either doesn't exist or doesn't have
+ * a host page mapping we trust PCI pass-through with.
+ *
+ * This is only used when setting up the mappings, i.e. between
+ * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM structure.
+ * @param GCPhys The address of the guest page (page aligned).
+ */
+VMMR0_INT_DECL(int) GPciRawR0GuestPageUnassign(PGVM pGVM, RTGCPHYS GCPhys)
+{
+ AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
+
+ if (pGVM->rawpci.s.pfnContigMemInfo)
+ /** @todo what do we do on failure? */
+ pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, 0, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_UNMAP);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Tells the global PCI pass-through manager that we have completed setting up
+ * the guest page to host page mappings for the specified VM.
+ *
+ * This complements GPciRawR0GuestPageBeginAssignments and will be called even
+ * if some page assignment failed.
+ *
+ * @returns VBox status code.
+ *
+ * @param pGVM The ring-0 VM structure.
+ */
+VMMR0_INT_DECL(int) GPciRawR0GuestPageEndAssignments(PGVM pGVM)
+{
+ NOREF(pGVM);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Tells the global PCI pass-through manager that a guest page mapping has
+ * changed after the initial setup.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM structure.
+ * @param GCPhys The address of the guest page (page aligned).
+ * @param HCPhys The new host page address or NIL_RTHCPHYS if
+ * now unassigned.
+ */
+VMMR0_INT_DECL(int) GPciRawR0GuestPageUpdate(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
+{
+ AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_4);
+ AssertReturn(!(HCPhys & PAGE_OFFSET_MASK) || HCPhys == NIL_RTHCPHYS, VERR_INTERNAL_ERROR_4);
+ NOREF(pGVM);
+ return VINF_SUCCESS;
+}
+
+#endif /* VBOX_WITH_PCI_PASSTHROUGH */
+
+
+/**
+ * Sets up the IOMMU when raw PCI device is enabled.
+ *
+ * @note This is a hack that will probably be remodelled and refined later!
+ *
+ * @returns VBox status code.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ */
+VMMR0_INT_DECL(int) PGMR0PhysSetupIoMmu(PGVM pGVM)
+{
+ int rc = GVMMR0ValidateGVM(pGVM);
+ if (RT_FAILURE(rc))
+ return rc;
+
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ if (pGVM->pgm.s.fPciPassthrough)
+ {
+ /*
+         * The Simplistic Approach - Enumerate all the pages and tell the
+ * IOMMU about each of them.
+ */
+ pgmLock(pGVM);
+ rc = GPciRawR0GuestPageBeginAssignments(pGVM);
+ if (RT_SUCCESS(rc))
+ {
+ for (PPGMRAMRANGE pRam = pGVM->pgm.s.pRamRangesXR0; RT_SUCCESS(rc) && pRam; pRam = pRam->pNextR0)
+ {
+ PPGMPAGE pPage = &pRam->aPages[0];
+ RTGCPHYS GCPhys = pRam->GCPhys;
+ uint32_t cLeft = pRam->cb >> PAGE_SHIFT;
+ while (cLeft-- > 0)
+ {
+ /* Only expose pages that are 100% safe for now. */
+ if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
+ && PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED
+ && !PGM_PAGE_HAS_ANY_HANDLERS(pPage))
+ rc = GPciRawR0GuestPageAssign(pGVM, GCPhys, PGM_PAGE_GET_HCPHYS(pPage));
+ else
+ rc = GPciRawR0GuestPageUnassign(pGVM, GCPhys);
+
+ /* next */
+ pPage++;
+ GCPhys += PAGE_SIZE;
+ }
+ }
+
+ int rc2 = GPciRawR0GuestPageEndAssignments(pGVM);
+ if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
+ rc = rc2;
+ }
+ pgmUnlock(pGVM);
+ }
+ else
+#endif
+ rc = VERR_NOT_SUPPORTED;
+ return rc;
+}
+
+
+/**
+ * \#PF Handler for nested paging.
+ *
+ * @returns VBox status code (appropriate for trap handling and GC return).
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pGVCpu The global (ring-0) CPU structure of the calling
+ * EMT.
+ * @param enmShwPagingMode Paging mode for the nested page tables.
+ * @param uErr The trap error code.
+ * @param pRegFrame Trap register frame.
+ * @param GCPhysFault The fault address.
+ */
+VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
+ PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault)
+{
+ int rc;
+
+ LogFlow(("PGMTrap0eHandler: uErr=%RGx GCPhysFault=%RGp eip=%RGv\n", uErr, GCPhysFault, (RTGCPTR)pRegFrame->rip));
+ STAM_PROFILE_START(&pGVCpu->pgm.s.StatRZTrap0e, a);
+ STAM_STATS({ pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = NULL; } );
+
+ /* AMD uses the host's paging mode; Intel has a single mode (EPT). */
+ AssertMsg( enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX
+ || enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT,
+ ("enmShwPagingMode=%d\n", enmShwPagingMode));
+
+ /* Reserved shouldn't end up here. */
+ Assert(!(uErr & X86_TRAP_PF_RSVD));
+
+#ifdef VBOX_WITH_STATISTICS
+ /*
+ * Error code stats.
+ */
+ if (uErr & X86_TRAP_PF_US)
+ {
+ if (!(uErr & X86_TRAP_PF_P))
+ {
+ if (uErr & X86_TRAP_PF_RW)
+ STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNotPresentWrite);
+ else
+ STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNotPresentRead);
+ }
+ else if (uErr & X86_TRAP_PF_RW)
+ STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSWrite);
+ else if (uErr & X86_TRAP_PF_RSVD)
+ STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSReserved);
+ else if (uErr & X86_TRAP_PF_ID)
+ STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNXE);
+ else
+ STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSRead);
+ }
+ else
+ { /* Supervisor */
+ if (!(uErr & X86_TRAP_PF_P))
+ {
+ if (uErr & X86_TRAP_PF_RW)
+ STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVNotPresentWrite);
+ else
+ STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVNotPresentRead);
+ }
+ else if (uErr & X86_TRAP_PF_RW)
+ STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVWrite);
+ else if (uErr & X86_TRAP_PF_ID)
+ STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSNXE);
+ else if (uErr & X86_TRAP_PF_RSVD)
+ STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVReserved);
+ }
+#endif
+
+ /*
+ * Call the worker.
+ *
+ * Note! We pretend the guest is in protected mode without paging, so we
+ * can use existing code to build the nested page tables.
+ */
+/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
+ bool fLockTaken = false;
+ switch (enmShwPagingMode)
+ {
+ case PGMMODE_32_BIT:
+ rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
+ break;
+ case PGMMODE_PAE:
+ case PGMMODE_PAE_NX:
+ rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
+ break;
+ case PGMMODE_AMD64:
+ case PGMMODE_AMD64_NX:
+ rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
+ break;
+ case PGMMODE_EPT:
+ rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pGVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
+ break;
+ default:
+ AssertFailed();
+ rc = VERR_INVALID_PARAMETER;
+ break;
+ }
+ if (fLockTaken)
+ {
+ PGM_LOCK_ASSERT_OWNER(pGVM);
+ pgmUnlock(pGVM);
+ }
+
+ if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
+ rc = VINF_SUCCESS;
+ /*
+ * Handle the case where we cannot interpret the instruction because we cannot get the guest physical address
+ * via its page tables, see @bugref{6043}.
+ */
+ else if ( rc == VERR_PAGE_NOT_PRESENT /* SMP only ; disassembly might fail. */
+ || rc == VERR_PAGE_TABLE_NOT_PRESENT /* seen with UNI & SMP */
+ || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT /* seen with SMP */
+ || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT) /* precaution */
+ {
+ Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, GCPhysFault, uErr, pRegFrame->rip));
+ /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about
+ single VCPU VMs though. */
+ rc = VINF_SUCCESS;
+ }
+
+ STAM_STATS({ if (!pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution))
+ pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Misc; });
+ STAM_PROFILE_STOP_EX(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0e, pGVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution), a);
+ return rc;
+}
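+
+
+/*
+ * A minimal illustrative sketch (hypothetical helper, for exposition only) of
+ * how the X86_TRAP_PF_* error code bits consumed by the statistics block in
+ * PGMR0Trap0eHandlerNestedPaging above decompose into fault classes.
+ */
+DECLINLINE(const char *) pgmR0SketchClassifyPageFault(RTGCUINT uErr)
+{
+    if (!(uErr & X86_TRAP_PF_P))
+        return "not-present";                   /* page (or nested page) not present */
+    if (uErr & X86_TRAP_PF_RSVD)
+        return "reserved-bit";                  /* reserved bit set in the paging structures */
+    if (uErr & X86_TRAP_PF_ID)
+        return "instruction-fetch";             /* NX / instruction fetch violation */
+    return (uErr & X86_TRAP_PF_RW) ? "write" : "read";
+    /* Note: X86_TRAP_PF_US distinguishes user mode from supervisor mode accesses. */
+}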
+
+
+/**
+ * \#PF Handler for deliberate nested paging misconfiguration (/reserved bit)
+ * employed for MMIO pages.
+ *
+ * @returns VBox status code (appropriate for trap handling and GC return).
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pGVCpu The global (ring-0) CPU structure of the calling
+ * EMT.
+ * @param enmShwPagingMode Paging mode for the nested page tables.
+ * @param pRegFrame Trap register frame.
+ * @param GCPhysFault The fault address.
+ * @param uErr The error code, UINT32_MAX if not available
+ * (VT-x).
+ */
+VMMR0DECL(VBOXSTRICTRC) PGMR0Trap0eHandlerNPMisconfig(PGVM pGVM, PGVMCPU pGVCpu, PGMMODE enmShwPagingMode,
+ PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, uint32_t uErr)
+{
+#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
+ STAM_PROFILE_START(&pGVCpu->CTX_SUFF(pStats)->StatR0NpMiscfg, a);
+ VBOXSTRICTRC rc;
+
+ /*
+ * Try to look up the all-access physical handler for the address.
+ */
+ pgmLock(pGVM);
+ PPGMPHYSHANDLER pHandler = pgmHandlerPhysicalLookup(pGVM, GCPhysFault);
+ PPGMPHYSHANDLERTYPEINT pHandlerType = RT_LIKELY(pHandler) ? PGMPHYSHANDLER_GET_TYPE(pGVM, pHandler) : NULL;
+ if (RT_LIKELY(pHandler && pHandlerType->enmKind != PGMPHYSHANDLERKIND_WRITE))
+ {
+ /*
+ * If the handler has aliased pages or pages that have been temporarily
+ * disabled, we'll have to take a detour to make sure we resync them
+ * to avoid lots of unnecessary exits.
+ */
+ PPGMPAGE pPage;
+ if ( ( pHandler->cAliasedPages
+ || pHandler->cTmpOffPages)
+ && ( (pPage = pgmPhysGetPage(pGVM, GCPhysFault)) == NULL
+ || PGM_PAGE_GET_HNDL_PHYS_STATE(pPage) == PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
+ )
+ {
+ Log(("PGMR0Trap0eHandlerNPMisconfig: Resyncing aliases / tmp-off page at %RGp (uErr=%#x) %R[pgmpage]\n", GCPhysFault, uErr, pPage));
+ STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfgSyncPage);
+ rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
+ pgmUnlock(pGVM);
+ }
+ else
+ {
+ if (pHandlerType->CTX_SUFF(pfnPfHandler))
+ {
+ void *pvUser = pHandler->CTX_SUFF(pvUser);
+ STAM_PROFILE_START(&pHandler->Stat, h);
+ pgmUnlock(pGVM);
+
+ Log6(("PGMR0Trap0eHandlerNPMisconfig: calling %p(,%#x,,%RGp,%p)\n", pHandlerType->CTX_SUFF(pfnPfHandler), uErr, GCPhysFault, pvUser));
+ rc = pHandlerType->CTX_SUFF(pfnPfHandler)(pGVM, pGVCpu, uErr == UINT32_MAX ? RTGCPTR_MAX : uErr, pRegFrame,
+ GCPhysFault, GCPhysFault, pvUser);
+
+#ifdef VBOX_WITH_STATISTICS
+ pgmLock(pGVM);
+ pHandler = pgmHandlerPhysicalLookup(pGVM, GCPhysFault);
+ if (pHandler)
+ STAM_PROFILE_STOP(&pHandler->Stat, h);
+ pgmUnlock(pGVM);
+#endif
+ }
+ else
+ {
+ pgmUnlock(pGVM);
+ Log(("PGMR0Trap0eHandlerNPMisconfig: %RGp (uErr=%#x) -> R3\n", GCPhysFault, uErr));
+ rc = VINF_EM_RAW_EMULATE_INSTR;
+ }
+ }
+ }
+ else
+ {
+ /*
+ * Must be out of sync, so do a SyncPage and restart the instruction.
+ *
+ * ASSUMES that ALL handlers are page aligned and cover whole pages
+ * (assumption asserted in PGMHandlerPhysicalRegisterEx).
+ */
+ Log(("PGMR0Trap0eHandlerNPMisconfig: Out of sync page at %RGp (uErr=%#x)\n", GCPhysFault, uErr));
+ STAM_COUNTER_INC(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfgSyncPage);
+ rc = pgmShwSyncNestedPageLocked(pGVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
+ pgmUnlock(pGVM);
+ }
+
+ STAM_PROFILE_STOP(&pGVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfg, a);
+ return rc;
+
+#else
+ AssertLogRelFailed();
+ return VERR_PGM_NOT_USED_IN_MODE;
+#endif
+}
+
diff --git a/src/VBox/VMM/VMMR0/PGMR0Bth.h b/src/VBox/VMM/VMMR0/PGMR0Bth.h
new file mode 100644
index 00000000..4eceac21
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/PGMR0Bth.h
@@ -0,0 +1,25 @@
+/* $Id: PGMR0Bth.h $ */
+/** @file
+ * VBox - Page Manager / Monitor, Shadow+Guest Paging Template.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*******************************************************************************
+* Internal Functions *
+*******************************************************************************/
+RT_C_DECLS_BEGIN
+PGM_BTH_DECL(int, Trap0eHandler)(PVMCPUCC pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
+RT_C_DECLS_END
+
diff --git a/src/VBox/VMM/VMMR0/PGMR0Pool.cpp b/src/VBox/VMM/VMMR0/PGMR0Pool.cpp
new file mode 100644
index 00000000..bc05b456
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/PGMR0Pool.cpp
@@ -0,0 +1,153 @@
+/* $Id: PGMR0Pool.cpp $ */
+/** @file
+ * PGM Shadow Page Pool, ring-0 specific bits.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_PGM_POOL
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/hm.h>
+#include "PGMInternal.h"
+#include <VBox/vmm/vmcc.h>
+#include "PGMInline.h"
+
+#include <VBox/log.h>
+#include <VBox/err.h>
+#include <iprt/mem.h>
+#include <iprt/memobj.h>
+
+
+
+/**
+ * Grows the shadow page pool.
+ *
+ * I.e. adds more pages to it, assuming it hasn't reached cMaxPages yet.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM structure.
+ */
+VMMR0_INT_DECL(int) PGMR0PoolGrow(PGVM pGVM)
+{
+ PPGMPOOL pPool = pGVM->pgm.s.pPoolR0;
+ AssertReturn(pPool->cCurPages < pPool->cMaxPages, VERR_PGM_POOL_MAXED_OUT_ALREADY);
+ AssertReturn(pPool->pVMR3 == pGVM->pVMR3, VERR_PGM_POOL_IPE);
+ AssertReturn(pPool->pVMR0 == pGVM, VERR_PGM_POOL_IPE);
+
+ /* With 32-bit guests and no EPT, the CR3 limits the root pages to low
+ (below 4 GB) memory. */
+ /** @todo change the pool to handle ROOT page allocations specially when
+ * required. */
+ bool const fCanUseHighMemory = HMIsNestedPagingActive(pGVM);
+
+ STAM_REL_PROFILE_START(&pPool->StatGrow, a);
+ int rc = RTCritSectEnter(&pGVM->pgmr0.s.PoolGrowCritSect);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * Figure out how many pages we should allocate.
+ */
+ uint32_t const cMaxPages = RT_MIN(pPool->cMaxPages, PGMPOOL_IDX_LAST);
+ uint32_t const cCurPages = RT_MIN(pPool->cCurPages, cMaxPages);
+ if (cCurPages < cMaxPages)
+ {
+ uint32_t cNewPages = cMaxPages - cCurPages;
+ if (cNewPages > PGMPOOL_CFG_MAX_GROW)
+ cNewPages = PGMPOOL_CFG_MAX_GROW;
+ LogFlow(("PGMR3PoolGrow: Growing the pool by %u (%#x) pages to %u (%#x) pages. fCanUseHighMemory=%RTbool\n",
+ cNewPages, cNewPages, cCurPages + cNewPages, cCurPages + cNewPages, fCanUseHighMemory));
+
+ /* Check that both handles in this array entry are NIL. */
+ uintptr_t const idxMemHandle = cCurPages / (PGMPOOL_CFG_MAX_GROW);
+ AssertCompile( (PGMPOOL_IDX_LAST + (PGMPOOL_CFG_MAX_GROW - 1)) / PGMPOOL_CFG_MAX_GROW
+ <= RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs));
+ AssertCompile(RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMemObjs) == RT_ELEMENTS(pGVM->pgmr0.s.ahPoolMapObjs));
+ AssertLogRelMsgReturnStmt( pGVM->pgmr0.s.ahPoolMemObjs[idxMemHandle] == NIL_RTR0MEMOBJ
+ && pGVM->pgmr0.s.ahPoolMapObjs[idxMemHandle] == NIL_RTR0MEMOBJ,
+ ("idxMemHandle=%#x\n", idxMemHandle), RTCritSectLeave(&pGVM->pgmr0.s.PoolGrowCritSect),
+ VERR_PGM_POOL_IPE);
+
+ /*
+ * Allocate the new pages and map them into ring-3.
+ */
+ RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
+ if (fCanUseHighMemory)
+ rc = RTR0MemObjAllocPage(&hMemObj, cNewPages * PAGE_SIZE, false /*fExecutable*/);
+ else
+ rc = RTR0MemObjAllocLow(&hMemObj, cNewPages * PAGE_SIZE, false /*fExecutable*/);
+ if (RT_SUCCESS(rc))
+ {
+ RTR0MEMOBJ hMapObj = NIL_RTR0MEMOBJ;
+ rc = RTR0MemObjMapUser(&hMapObj, hMemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
+ if (RT_SUCCESS(rc))
+ {
+ pGVM->pgmr0.s.ahPoolMemObjs[idxMemHandle] = hMemObj;
+ pGVM->pgmr0.s.ahPoolMapObjs[idxMemHandle] = hMapObj;
+
+ uint8_t *pbRing0 = (uint8_t *)RTR0MemObjAddress(hMemObj);
+ RTR3PTR pbRing3 = RTR0MemObjAddressR3(hMapObj);
+ AssertPtr(pbRing0);
+ Assert(((uintptr_t)pbRing0 & PAGE_OFFSET_MASK) == 0);
+ Assert(pbRing3 != NIL_RTR3PTR);
+ Assert((pbRing3 & PAGE_OFFSET_MASK) == 0);
+
+ /*
+ * Initialize the new pages.
+ */
+ for (unsigned iNewPage = 0; iNewPage < cNewPages; iNewPage++)
+ {
+ PPGMPOOLPAGE pPage = &pPool->aPages[cCurPages + iNewPage];
+ pPage->pvPageR0 = &pbRing0[iNewPage * PAGE_SIZE];
+ pPage->pvPageR3 = pbRing3 + iNewPage * PAGE_SIZE;
+ pPage->Core.Key = RTR0MemObjGetPagePhysAddr(hMemObj, iNewPage);
+ AssertFatal(pPage->Core.Key < _4G || fCanUseHighMemory);
+ pPage->GCPhys = NIL_RTGCPHYS;
+ pPage->enmKind = PGMPOOLKIND_FREE;
+ pPage->idx = pPage - &pPool->aPages[0];
+ LogFlow(("PGMR3PoolGrow: insert page #%#x - %RHp\n", pPage->idx, pPage->Core.Key));
+ pPage->iNext = pPool->iFreeHead;
+ pPage->iUserHead = NIL_PGMPOOL_USER_INDEX;
+ pPage->iModifiedNext = NIL_PGMPOOL_IDX;
+ pPage->iModifiedPrev = NIL_PGMPOOL_IDX;
+ pPage->iMonitoredNext = NIL_PGMPOOL_IDX;
+ pPage->iMonitoredPrev = NIL_PGMPOOL_IDX;
+ pPage->iAgeNext = NIL_PGMPOOL_IDX;
+ pPage->iAgePrev = NIL_PGMPOOL_IDX;
+ /* commit it */
+ bool fRc = RTAvloHCPhysInsert(&pPool->HCPhysTree, &pPage->Core); Assert(fRc); NOREF(fRc);
+ pPool->iFreeHead = cCurPages + iNewPage;
+ pPool->cCurPages = cCurPages + iNewPage + 1;
+ }
+
+ STAM_REL_PROFILE_STOP(&pPool->StatGrow, a);
+ RTCritSectLeave(&pGVM->pgmr0.s.PoolGrowCritSect);
+ return VINF_SUCCESS;
+ }
+
+ RTR0MemObjFree(hMemObj, true /*fFreeMappings*/);
+ }
+ if (cCurPages > 64)
+ LogRelMax(5, ("PGMR0PoolGrow: rc=%Rrc cNewPages=%#x cCurPages=%#x cMaxPages=%#x fCanUseHighMemory=%d\n",
+ rc, cNewPages, cCurPages, cMaxPages, fCanUseHighMemory));
+ else
+ LogRel(("PGMR0PoolGrow: rc=%Rrc cNewPages=%#x cCurPages=%#x cMaxPages=%#x fCanUseHighMemory=%d\n",
+ rc, cNewPages, cCurPages, cMaxPages, fCanUseHighMemory));
+ }
+ RTCritSectLeave(&pGVM->pgmr0.s.PoolGrowCritSect);
+ return rc;
+}
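+
+
+/*
+ * A minimal illustrative sketch (hypothetical helper, for exposition only) of
+ * the chunk bookkeeping in PGMR0PoolGrow above: pages are added in chunks of
+ * PGMPOOL_CFG_MAX_GROW, so the ahPoolMemObjs/ahPoolMapObjs slot that backs a
+ * given pool page index is simply the index divided by the chunk size.
+ */
+DECLINLINE(uintptr_t) pgmR0PoolSketchMemObjSlot(uint32_t idxPoolPage)
+{
+    /* Mirrors the idxMemHandle = cCurPages / PGMPOOL_CFG_MAX_GROW calculation above. */
+    return idxPoolPage / PGMPOOL_CFG_MAX_GROW;
+}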
+
diff --git a/src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp b/src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp
new file mode 100644
index 00000000..909bf143
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp
@@ -0,0 +1,171 @@
+/* $Id: PGMR0SharedPage.cpp $ */
+/** @file
+ * PGM - Page Manager and Monitor, Page Sharing, Ring-0.
+ */
+
+/*
+ * Copyright (C) 2010-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_PGM_SHARED
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/gmm.h>
+#include "PGMInternal.h"
+#include <VBox/vmm/vmcc.h>
+#include <VBox/vmm/gvm.h>
+#include "PGMInline.h"
+#include <VBox/log.h>
+#include <VBox/err.h>
+#include <iprt/assert.h>
+#include <iprt/mem.h>
+
+
+#ifdef VBOX_WITH_PAGE_SHARING
+/**
+ * Check a registered module for shared page changes.
+ *
+ * The PGM lock shall be taken prior to calling this method.
+ *
+ * @returns VBox status code.
+ *
+ * @param pVM The cross context VM structure.
+ * @param pGVM Pointer to the GVM instance data.
+ * @param idCpu The ID of the calling virtual CPU.
+ * @param pModule Global module description.
+ * @param paRegionsGCPtrs Array parallel to pModules->aRegions with the
+ * addresses of the regions in the calling
+ * process.
+ */
+VMMR0DECL(int) PGMR0SharedModuleCheck(PVMCC pVM, PGVM pGVM, VMCPUID idCpu, PGMMSHAREDMODULE pModule, PCRTGCPTR64 paRegionsGCPtrs)
+{
+ PVMCPUCC pVCpu = &pGVM->aCpus[idCpu];
+ int rc = VINF_SUCCESS;
+ bool fFlushTLBs = false;
+ bool fFlushRemTLBs = false;
+ GMMSHAREDPAGEDESC PageDesc;
+
+ Log(("PGMR0SharedModuleCheck: check %s %s base=%RGv size=%x\n", pModule->szName, pModule->szVersion, pModule->Core.Key, pModule->cbModule));
+
+ PGM_LOCK_ASSERT_OWNER(pVM); /* This cannot fail as we grab the lock in pgmR3SharedModuleRegRendezvous before calling into ring-0. */
+
+ /*
+ * Check every region of the shared module.
+ */
+ for (uint32_t idxRegion = 0; idxRegion < pModule->cRegions; idxRegion++)
+ {
+ RTGCPTR GCPtrPage = paRegionsGCPtrs[idxRegion] & ~(RTGCPTR)PAGE_OFFSET_MASK;
+ uint32_t cbLeft = pModule->aRegions[idxRegion].cb; Assert(!(cbLeft & PAGE_OFFSET_MASK));
+ uint32_t idxPage = 0;
+
+ while (cbLeft)
+ {
+ /** @todo inefficient to fetch each guest page like this... */
+ RTGCPHYS GCPhys;
+ uint64_t fFlags;
+ rc = PGMGstGetPage(pVCpu, GCPtrPage, &fFlags, &GCPhys);
+ if ( rc == VINF_SUCCESS
+ && !(fFlags & X86_PTE_RW)) /* important as we make assumptions about this below! */
+ {
+ PPGMPAGE pPage = pgmPhysGetPage(pVM, GCPhys);
+ Assert(!pPage || !PGM_PAGE_IS_BALLOONED(pPage));
+ if ( pPage
+ && PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED
+ && PGM_PAGE_GET_READ_LOCKS(pPage) == 0
+ && PGM_PAGE_GET_WRITE_LOCKS(pPage) == 0 )
+ {
+ PageDesc.idPage = PGM_PAGE_GET_PAGEID(pPage);
+ PageDesc.HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
+ PageDesc.GCPhys = GCPhys;
+
+ rc = GMMR0SharedModuleCheckPage(pGVM, pModule, idxRegion, idxPage, &PageDesc);
+ if (RT_FAILURE(rc))
+ break;
+
+ /*
+ * Any change for this page?
+ */
+ if (PageDesc.idPage != NIL_GMM_PAGEID)
+ {
+ Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
+
+ Log(("PGMR0SharedModuleCheck: shared page gst virt=%RGv phys=%RGp host %RHp->%RHp\n",
+ GCPtrPage, PageDesc.GCPhys, PGM_PAGE_GET_HCPHYS(pPage), PageDesc.HCPhys));
+
+ /* Page was either replaced by an existing shared
+ version of it or converted into a read-only shared
+ page, so clear all references. */
+ bool fFlush = false;
+ rc = pgmPoolTrackUpdateGCPhys(pVM, PageDesc.GCPhys, pPage, true /* clear the entries */, &fFlush);
+ Assert( rc == VINF_SUCCESS
+ || ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
+ && (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)));
+ if (rc == VINF_SUCCESS)
+ fFlushTLBs |= fFlush;
+ fFlushRemTLBs = true;
+
+ if (PageDesc.HCPhys != PGM_PAGE_GET_HCPHYS(pPage))
+ {
+ /* Update the physical address and page id now. */
+ PGM_PAGE_SET_HCPHYS(pVM, pPage, PageDesc.HCPhys);
+ PGM_PAGE_SET_PAGEID(pVM, pPage, PageDesc.idPage);
+
+ /* Invalidate page map TLB entry for this page too. */
+ pgmPhysInvalidatePageMapTLBEntry(pVM, PageDesc.GCPhys);
+ pVM->pgm.s.cReusedSharedPages++;
+ }
+ /* else: nothing changed (== this page is now a shared
+ page), so no need to flush anything. */
+
+ pVM->pgm.s.cSharedPages++;
+ pVM->pgm.s.cPrivatePages--;
+ PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_SHARED);
+
+# ifdef VBOX_STRICT /* check sum hack */
+ pPage->s.u2Unused0 = PageDesc.u32StrictChecksum & 3;
+ //pPage->s.u2Unused1 = (PageDesc.u32StrictChecksum >> 8) & 3;
+# endif
+ }
+ }
+ }
+ else
+ {
+ Assert( rc == VINF_SUCCESS
+ || rc == VERR_PAGE_NOT_PRESENT
+ || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT
+ || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT
+ || rc == VERR_PAGE_TABLE_NOT_PRESENT);
+ rc = VINF_SUCCESS; /* ignore error */
+ }
+
+ idxPage++;
+ GCPtrPage += PAGE_SIZE;
+ cbLeft -= PAGE_SIZE;
+ }
+ }
+
+ /*
+ * Do TLB flushing if necessary.
+ */
+ if (fFlushTLBs)
+ PGM_INVL_ALL_VCPU_TLBS(pVM);
+
+ if (fFlushRemTLBs)
+ for (VMCPUID idCurCpu = 0; idCurCpu < pGVM->cCpus; idCurCpu++)
+ CPUMSetChangedFlags(&pGVM->aCpus[idCurCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
+
+ return rc;
+}
+#endif /* VBOX_WITH_PAGE_SHARING */
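+
+
+/*
+ * A minimal illustrative sketch (hypothetical helper, for exposition only) of
+ * the per-region page walk used by PGMR0SharedModuleCheck above: the region is
+ * visited one guest page at a time, advancing the page-aligned guest pointer
+ * and shrinking the remaining byte count by PAGE_SIZE.
+ */
+DECLINLINE(uint32_t) pgmR0SketchCountRegionPages(RTGCPTR GCPtrRegion, uint32_t cbRegion)
+{
+    RTGCPTR const GCPtrFirst = GCPtrRegion & ~(RTGCPTR)PAGE_OFFSET_MASK; /* page align, as above */
+    RTGCPTR       GCPtrPage  = GCPtrFirst;
+    while (cbRegion >= PAGE_SIZE)           /* the caller above asserts page-aligned region sizes */
+    {
+        /* A real walker would query and compare the guest page at GCPtrPage here. */
+        GCPtrPage += PAGE_SIZE;
+        cbRegion  -= PAGE_SIZE;
+    }
+    return (uint32_t)((GCPtrPage - GCPtrFirst) >> PAGE_SHIFT);
+}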
+
diff --git a/src/VBox/VMM/VMMR0/VMMR0.cpp b/src/VBox/VMM/VMMR0/VMMR0.cpp
new file mode 100644
index 00000000..b666391e
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/VMMR0.cpp
@@ -0,0 +1,2753 @@
+/* $Id: VMMR0.cpp $ */
+/** @file
+ * VMM - Host Context Ring 0.
+ */
+
+/*
+ * Copyright (C) 2006-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_VMM
+#include <VBox/vmm/vmm.h>
+#include <VBox/sup.h>
+#include <VBox/vmm/iom.h>
+#include <VBox/vmm/trpm.h>
+#include <VBox/vmm/cpum.h>
+#include <VBox/vmm/pdmapi.h>
+#include <VBox/vmm/pgm.h>
+#ifdef VBOX_WITH_NEM_R0
+# include <VBox/vmm/nem.h>
+#endif
+#include <VBox/vmm/em.h>
+#include <VBox/vmm/stam.h>
+#include <VBox/vmm/tm.h>
+#include "VMMInternal.h"
+#include <VBox/vmm/vmcc.h>
+#include <VBox/vmm/gvm.h>
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+# include <VBox/vmm/pdmpci.h>
+#endif
+#include <VBox/vmm/apic.h>
+
+#include <VBox/vmm/gvmm.h>
+#include <VBox/vmm/gmm.h>
+#include <VBox/vmm/gim.h>
+#include <VBox/intnet.h>
+#include <VBox/vmm/hm.h>
+#include <VBox/param.h>
+#include <VBox/err.h>
+#include <VBox/version.h>
+#include <VBox/log.h>
+
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/assert.h>
+#include <iprt/crc.h>
+#include <iprt/mp.h>
+#include <iprt/once.h>
+#include <iprt/stdarg.h>
+#include <iprt/string.h>
+#include <iprt/thread.h>
+#include <iprt/timer.h>
+#include <iprt/time.h>
+
+#include "dtrace/VBoxVMM.h"
+
+
+#if defined(_MSC_VER) && defined(RT_ARCH_AMD64) /** @todo check this with VC7! */
+# pragma intrinsic(_AddressOfReturnAddress)
+#endif
+
+#if defined(RT_OS_DARWIN) && ARCH_BITS == 32
+# error "32-bit darwin is no longer supported. Go back to 4.3 or earlier!"
+#endif
+
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** @def VMM_CHECK_SMAP_SETUP
+ * SMAP check setup. */
+/** @def VMM_CHECK_SMAP_CHECK
+ * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
+ * it will be logged and @a a_BadExpr is executed. */
+/** @def VMM_CHECK_SMAP_CHECK2
+ * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
+ * be logged, written to the VMs assertion text buffer, and @a a_BadExpr is
+ * executed. */
+#if (defined(VBOX_STRICT) || 1) && !defined(VBOX_WITH_RAM_IN_KERNEL)
+# define VMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
+# define VMM_CHECK_SMAP_CHECK(a_BadExpr) \
+ do { \
+ if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
+ { \
+ RTCCUINTREG fEflCheck = ASMGetFlags(); \
+ if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
+ { /* likely */ } \
+ else \
+ { \
+ SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
+ a_BadExpr; \
+ } \
+ } \
+ } while (0)
+# define VMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) \
+ do { \
+ if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
+ { \
+ RTCCUINTREG fEflCheck = ASMGetFlags(); \
+ if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
+ { /* likely */ } \
+ else if (a_pGVM) \
+ { \
+ SUPR0BadContext((a_pGVM)->pSession, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
+ RTStrPrintf((a_pGVM)->vmm.s.szRing0AssertMsg1, sizeof((a_pGVM)->vmm.s.szRing0AssertMsg1), \
+ "%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
+ a_BadExpr; \
+ } \
+ else \
+ { \
+ SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
+ a_BadExpr; \
+ } \
+ } \
+ } while (0)
+#else
+# define VMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
+# define VMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
+# define VMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) NOREF(fKernelFeatures)
+#endif
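+
+
+/*
+ * A minimal usage sketch (hypothetical function, for exposition only) of the
+ * SMAP check macros above: capture the kernel feature mask once on entry, then
+ * verify that EFLAGS.AC is still set wherever it matters, bailing out with
+ * VERR_VMM_SMAP_BUT_AC_CLEAR the way the ring-0 entry points do.
+ */
+DECLINLINE(int) vmmR0SketchSmapUsage(PGVM pGVM)
+{
+    RT_NOREF(pGVM);                                                  /* only dereferenced when SMAP checking is compiled in */
+    VMM_CHECK_SMAP_SETUP();                                          /* fetches SUPR0GetKernelFeatures() once */
+    VMM_CHECK_SMAP_CHECK(return VERR_VMM_SMAP_BUT_AC_CLEAR);         /* log + bail if AC is unexpectedly clear */
+    /* ... work that must not run with EFLAGS.AC cleared ... */
+    VMM_CHECK_SMAP_CHECK2(pGVM, return VERR_VMM_SMAP_BUT_AC_CLEAR);  /* additionally records the VM assertion text */
+    return VINF_SUCCESS;
+}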
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+RT_C_DECLS_BEGIN
+#if defined(RT_ARCH_X86) && (defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD))
+extern uint64_t __udivdi3(uint64_t, uint64_t);
+extern uint64_t __umoddi3(uint64_t, uint64_t);
+#endif
+RT_C_DECLS_END
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** Drag in necessary library bits.
+ * The runtime lives here (in VMMR0.r0) and VBoxDD*R0.r0 links against us. */
+PFNRT g_VMMR0Deps[] =
+{
+ (PFNRT)RTCrc32,
+ (PFNRT)RTOnce,
+#if defined(RT_ARCH_X86) && (defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD))
+ (PFNRT)__udivdi3,
+ (PFNRT)__umoddi3,
+#endif
+ NULL
+};
+
+#ifdef RT_OS_SOLARIS
+/* Dependency information for the native solaris loader. */
+extern "C" { char _depends_on[] = "vboxdrv"; }
+#endif
+
+
+/**
+ * Initialize the module.
+ * This is called when we're first loaded.
+ *
+ * @returns 0 on success.
+ * @returns VBox status code on failure.
+ * @param hMod Image handle for use in APIs.
+ */
+DECLEXPORT(int) ModuleInit(void *hMod)
+{
+ VMM_CHECK_SMAP_SETUP();
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+
+#ifdef VBOX_WITH_DTRACE_R0
+ /*
+ * The first thing to do is register the static tracepoints.
+ * (Deregistration is automatic.)
+ */
+ int rc2 = SUPR0TracerRegisterModule(hMod, &g_VTGObjHeader);
+ if (RT_FAILURE(rc2))
+ return rc2;
+#endif
+ LogFlow(("ModuleInit:\n"));
+
+#ifdef VBOX_WITH_64ON32_CMOS_DEBUG
+ /*
+ * Display the CMOS debug code.
+ */
+ ASMOutU8(0x72, 0x03);
+ uint8_t bDebugCode = ASMInU8(0x73);
+ LogRel(("CMOS Debug Code: %#x (%d)\n", bDebugCode, bDebugCode));
+ RTLogComPrintf("CMOS Debug Code: %#x (%d)\n", bDebugCode, bDebugCode);
+#endif
+
+ /*
+ * Initialize the VMM, GVMM, GMM, HM, PGM (Darwin) and INTNET.
+ */
+ int rc = vmmInitFormatTypes();
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = GVMMR0Init();
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = GMMR0Init();
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = HMR0Init();
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+
+ PDMR0Init(hMod);
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+
+ rc = PGMRegisterStringFormatTypes();
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ rc = PGMR0DynMapInit();
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = IntNetR0Init();
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = PciRawR0Init();
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = CPUMR0ModuleInit();
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_WITH_TRIPLE_FAULT_HACK
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = vmmR0TripleFaultHackInit();
+ if (RT_SUCCESS(rc))
+#endif
+ {
+ VMM_CHECK_SMAP_CHECK(rc = VERR_VMM_SMAP_BUT_AC_CLEAR);
+ if (RT_SUCCESS(rc))
+ {
+ LogFlow(("ModuleInit: returns success\n"));
+ return VINF_SUCCESS;
+ }
+ }
+
+ /*
+ * Bail out.
+ */
+#ifdef VBOX_WITH_TRIPLE_FAULT_HACK
+ vmmR0TripleFaultHackTerm();
+#endif
+ }
+ else
+ LogRel(("ModuleInit: CPUMR0ModuleInit -> %Rrc\n", rc));
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ PciRawR0Term();
+#endif
+ }
+ else
+ LogRel(("ModuleInit: PciRawR0Init -> %Rrc\n", rc));
+ IntNetR0Term();
+ }
+ else
+ LogRel(("ModuleInit: IntNetR0Init -> %Rrc\n", rc));
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ PGMR0DynMapTerm();
+#endif
+ }
+ else
+ LogRel(("ModuleInit: PGMR0DynMapInit -> %Rrc\n", rc));
+ PGMDeregisterStringFormatTypes();
+ }
+ else
+ LogRel(("ModuleInit: PGMRegisterStringFormatTypes -> %Rrc\n", rc));
+ HMR0Term();
+ }
+ else
+ LogRel(("ModuleInit: HMR0Init -> %Rrc\n", rc));
+ GMMR0Term();
+ }
+ else
+ LogRel(("ModuleInit: GMMR0Init -> %Rrc\n", rc));
+ GVMMR0Term();
+ }
+ else
+ LogRel(("ModuleInit: GVMMR0Init -> %Rrc\n", rc));
+ vmmTermFormatTypes();
+ }
+ else
+ LogRel(("ModuleInit: vmmInitFormatTypes -> %Rrc\n", rc));
+
+ LogFlow(("ModuleInit: failed %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Terminate the module.
+ * This is called when we're finally unloaded.
+ *
+ * @param hMod Image handle for use in APIs.
+ */
+DECLEXPORT(void) ModuleTerm(void *hMod)
+{
+ NOREF(hMod);
+ LogFlow(("ModuleTerm:\n"));
+
+ /*
+ * Terminate the CPUM module (Local APIC cleanup).
+ */
+ CPUMR0ModuleTerm();
+
+ /*
+ * Terminate the internal network service.
+ */
+ IntNetR0Term();
+
+ /*
+ * PGM (Darwin), HM and PciRaw global cleanup.
+ */
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ PGMR0DynMapTerm();
+#endif
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ PciRawR0Term();
+#endif
+ PGMDeregisterStringFormatTypes();
+ HMR0Term();
+#ifdef VBOX_WITH_TRIPLE_FAULT_HACK
+ vmmR0TripleFaultHackTerm();
+#endif
+
+ /*
+ * Destroy the GMM and GVMM instances.
+ */
+ GMMR0Term();
+ GVMMR0Term();
+
+ vmmTermFormatTypes();
+
+ LogFlow(("ModuleTerm: returns\n"));
+}
+
+
+/**
+ * Initializes the R0 driver for a particular VM instance.
+ *
+ * @returns VBox status code.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param uSvnRev The SVN revision of the ring-3 part.
+ * @param uBuildType Build type indicator.
+ * @thread EMT(0)
+ */
+static int vmmR0InitVM(PGVM pGVM, uint32_t uSvnRev, uint32_t uBuildType)
+{
+ VMM_CHECK_SMAP_SETUP();
+ VMM_CHECK_SMAP_CHECK(return VERR_VMM_SMAP_BUT_AC_CLEAR);
+
+ /*
+ * Match the SVN revisions and build type.
+ */
+ if (uSvnRev != VMMGetSvnRev())
+ {
+ LogRel(("VMMR0InitVM: Revision mismatch, r3=%d r0=%d\n", uSvnRev, VMMGetSvnRev()));
+ SUPR0Printf("VMMR0InitVM: Revision mismatch, r3=%d r0=%d\n", uSvnRev, VMMGetSvnRev());
+ return VERR_VMM_R0_VERSION_MISMATCH;
+ }
+ if (uBuildType != vmmGetBuildType())
+ {
+ LogRel(("VMMR0InitVM: Build type mismatch, r3=%#x r0=%#x\n", uBuildType, vmmGetBuildType()));
+ SUPR0Printf("VMMR0InitVM: Build type mismatch, r3=%#x r0=%#x\n", uBuildType, vmmGetBuildType());
+ return VERR_VMM_R0_VERSION_MISMATCH;
+ }
+
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, 0 /*idCpu*/);
+ if (RT_FAILURE(rc))
+ return rc;
+
+#ifdef LOG_ENABLED
+ /*
+ * Register the EMT R0 logger instance for VCPU 0.
+ */
+ PVMCPUCC pVCpu = VMCC_GET_CPU_0(pGVM);
+
+ PVMMR0LOGGER pR0Logger = pVCpu->vmm.s.pR0LoggerR0;
+ if (pR0Logger)
+ {
+# if 0 /* testing of the logger. */
+ LogCom(("vmmR0InitVM: before %p\n", RTLogDefaultInstance()));
+ LogCom(("vmmR0InitVM: pfnFlush=%p actual=%p\n", pR0Logger->Logger.pfnFlush, vmmR0LoggerFlush));
+ LogCom(("vmmR0InitVM: pfnLogger=%p actual=%p\n", pR0Logger->Logger.pfnLogger, vmmR0LoggerWrapper));
+ LogCom(("vmmR0InitVM: offScratch=%d fFlags=%#x fDestFlags=%#x\n", pR0Logger->Logger.offScratch, pR0Logger->Logger.fFlags, pR0Logger->Logger.fDestFlags));
+
+ RTLogSetDefaultInstanceThread(&pR0Logger->Logger, (uintptr_t)pGVM->pSession);
+ LogCom(("vmmR0InitVM: after %p reg\n", RTLogDefaultInstance()));
+ RTLogSetDefaultInstanceThread(NULL, pGVM->pSession);
+ LogCom(("vmmR0InitVM: after %p dereg\n", RTLogDefaultInstance()));
+
+ pR0Logger->Logger.pfnLogger("hello ring-0 logger\n");
+ LogCom(("vmmR0InitVM: returned successfully from direct logger call.\n"));
+ pR0Logger->Logger.pfnFlush(&pR0Logger->Logger);
+ LogCom(("vmmR0InitVM: returned successfully from direct flush call.\n"));
+
+ RTLogSetDefaultInstanceThread(&pR0Logger->Logger, (uintptr_t)pGVM->pSession);
+ LogCom(("vmmR0InitVM: after %p reg2\n", RTLogDefaultInstance()));
+ pR0Logger->Logger.pfnLogger("hello ring-0 logger\n");
+ LogCom(("vmmR0InitVM: returned successfully from direct logger call (2). offScratch=%d\n", pR0Logger->Logger.offScratch));
+ RTLogSetDefaultInstanceThread(NULL, pGVM->pSession);
+ LogCom(("vmmR0InitVM: after %p dereg2\n", RTLogDefaultInstance()));
+
+ RTLogLoggerEx(&pR0Logger->Logger, 0, ~0U, "hello ring-0 logger (RTLogLoggerEx)\n");
+ LogCom(("vmmR0InitVM: RTLogLoggerEx returned fine offScratch=%d\n", pR0Logger->Logger.offScratch));
+
+ RTLogSetDefaultInstanceThread(&pR0Logger->Logger, (uintptr_t)pGVM->pSession);
+ RTLogPrintf("hello ring-0 logger (RTLogPrintf)\n");
+ LogCom(("vmmR0InitVM: RTLogPrintf returned fine offScratch=%d\n", pR0Logger->Logger.offScratch));
+# endif
+ Log(("Switching to per-thread logging instance %p (key=%p)\n", &pR0Logger->Logger, pGVM->pSession));
+ RTLogSetDefaultInstanceThread(&pR0Logger->Logger, (uintptr_t)pGVM->pSession);
+ pR0Logger->fRegistered = true;
+ }
+#endif /* LOG_ENABLED */
+SUPR0Printf("VMMR0InitVM: eflags=%x fKernelFeatures=%#x (SUPKERNELFEATURES_SMAP=%d)\n",
+ ASMGetFlags(), fKernelFeatures, RT_BOOL(fKernelFeatures & SUPKERNELFEATURES_SMAP));
+
+ /*
+ * Check if the host supports high resolution timers or not.
+ */
+ if ( pGVM->vmm.s.fUsePeriodicPreemptionTimers
+ && !RTTimerCanDoHighResolution())
+ pGVM->vmm.s.fUsePeriodicPreemptionTimers = false;
+
+ /*
+ * Initialize the per VM data for GVMM and GMM.
+ */
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ rc = GVMMR0InitVM(pGVM);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Init HM, CPUM and PGM (Darwin only).
+ */
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ rc = HMR0InitVM(pGVM);
+ if (RT_SUCCESS(rc))
+ VMM_CHECK_SMAP_CHECK2(pGVM, rc = VERR_VMM_RING0_ASSERTION); /* CPUR0InitVM will otherwise panic the host */
+ if (RT_SUCCESS(rc))
+ {
+ rc = CPUMR0InitVM(pGVM);
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ rc = PGMR0InitVM(pGVM);
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ rc = EMR0InitVM(pGVM);
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ rc = PciRawR0InitVM(pGVM);
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ rc = GIMR0InitVM(pGVM);
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK2(pGVM, rc = VERR_VMM_RING0_ASSERTION);
+ if (RT_SUCCESS(rc))
+ {
+ GVMMR0DoneInitVM(pGVM);
+
+ /*
+ * Collect a bit of info for the VM release log.
+ */
+ pGVM->vmm.s.fIsPreemptPendingApiTrusty = RTThreadPreemptIsPendingTrusty();
+ pGVM->vmm.s.fIsPreemptPossible = RTThreadPreemptIsPossible();
+
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ return rc;
+ }
+
+ /* bail out*/
+ GIMR0TermVM(pGVM);
+ }
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ PciRawR0TermVM(pGVM);
+#endif
+ }
+ }
+ }
+ }
+ HMR0TermVM(pGVM);
+ }
+ }
+
+ RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pGVM->pSession);
+ return rc;
+}
+
+
+/**
+ * Does EMT specific VM initialization.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM structure.
+ * @param idCpu The EMT that's calling.
+ */
+static int vmmR0InitVMEmt(PGVM pGVM, VMCPUID idCpu)
+{
+ /* Paranoia (caller checked these already). */
+ AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
+ AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_INVALID_CPU_ID);
+
+#ifdef LOG_ENABLED
+ /*
+ * Registration of ring 0 loggers.
+ */
+ PVMCPUCC pVCpu = &pGVM->aCpus[idCpu];
+ PVMMR0LOGGER pR0Logger = pVCpu->vmm.s.pR0LoggerR0;
+ if ( pR0Logger
+ && !pR0Logger->fRegistered)
+ {
+ RTLogSetDefaultInstanceThread(&pR0Logger->Logger, (uintptr_t)pGVM->pSession);
+ pR0Logger->fRegistered = true;
+ }
+#endif
+
+ return VINF_SUCCESS;
+}
+
+
+
+/**
+ * Terminates the R0 bits for a particular VM instance.
+ *
+ * This is normally called by ring-3 as part of the VM termination process, but
+ * may alternatively be called during the support driver session cleanup when
+ * the VM object is destroyed (see GVMM).
+ *
+ * @returns VBox status code.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu Set to 0 if EMT(0) or NIL_VMCPUID if session cleanup
+ * thread.
+ * @thread EMT(0) or session clean up thread.
+ */
+VMMR0_INT_DECL(int) VMMR0TermVM(PGVM pGVM, VMCPUID idCpu)
+{
+ /*
+ * Check EMT(0) claim if we're called from userland.
+ */
+ if (idCpu != NIL_VMCPUID)
+ {
+ AssertReturn(idCpu == 0, VERR_INVALID_CPU_ID);
+ int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ PciRawR0TermVM(pGVM);
+#endif
+
+ /*
+ * Tell GVMM what we're up to and check that we only do this once.
+ */
+ if (GVMMR0DoingTermVM(pGVM))
+ {
+ GIMR0TermVM(pGVM);
+
+ /** @todo I wish to call PGMR0PhysFlushHandyPages(pGVM, &pGVM->aCpus[idCpu])
+ * here to make sure we don't leak any shared pages if we crash... */
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ PGMR0DynMapTermVM(pGVM);
+#endif
+ HMR0TermVM(pGVM);
+ }
+
+ /*
+ * Deregister the logger.
+ */
+ RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pGVM->pSession);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * An interrupt or unhalt force flag is set, deal with it.
+ *
+ * @returns VINF_SUCCESS (or VINF_EM_HALT).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uMWait Result from EMMonitorWaitIsActive().
+ * @param enmInterruptibility Guest CPU interruptibility level.
+ */
+static int vmmR0DoHaltInterrupt(PVMCPUCC pVCpu, unsigned uMWait, CPUMINTERRUPTIBILITY enmInterruptibility)
+{
+ Assert(!TRPMHasTrap(pVCpu));
+ Assert( enmInterruptibility > CPUMINTERRUPTIBILITY_INVALID
+ && enmInterruptibility < CPUMINTERRUPTIBILITY_END);
+
+ /*
+ * Pending interrupts w/o any SMIs or NMIs? That's the usual case.
+ */
+ if ( VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_SMI | VMCPU_FF_INTERRUPT_NMI))
+ {
+ if (enmInterruptibility <= CPUMINTERRUPTIBILITY_UNRESTRAINED)
+ {
+ uint8_t u8Interrupt = 0;
+ int rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
+ Log(("vmmR0DoHaltInterrupt: CPU%d u8Interrupt=%d (%#x) rc=%Rrc\n", pVCpu->idCpu, u8Interrupt, u8Interrupt, rc));
+ if (RT_SUCCESS(rc))
+ {
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_UNHALT);
+
+ rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
+ AssertRCSuccess(rc);
+ STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltExec);
+ return rc;
+ }
+ }
+ }
+ /*
+ * SMI is not implemented yet, at least not here.
+ */
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_SMI))
+ {
+ Log12(("vmmR0DoHaltInterrupt: CPU%d failed #3\n", pVCpu->idCpu));
+ STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltToR3);
+ return VINF_EM_HALT;
+ }
+ /*
+ * NMI.
+ */
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI))
+ {
+ if (enmInterruptibility < CPUMINTERRUPTIBILITY_NMI_INHIBIT)
+ {
+ /** @todo later. */
+ Log12(("vmmR0DoHaltInterrupt: CPU%d failed #2 (uMWait=%u enmInt=%d)\n", pVCpu->idCpu, uMWait, enmInterruptibility));
+ STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltToR3);
+ return VINF_EM_HALT;
+ }
+ }
+ /*
+ * Nested-guest virtual interrupt.
+ */
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST))
+ {
+ if (enmInterruptibility < CPUMINTERRUPTIBILITY_VIRT_INT_DISABLED)
+ {
+ /** @todo NSTVMX: NSTSVM: Remember, we might have to check and perform VM-exits
+ * here before injecting the virtual interrupt. See emR3ForcedActions
+ * for details. */
+ Log12(("vmmR0DoHaltInterrupt: CPU%d failed #1 (uMWait=%u enmInt=%d)\n", pVCpu->idCpu, uMWait, enmInterruptibility));
+ STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltToR3);
+ return VINF_EM_HALT;
+ }
+ }
+
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UNHALT))
+ {
+ STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltExec);
+ Log11(("vmmR0DoHaltInterrupt: CPU%d success VINF_SUCCESS (UNHALT)\n", pVCpu->idCpu));
+ return VINF_SUCCESS;
+ }
+ if (uMWait > 1)
+ {
+ STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltExec);
+ Log11(("vmmR0DoHaltInterrupt: CPU%d success VINF_SUCCESS (uMWait=%u > 1)\n", pVCpu->idCpu, uMWait));
+ return VINF_SUCCESS;
+ }
+
+ Log12(("vmmR0DoHaltInterrupt: CPU%d failed #0 (uMWait=%u enmInt=%d)\n", pVCpu->idCpu, uMWait, enmInterruptibility));
+ STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltToR3);
+ return VINF_EM_HALT;
+}
+
+
+/**
+ * This does one round of vmR3HaltGlobal1Halt().
+ *
+ * The rationale here is that we'll reduce latency in interrupt situations if we
+ * don't go to ring-3 immediately on a VINF_EM_HALT (guest executed HLT or
+ * MWAIT), but instead do one round of blocking here and hope the interrupt is
+ * raised in the meantime.
+ *
+ * If we go to ring-3 we'll quit the inner HM/NEM loop in EM and end up in the
+ * outer loop, which will then call VMR3WaitHalted() and that in turn will do a
+ * ring-0 call (unless we're too close to a timer event). When the interrupt
+ * wakes us up, we'll return from ring-0 and EM will by instinct do a
+ * rescheduling (because of raw-mode) before it resumes the HM/NEM loop and gets
+ * back to VMMR0EntryFast().
+ *
+ * @returns VINF_SUCCESS or VINF_EM_HALT.
+ * @param pGVM The ring-0 VM structure.
+ * @param pGVCpu The ring-0 virtual CPU structure.
+ *
+ * @todo r=bird: All the blocking/waiting and EMT management should move out of
+ * the VM module, probably to VMM. Then this would be more weird wrt
+ * parameters and statistics.
+ */
+static int vmmR0DoHalt(PGVM pGVM, PGVMCPU pGVCpu)
+{
+ /*
+ * Do spin stat historization.
+ */
+ if (++pGVCpu->vmm.s.cR0Halts & 0xff)
+ { /* likely */ }
+ else if (pGVCpu->vmm.s.cR0HaltsSucceeded > pGVCpu->vmm.s.cR0HaltsToRing3)
+ {
+ pGVCpu->vmm.s.cR0HaltsSucceeded = 2;
+ pGVCpu->vmm.s.cR0HaltsToRing3 = 0;
+ }
+ else
+ {
+ pGVCpu->vmm.s.cR0HaltsSucceeded = 0;
+ pGVCpu->vmm.s.cR0HaltsToRing3 = 2;
+ }
+
+ /*
+ * Flags that makes us go to ring-3.
+ */
+ uint32_t const fVmFFs = VM_FF_TM_VIRTUAL_SYNC | VM_FF_PDM_QUEUES | VM_FF_PDM_DMA
+ | VM_FF_DBGF | VM_FF_REQUEST | VM_FF_CHECK_VM_STATE
+ | VM_FF_RESET | VM_FF_EMT_RENDEZVOUS | VM_FF_PGM_NEED_HANDY_PAGES
+ | VM_FF_PGM_NO_MEMORY | VM_FF_DEBUG_SUSPEND;
+ uint64_t const fCpuFFs = VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT | VMCPU_FF_IEM
+ | VMCPU_FF_REQUEST | VMCPU_FF_DBGF | VMCPU_FF_HM_UPDATE_CR3
+ | VMCPU_FF_HM_UPDATE_PAE_PDPES | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
+ | VMCPU_FF_TO_R3 | VMCPU_FF_IOM;
+
+ /*
+ * Check preconditions.
+ */
+ unsigned const uMWait = EMMonitorWaitIsActive(pGVCpu);
+ CPUMINTERRUPTIBILITY const enmInterruptibility = CPUMGetGuestInterruptibility(pGVCpu);
+ if ( pGVCpu->vmm.s.fMayHaltInRing0
+ && !TRPMHasTrap(pGVCpu)
+ && ( enmInterruptibility == CPUMINTERRUPTIBILITY_UNRESTRAINED
+ || uMWait > 1))
+ {
+ if ( !VM_FF_IS_ANY_SET(pGVM, fVmFFs)
+ && !VMCPU_FF_IS_ANY_SET(pGVCpu, fCpuFFs))
+ {
+ /*
+ * Interrupts pending already?
+ */
+ if (VMCPU_FF_TEST_AND_CLEAR(pGVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(pGVCpu);
+
+ /*
+ * Flags that wake up from the halted state.
+ */
+ uint64_t const fIntMask = VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NESTED_GUEST
+ | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI | VMCPU_FF_UNHALT;
+
+ if (VMCPU_FF_IS_ANY_SET(pGVCpu, fIntMask))
+ return vmmR0DoHaltInterrupt(pGVCpu, uMWait, enmInterruptibility);
+ ASMNopPause();
+
+ /*
+ * Check out how long till the next timer event.
+ */
+ uint64_t u64Delta;
+ uint64_t u64GipTime = TMTimerPollGIP(pGVM, pGVCpu, &u64Delta);
+
+ if ( !VM_FF_IS_ANY_SET(pGVM, fVmFFs)
+ && !VMCPU_FF_IS_ANY_SET(pGVCpu, fCpuFFs))
+ {
+ if (VMCPU_FF_TEST_AND_CLEAR(pGVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(pGVCpu);
+
+ if (VMCPU_FF_IS_ANY_SET(pGVCpu, fIntMask))
+ return vmmR0DoHaltInterrupt(pGVCpu, uMWait, enmInterruptibility);
+
+ /*
+ * Wait if there is enough time to the next timer event.
+ */
+ if (u64Delta >= pGVCpu->vmm.s.cNsSpinBlockThreshold)
+ {
+ /* If there are a few other CPU cores around, we will procrastinate a
+ little before going to sleep, hoping for some device to raise an
+ interrupt or similar.  Though, the best thing here would be to
+ dynamically adjust the spin count according to its usefulness or
+ something... */
+ if ( pGVCpu->vmm.s.cR0HaltsSucceeded > pGVCpu->vmm.s.cR0HaltsToRing3
+ && RTMpGetOnlineCount() >= 4)
+ {
+ /** @todo Figure out how we can skip this if it hasn't helped recently...
+ * @bugref{9172#c12} */
+ uint32_t cSpinLoops = 42;
+ while (cSpinLoops-- > 0)
+ {
+ ASMNopPause();
+ if (VMCPU_FF_TEST_AND_CLEAR(pGVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(pGVCpu);
+ ASMNopPause();
+ if (VM_FF_IS_ANY_SET(pGVM, fVmFFs))
+ {
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltToR3FromSpin);
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltToR3);
+ return VINF_EM_HALT;
+ }
+ ASMNopPause();
+ if (VMCPU_FF_IS_ANY_SET(pGVCpu, fCpuFFs))
+ {
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltToR3FromSpin);
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltToR3);
+ return VINF_EM_HALT;
+ }
+ ASMNopPause();
+ if (VMCPU_FF_IS_ANY_SET(pGVCpu, fIntMask))
+ {
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltExecFromSpin);
+ return vmmR0DoHaltInterrupt(pGVCpu, uMWait, enmInterruptibility);
+ }
+ ASMNopPause();
+ }
+ }
+
+ /*
+ * We have to set the state to VMCPUSTATE_STARTED_HALTED here so ring-3
+ * knows when to notify us (cannot access VMINTUSERPERVMCPU::fWait from here).
+ * After changing the state we must recheck the force flags of course.
+ */
+ if (VMCPU_CMPXCHG_STATE(pGVCpu, VMCPUSTATE_STARTED_HALTED, VMCPUSTATE_STARTED))
+ {
+ if ( !VM_FF_IS_ANY_SET(pGVM, fVmFFs)
+ && !VMCPU_FF_IS_ANY_SET(pGVCpu, fCpuFFs))
+ {
+ if (VMCPU_FF_TEST_AND_CLEAR(pGVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(pGVCpu);
+
+ if (VMCPU_FF_IS_ANY_SET(pGVCpu, fIntMask))
+ {
+ VMCPU_CMPXCHG_STATE(pGVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_HALTED);
+ return vmmR0DoHaltInterrupt(pGVCpu, uMWait, enmInterruptibility);
+ }
+
+ /* Okay, block! */
+ uint64_t const u64StartSchedHalt = RTTimeNanoTS();
+ int rc = GVMMR0SchedHalt(pGVM, pGVCpu, u64GipTime);
+ uint64_t const u64EndSchedHalt = RTTimeNanoTS();
+ uint64_t const cNsElapsedSchedHalt = u64EndSchedHalt - u64StartSchedHalt;
+ Log10(("vmmR0DoHalt: CPU%d: halted %llu ns\n", pGVCpu->idCpu, cNsElapsedSchedHalt));
+
+ VMCPU_CMPXCHG_STATE(pGVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_HALTED);
+ STAM_REL_PROFILE_ADD_PERIOD(&pGVCpu->vmm.s.StatR0HaltBlock, cNsElapsedSchedHalt);
+ if ( rc == VINF_SUCCESS
+ || rc == VERR_INTERRUPTED)
+ {
+ /* Keep some stats like ring-3 does. */
+ int64_t const cNsOverslept = u64EndSchedHalt - u64GipTime;
+ if (cNsOverslept > 50000)
+ STAM_REL_PROFILE_ADD_PERIOD(&pGVCpu->vmm.s.StatR0HaltBlockOverslept, cNsOverslept);
+ else if (cNsOverslept < -50000)
+ STAM_REL_PROFILE_ADD_PERIOD(&pGVCpu->vmm.s.StatR0HaltBlockInsomnia, cNsElapsedSchedHalt);
+ else
+ STAM_REL_PROFILE_ADD_PERIOD(&pGVCpu->vmm.s.StatR0HaltBlockOnTime, cNsElapsedSchedHalt);
+
+ /*
+ * Recheck whether we can resume execution or have to go to ring-3.
+ */
+ if ( !VM_FF_IS_ANY_SET(pGVM, fVmFFs)
+ && !VMCPU_FF_IS_ANY_SET(pGVCpu, fCpuFFs))
+ {
+ if (VMCPU_FF_TEST_AND_CLEAR(pGVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(pGVCpu);
+ if (VMCPU_FF_IS_ANY_SET(pGVCpu, fIntMask))
+ {
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltExecFromBlock);
+ return vmmR0DoHaltInterrupt(pGVCpu, uMWait, enmInterruptibility);
+ }
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltToR3PostNoInt);
+ Log12(("vmmR0DoHalt: CPU%d post #2 - No pending interrupt\n", pGVCpu->idCpu));
+ }
+ else
+ {
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltToR3PostPendingFF);
+ Log12(("vmmR0DoHalt: CPU%d post #1 - Pending FF\n", pGVCpu->idCpu));
+ }
+ }
+ else
+ {
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltToR3Other);
+ Log12(("vmmR0DoHalt: CPU%d GVMMR0SchedHalt failed: %Rrc\n", pGVCpu->idCpu, rc));
+ }
+ }
+ else
+ {
+ VMCPU_CMPXCHG_STATE(pGVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_HALTED);
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltToR3PendingFF);
+ Log12(("vmmR0DoHalt: CPU%d failed #5 - Pending FF\n", pGVCpu->idCpu));
+ }
+ }
+ else
+ {
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltToR3Other);
+ Log12(("vmmR0DoHalt: CPU%d failed #4 - enmState=%d\n", pGVCpu->idCpu, VMCPU_GET_STATE(pGVCpu)));
+ }
+ }
+ else
+ {
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltToR3SmallDelta);
+ Log12(("vmmR0DoHalt: CPU%d failed #3 - delta too small: %RU64\n", pGVCpu->idCpu, u64Delta));
+ }
+ }
+ else
+ {
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltToR3PendingFF);
+ Log12(("vmmR0DoHalt: CPU%d failed #2 - Pending FF\n", pGVCpu->idCpu));
+ }
+ }
+ else
+ {
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltToR3PendingFF);
+ Log12(("vmmR0DoHalt: CPU%d failed #1 - Pending FF\n", pGVCpu->idCpu));
+ }
+ }
+ else
+ {
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltToR3Other);
+ Log12(("vmmR0DoHalt: CPU%d failed #0 - fMayHaltInRing0=%d TRPMHasTrap=%d enmInt=%d uMWait=%u\n",
+ pGVCpu->idCpu, pGVCpu->vmm.s.fMayHaltInRing0, TRPMHasTrap(pGVCpu), enmInterruptibility, uMWait));
+ }
+
+ STAM_REL_COUNTER_INC(&pGVCpu->vmm.s.StatR0HaltToR3);
+ return VINF_EM_HALT;
+}
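+
+
+/*
+ * A minimal illustrative sketch (hypothetical helper, for exposition only) of
+ * the spin-then-block shape used by vmmR0DoHalt above: poll a wake-up
+ * condition a bounded number of times, pausing between polls, before paying
+ * for the expensive blocking call (GVMMR0SchedHalt above), so that interrupts
+ * arriving almost immediately avoid a scheduler round trip.
+ */
+DECLINLINE(bool) vmmR0SketchSpinBeforeBlock(bool (*pfnWakeupPending)(void *pvUser), void *pvUser, uint32_t cSpinLoops)
+{
+    while (cSpinLoops-- > 0)
+    {
+        if (pfnWakeupPending(pvUser))
+            return true;            /* woke up while spinning, no need to block */
+        ASMNopPause();              /* be nice to the other hardware thread */
+    }
+    return false;                   /* caller should now do the blocking wait */
+}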
+
+
+/**
+ * VMM ring-0 thread-context callback.
+ *
+ * This does common HM state updating and calls the HM-specific thread-context
+ * callback.
+ *
+ * @param enmEvent The thread-context event.
+ * @param pvUser Opaque pointer to the VMCPU.
+ *
+ * @thread EMT(pvUser)
+ */
+static DECLCALLBACK(void) vmmR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, void *pvUser)
+{
+ PVMCPUCC pVCpu = (PVMCPUCC)pvUser;
+
+ switch (enmEvent)
+ {
+ case RTTHREADCTXEVENT_IN:
+ {
+ /*
+ * Linux may call us with preemption enabled (really!) but technically we
+ * cannot get preempted here, otherwise we end up in an infinite recursion
+ * scenario (i.e. preempted in resume hook -> preempt hook -> resume hook...
+ * ad infinitum). Let's just disable preemption for now...
+ */
+ /** @todo r=bird: I don't believe the above. The linux code is clearly enabling
+ * preemption after doing the callout (one or two functions up the
+ * call chain). */
+ /** @todo r=ramshankar: See @bugref{5313#c30}. */
+ RTTHREADPREEMPTSTATE ParanoidPreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+ RTThreadPreemptDisable(&ParanoidPreemptState);
+
+ /* We need to update the VCPU <-> host CPU mapping. */
+ RTCPUID idHostCpu;
+ uint32_t iHostCpuSet = RTMpCurSetIndexAndId(&idHostCpu);
+ pVCpu->iHostCpuSet = iHostCpuSet;
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, idHostCpu);
+
+ /* In the very unlikely event that the GIP delta for the CPU we're
+ rescheduled onto needs calculating, try to force a return to ring-3.
+ We unfortunately cannot do the measurements right here. */
+ if (RT_UNLIKELY(SUPIsTscDeltaAvailableForCpuSetIndex(iHostCpuSet)))
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
+
+ /* Invoke the HM-specific thread-context callback. */
+ HMR0ThreadCtxCallback(enmEvent, pvUser);
+
+ /* Restore preemption. */
+ RTThreadPreemptRestore(&ParanoidPreemptState);
+ break;
+ }
+
+ case RTTHREADCTXEVENT_OUT:
+ {
+ /* Invoke the HM-specific thread-context callback. */
+ HMR0ThreadCtxCallback(enmEvent, pvUser);
+
+ /*
+ * Sigh. See VMMGetCpu() used by VMCPU_ASSERT_EMT(). We cannot let several VCPUs
+ * have the same host CPU associated with it.
+ */
+ pVCpu->iHostCpuSet = UINT32_MAX;
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID);
+ break;
+ }
+
+ default:
+ /* Invoke the HM-specific thread-context callback. */
+ HMR0ThreadCtxCallback(enmEvent, pvUser);
+ break;
+ }
+}
+
+
+/**
+ * Creates thread switching hook for the current EMT thread.
+ *
+ * This is called by GVMMR0CreateVM and GVMMR0RegisterVCpu. If the host
+ * platform does not implement switcher hooks, no hooks will be created and the
+ * member will be set to NIL_RTTHREADCTXHOOK.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @thread EMT(pVCpu)
+ */
+VMMR0_INT_DECL(int) VMMR0ThreadCtxHookCreateForEmt(PVMCPUCC pVCpu)
+{
+ VMCPU_ASSERT_EMT(pVCpu);
+ Assert(pVCpu->vmm.s.hCtxHook == NIL_RTTHREADCTXHOOK);
+
+#if 1 /* To disable this stuff change to zero. */
+ int rc = RTThreadCtxHookCreate(&pVCpu->vmm.s.hCtxHook, 0, vmmR0ThreadCtxCallback, pVCpu);
+ if (RT_SUCCESS(rc))
+ return rc;
+#else
+ RT_NOREF(vmmR0ThreadCtxCallback);
+ int rc = VERR_NOT_SUPPORTED;
+#endif
+
+ pVCpu->vmm.s.hCtxHook = NIL_RTTHREADCTXHOOK;
+ if (rc == VERR_NOT_SUPPORTED)
+ return VINF_SUCCESS;
+
+ LogRelMax(32, ("RTThreadCtxHookCreate failed! rc=%Rrc pVCpu=%p idCpu=%RU32\n", rc, pVCpu, pVCpu->idCpu));
+ return VINF_SUCCESS; /* Just ignore it, we can live without context hooks. */
+}
+
+
+/**
+ * Destroys the thread switching hook for the specified VCPU.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @remarks Can be called from any thread.
+ */
+VMMR0_INT_DECL(void) VMMR0ThreadCtxHookDestroyForEmt(PVMCPUCC pVCpu)
+{
+ int rc = RTThreadCtxHookDestroy(pVCpu->vmm.s.hCtxHook);
+ AssertRC(rc);
+ pVCpu->vmm.s.hCtxHook = NIL_RTTHREADCTXHOOK;
+}
+
+
+/**
+ * Disables the thread switching hook for this VCPU (if we got one).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @thread EMT(pVCpu)
+ *
+ * @remarks This also clears VMCPU::idHostCpu, so the mapping is invalid after
+ * this call. This means you have to be careful with what you do!
+ */
+VMMR0_INT_DECL(void) VMMR0ThreadCtxHookDisable(PVMCPUCC pVCpu)
+{
+ /*
+ * Clear the VCPU <-> host CPU mapping as we've left HM context.
+ * @bugref{7726#c19} explains the need for this trick:
+ *
+ * VMXR0CallRing3Callback/SVMR0CallRing3Callback &
+ * hmR0VmxLeaveSession/hmR0SvmLeaveSession disables context hooks during
+ * longjmp & normal return to ring-3, which opens a window where we may be
+ * rescheduled without changing VMCPU::idHostCpu, causing confusion if
+ * the CPU starts executing a different EMT. Both functions first disable
+ * preemption and then call HMR0LeaveCpu, which invalidates idHostCpu, leaving
+ * an opening for getting preempted.
+ */
+ /** @todo Make HM not need this API! Then we could leave the hooks enabled
+ * all the time. */
+ /** @todo move this into the context hook disabling if(). */
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID);
+
+ /*
+ * Disable the context hook, if we got one.
+ */
+ if (pVCpu->vmm.s.hCtxHook != NIL_RTTHREADCTXHOOK)
+ {
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ int rc = RTThreadCtxHookDisable(pVCpu->vmm.s.hCtxHook);
+ AssertRC(rc);
+ }
+}
+
+
+/**
+ * Internal version of VMMR0ThreadCtxHooksAreRegistered.
+ *
+ * @returns true if registered, false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(bool) vmmR0ThreadCtxHookIsEnabled(PVMCPUCC pVCpu)
+{
+ return RTThreadCtxHookIsEnabled(pVCpu->vmm.s.hCtxHook);
+}
+
+
+/**
+ * Whether thread-context hooks are registered for this VCPU.
+ *
+ * @returns true if registered, false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(bool) VMMR0ThreadCtxHookIsEnabled(PVMCPUCC pVCpu)
+{
+ return vmmR0ThreadCtxHookIsEnabled(pVCpu);
+}
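+
+
+/*
+ * A minimal lifecycle sketch (hypothetical function, for exposition only)
+ * tying the thread-context hook APIs above together: the hook is created once
+ * per EMT, enabled/disabled around HM context (the enabling side is not shown
+ * in this file), and destroyed when the EMT goes away.
+ */
+DECLINLINE(void) vmmR0SketchCtxHookLifecycle(PVMCPUCC pVCpu)
+{
+    int rc = VMMR0ThreadCtxHookCreateForEmt(pVCpu);     /* EMT(pVCpu); silently degrades to no hook if unsupported */
+    if (RT_SUCCESS(rc))
+    {
+        /* ... guest execution; vmmR0ThreadCtxCallback fires on scheduling events ... */
+        VMMR0ThreadCtxHookDisable(pVCpu);               /* clears the VCPU <-> host CPU mapping; needs preemption
+                                                           disabled when a hook actually exists */
+    }
+    VMMR0ThreadCtxHookDestroyForEmt(pVCpu);             /* tear down; callable from any thread */
+}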
+
+
+#ifdef VBOX_WITH_STATISTICS
+/**
+ * Record return code statistics
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param rc The status code.
+ */
+static void vmmR0RecordRC(PVMCC pVM, PVMCPUCC pVCpu, int rc)
+{
+ /*
+ * Collect statistics.
+ */
+ switch (rc)
+ {
+ case VINF_SUCCESS:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetNormal);
+ break;
+ case VINF_EM_RAW_INTERRUPT:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetInterrupt);
+ break;
+ case VINF_EM_RAW_INTERRUPT_HYPER:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetInterruptHyper);
+ break;
+ case VINF_EM_RAW_GUEST_TRAP:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetGuestTrap);
+ break;
+ case VINF_EM_RAW_RING_SWITCH:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetRingSwitch);
+ break;
+ case VINF_EM_RAW_RING_SWITCH_INT:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetRingSwitchInt);
+ break;
+ case VINF_EM_RAW_STALE_SELECTOR:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetStaleSelector);
+ break;
+ case VINF_EM_RAW_IRET_TRAP:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetIRETTrap);
+ break;
+ case VINF_IOM_R3_IOPORT_READ:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetIORead);
+ break;
+ case VINF_IOM_R3_IOPORT_WRITE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetIOWrite);
+ break;
+ case VINF_IOM_R3_IOPORT_COMMIT_WRITE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetIOCommitWrite);
+ break;
+ case VINF_IOM_R3_MMIO_READ:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMMIORead);
+ break;
+ case VINF_IOM_R3_MMIO_WRITE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMMIOWrite);
+ break;
+ case VINF_IOM_R3_MMIO_COMMIT_WRITE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMMIOCommitWrite);
+ break;
+ case VINF_IOM_R3_MMIO_READ_WRITE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMMIOReadWrite);
+ break;
+ case VINF_PATM_HC_MMIO_PATCH_READ:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMMIOPatchRead);
+ break;
+ case VINF_PATM_HC_MMIO_PATCH_WRITE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMMIOPatchWrite);
+ break;
+ case VINF_CPUM_R3_MSR_READ:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMSRRead);
+ break;
+ case VINF_CPUM_R3_MSR_WRITE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMSRWrite);
+ break;
+ case VINF_EM_RAW_EMULATE_INSTR:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetEmulate);
+ break;
+ case VINF_PATCH_EMULATE_INSTR:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPatchEmulate);
+ break;
+ case VINF_EM_RAW_EMULATE_INSTR_LDT_FAULT:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetLDTFault);
+ break;
+ case VINF_EM_RAW_EMULATE_INSTR_GDT_FAULT:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetGDTFault);
+ break;
+ case VINF_EM_RAW_EMULATE_INSTR_IDT_FAULT:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetIDTFault);
+ break;
+ case VINF_EM_RAW_EMULATE_INSTR_TSS_FAULT:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetTSSFault);
+ break;
+ case VINF_CSAM_PENDING_ACTION:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetCSAMTask);
+ break;
+ case VINF_PGM_SYNC_CR3:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetSyncCR3);
+ break;
+ case VINF_PATM_PATCH_INT3:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPatchInt3);
+ break;
+ case VINF_PATM_PATCH_TRAP_PF:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPatchPF);
+ break;
+ case VINF_PATM_PATCH_TRAP_GP:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPatchGP);
+ break;
+ case VINF_PATM_PENDING_IRQ_AFTER_IRET:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPatchIretIRQ);
+ break;
+ case VINF_EM_RESCHEDULE_REM:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetRescheduleREM);
+ break;
+ case VINF_EM_RAW_TO_R3:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Total);
+ if (VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3TMVirt);
+ else if (VM_FF_IS_SET(pVM, VM_FF_PGM_NEED_HANDY_PAGES))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3HandyPages);
+ else if (VM_FF_IS_SET(pVM, VM_FF_PDM_QUEUES))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3PDMQueues);
+ else if (VM_FF_IS_SET(pVM, VM_FF_EMT_RENDEZVOUS))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Rendezvous);
+ else if (VM_FF_IS_SET(pVM, VM_FF_PDM_DMA))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3DMA);
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TIMER))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Timer);
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PDM_CRITSECT))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3CritSect);
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TO_R3))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3FF);
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_IEM))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Iem);
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_IOM))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Iom);
+ else
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Unknown);
+ break;
+
+ case VINF_EM_RAW_TIMER_PENDING:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetTimerPending);
+ break;
+ case VINF_EM_RAW_INTERRUPT_PENDING:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetInterruptPending);
+ break;
+ case VINF_VMM_CALL_HOST:
+ switch (pVCpu->vmm.s.enmCallRing3Operation)
+ {
+ case VMMCALLRING3_PDM_CRIT_SECT_ENTER:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallPDMCritSectEnter);
+ break;
+ case VMMCALLRING3_PDM_LOCK:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallPDMLock);
+ break;
+ case VMMCALLRING3_PGM_POOL_GROW:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallPGMPoolGrow);
+ break;
+ case VMMCALLRING3_PGM_LOCK:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallPGMLock);
+ break;
+ case VMMCALLRING3_PGM_MAP_CHUNK:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallPGMMapChunk);
+ break;
+ case VMMCALLRING3_PGM_ALLOCATE_HANDY_PAGES:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallPGMAllocHandy);
+ break;
+ case VMMCALLRING3_VMM_LOGGER_FLUSH:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallLogFlush);
+ break;
+ case VMMCALLRING3_VM_SET_ERROR:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallVMSetError);
+ break;
+ case VMMCALLRING3_VM_SET_RUNTIME_ERROR:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallVMSetRuntimeError);
+ break;
+ case VMMCALLRING3_VM_R0_ASSERTION:
+ default:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetCallRing3);
+ break;
+ }
+ break;
+ case VINF_PATM_DUPLICATE_FUNCTION:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPATMDuplicateFn);
+ break;
+ case VINF_PGM_CHANGE_MODE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPGMChangeMode);
+ break;
+ case VINF_PGM_POOL_FLUSH_PENDING:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPGMFlushPending);
+ break;
+ case VINF_EM_PENDING_REQUEST:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPendingRequest);
+ break;
+ case VINF_EM_HM_PATCH_TPR_INSTR:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPatchTPR);
+ break;
+ default:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMisc);
+ break;
+ }
+}
+#endif /* VBOX_WITH_STATISTICS */
+
+
+/**
+ * The Ring 0 entry point, called by the fast-ioctl path.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVMIgnored The cross context VM structure. Ignored; the return code is
+ * stored in pGVM->aCpus[idCpu].vmm.s.iLastGZRc.
+ * @param idCpu The Virtual CPU ID of the calling EMT.
+ * @param enmOperation Which operation to execute.
+ * @remarks Assume called with interrupts _enabled_.
+ */
+VMMR0DECL(void) VMMR0EntryFast(PGVM pGVM, PVMCC pVMIgnored, VMCPUID idCpu, VMMR0OPERATION enmOperation)
+{
+ RT_NOREF(pVMIgnored);
+
+ /*
+ * Validation.
+ */
+ if ( idCpu < pGVM->cCpus
+ && pGVM->cCpus == pGVM->cCpusUnsafe)
+ { /*likely*/ }
+ else
+ {
+ SUPR0Printf("VMMR0EntryFast: Bad idCpu=%#x cCpus=%#x cCpusUnsafe=%#x\n", idCpu, pGVM->cCpus, pGVM->cCpusUnsafe);
+ return;
+ }
+
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ RTNATIVETHREAD const hNativeThread = RTThreadNativeSelf();
+ if (RT_LIKELY( pGVCpu->hEMT == hNativeThread
+ && pGVCpu->hNativeThreadR0 == hNativeThread))
+ { /* likely */ }
+ else
+ {
+ SUPR0Printf("VMMR0EntryFast: Bad thread idCpu=%#x hNativeSelf=%p pGVCpu->hEmt=%p pGVCpu->hNativeThreadR0=%p\n",
+ idCpu, hNativeThread, pGVCpu->hEMT, pGVCpu->hNativeThreadR0);
+ return;
+ }
+
+ /*
+ * SMAP fun.
+ */
+ VMM_CHECK_SMAP_SETUP();
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
+ /*
+ * Perform requested operation.
+ */
+ switch (enmOperation)
+ {
+ /*
+ * Run guest code using the available hardware acceleration technology.
+ */
+ case VMMR0_DO_HM_RUN:
+ {
+ for (;;) /* hlt loop */
+ {
+ /*
+ * Disable preemption.
+ */
+ Assert(!vmmR0ThreadCtxHookIsEnabled(pGVCpu));
+ RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+ RTThreadPreemptDisable(&PreemptState);
+
+ /*
+ * Get the host CPU identifiers, make sure they are valid and that
+ * we've got a TSC delta for the CPU.
+ */
+ RTCPUID idHostCpu;
+ uint32_t iHostCpuSet = RTMpCurSetIndexAndId(&idHostCpu);
+ if (RT_LIKELY( iHostCpuSet < RTCPUSET_MAX_CPUS
+ && SUPIsTscDeltaAvailableForCpuSetIndex(iHostCpuSet)))
+ {
+ pGVCpu->iHostCpuSet = iHostCpuSet;
+ ASMAtomicWriteU32(&pGVCpu->idHostCpu, idHostCpu);
+
+ /*
+ * Update the periodic preemption timer if it's active.
+ */
+ if (pGVM->vmm.s.fUsePeriodicPreemptionTimers)
+ GVMMR0SchedUpdatePeriodicPreemptionTimer(pGVM, pGVCpu->idHostCpu, TMCalcHostTimerFrequency(pGVM, pGVCpu));
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
+#ifdef VMM_R0_TOUCH_FPU
+ /*
+ * Make sure we've got the FPU state loaded so we don't need to clear
+ * CR0.TS and get out of sync with the host kernel when loading the guest
+ * FPU state. @ref sec_cpum_fpu (CPUM.cpp) and @bugref{4053}.
+ */
+ CPUMR0TouchHostFpu();
+#endif
+ int rc;
+ bool fPreemptRestored = false;
+ if (!HMR0SuspendPending())
+ {
+ /*
+ * Enable the context switching hook.
+ */
+ if (pGVCpu->vmm.s.hCtxHook != NIL_RTTHREADCTXHOOK)
+ {
+ Assert(!RTThreadCtxHookIsEnabled(pGVCpu->vmm.s.hCtxHook));
+ int rc2 = RTThreadCtxHookEnable(pGVCpu->vmm.s.hCtxHook); AssertRC(rc2);
+ }
+
+ /*
+ * Enter HM context.
+ */
+ rc = HMR0Enter(pGVCpu);
+ if (RT_SUCCESS(rc))
+ {
+ VMCPU_SET_STATE(pGVCpu, VMCPUSTATE_STARTED_HM);
+
+ /*
+ * When preemption hooks are in place, enable preemption now that
+ * we're in HM context.
+ */
+ if (vmmR0ThreadCtxHookIsEnabled(pGVCpu))
+ {
+ fPreemptRestored = true;
+ RTThreadPreemptRestore(&PreemptState);
+ }
+
+ /*
+ * Setup the longjmp machinery and execute guest code (calls HMR0RunGuestCode).
+ */
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ rc = vmmR0CallRing3SetJmp(&pGVCpu->vmm.s.CallRing3JmpBufR0, HMR0RunGuestCode, pGVM, pGVCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
+ /*
+ * Assert sanity on the way out. Manual assertion code is used here because normal
+ * assertions would panic the host since we're outside the setjmp/longjmp zone.
+ */
+ if (RT_UNLIKELY( VMCPU_GET_STATE(pGVCpu) != VMCPUSTATE_STARTED_HM
+ && RT_SUCCESS_NP(rc) && rc != VINF_VMM_CALL_HOST ))
+ {
+ pGVM->vmm.s.szRing0AssertMsg1[0] = '\0';
+ RTStrPrintf(pGVM->vmm.s.szRing0AssertMsg2, sizeof(pGVM->vmm.s.szRing0AssertMsg2),
+ "Got VMCPU state %d expected %d.\n", VMCPU_GET_STATE(pGVCpu), VMCPUSTATE_STARTED_HM);
+ rc = VERR_VMM_WRONG_HM_VMCPU_STATE;
+ }
+ /** @todo Get rid of this. HM shouldn't disable the context hook. */
+ else if (RT_UNLIKELY(vmmR0ThreadCtxHookIsEnabled(pGVCpu)))
+ {
+ pGVM->vmm.s.szRing0AssertMsg1[0] = '\0';
+ RTStrPrintf(pGVM->vmm.s.szRing0AssertMsg2, sizeof(pGVM->vmm.s.szRing0AssertMsg2),
+ "Thread-context hooks still enabled! VCPU=%p Id=%u rc=%d.\n", pGVCpu, pGVCpu->idCpu, rc);
+ rc = VERR_INVALID_STATE;
+ }
+
+ VMCPU_SET_STATE(pGVCpu, VMCPUSTATE_STARTED);
+ }
+ STAM_COUNTER_INC(&pGVM->vmm.s.StatRunGC);
+
+ /*
+ * Invalidate the host CPU identifiers before we disable the context
+ * hook / restore preemption.
+ */
+ pGVCpu->iHostCpuSet = UINT32_MAX;
+ ASMAtomicWriteU32(&pGVCpu->idHostCpu, NIL_RTCPUID);
+
+ /*
+ * Disable context hooks. Due to unresolved cleanup issues, we
+ * cannot leave the hooks enabled when we return to ring-3.
+ *
+ * Note! At the moment HM may also have disabled the hook
+ * when we get here, but the IPRT API handles that.
+ */
+ if (pGVCpu->vmm.s.hCtxHook != NIL_RTTHREADCTXHOOK)
+ {
+ ASMAtomicWriteU32(&pGVCpu->idHostCpu, NIL_RTCPUID);
+ RTThreadCtxHookDisable(pGVCpu->vmm.s.hCtxHook);
+ }
+ }
+ /*
+ * The system is about to go into suspend mode; go back to ring 3.
+ */
+ else
+ {
+ rc = VINF_EM_RAW_INTERRUPT;
+ pGVCpu->iHostCpuSet = UINT32_MAX;
+ ASMAtomicWriteU32(&pGVCpu->idHostCpu, NIL_RTCPUID);
+ }
+
+ /** @todo When HM stops messing with the context hook state, we'll disable
+ * preemption again before the RTThreadCtxHookDisable call. */
+ if (!fPreemptRestored)
+ RTThreadPreemptRestore(&PreemptState);
+
+ pGVCpu->vmm.s.iLastGZRc = rc;
+
+ /* Fire dtrace probe and collect statistics. */
+ VBOXVMM_R0_VMM_RETURN_TO_RING3_HM(pGVCpu, CPUMQueryGuestCtxPtr(pGVCpu), rc);
+#ifdef VBOX_WITH_STATISTICS
+ vmmR0RecordRC(pGVM, pGVCpu, rc);
+#endif
+ /*
+ * If this is a halt, try to handle it in ring-0 (vmmR0DoHalt) and only go back to ring-3 if that fails.
+ */
+ if (rc != VINF_EM_HALT)
+ { /* we're not in a hurry for a HLT, so prefer this path */ }
+ else
+ {
+ pGVCpu->vmm.s.iLastGZRc = rc = vmmR0DoHalt(pGVM, pGVCpu);
+ if (rc == VINF_SUCCESS)
+ {
+ pGVCpu->vmm.s.cR0HaltsSucceeded++;
+ continue;
+ }
+ pGVCpu->vmm.s.cR0HaltsToRing3++;
+ }
+ }
+ /*
+ * Invalid CPU set index or TSC delta in need of measuring.
+ */
+ else
+ {
+ pGVCpu->iHostCpuSet = UINT32_MAX;
+ ASMAtomicWriteU32(&pGVCpu->idHostCpu, NIL_RTCPUID);
+ RTThreadPreemptRestore(&PreemptState);
+ if (iHostCpuSet < RTCPUSET_MAX_CPUS)
+ {
+ int rc = SUPR0TscDeltaMeasureBySetIndex(pGVM->pSession, iHostCpuSet, 0 /*fFlags*/,
+ 2 /*cMsWaitRetry*/, 5*RT_MS_1SEC /*cMsWaitThread*/,
+ 0 /*default cTries*/);
+ if (RT_SUCCESS(rc) || rc == VERR_CPU_OFFLINE)
+ pGVCpu->vmm.s.iLastGZRc = VINF_EM_RAW_TO_R3;
+ else
+ pGVCpu->vmm.s.iLastGZRc = rc;
+ }
+ else
+ pGVCpu->vmm.s.iLastGZRc = VERR_INVALID_CPU_INDEX;
+ }
+ break;
+
+ } /* halt loop. */
+ break;
+ }
+
+#ifdef VBOX_WITH_NEM_R0
+# if defined(RT_ARCH_AMD64) && defined(RT_OS_WINDOWS)
+ case VMMR0_DO_NEM_RUN:
+ {
+ /*
+ * Setup the longjmp machinery and execute guest code (calls NEMR0RunGuestCode).
+ */
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+# ifdef VBOXSTRICTRC_STRICT_ENABLED
+ int rc = vmmR0CallRing3SetJmp2(&pGVCpu->vmm.s.CallRing3JmpBufR0, (PFNVMMR0SETJMP2)NEMR0RunGuestCode, pGVM, idCpu);
+# else
+ int rc = vmmR0CallRing3SetJmp2(&pGVCpu->vmm.s.CallRing3JmpBufR0, NEMR0RunGuestCode, pGVM, idCpu);
+# endif
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ STAM_COUNTER_INC(&pGVM->vmm.s.StatRunGC);
+
+ pGVCpu->vmm.s.iLastGZRc = rc;
+
+ /*
+ * Fire dtrace probe and collect statistics.
+ */
+ VBOXVMM_R0_VMM_RETURN_TO_RING3_NEM(pGVCpu, CPUMQueryGuestCtxPtr(pGVCpu), rc);
+# ifdef VBOX_WITH_STATISTICS
+ vmmR0RecordRC(pGVM, pGVCpu, rc);
+# endif
+ break;
+ }
+# endif
+#endif
+
+ /*
+ * For profiling.
+ */
+ case VMMR0_DO_NOP:
+ pGVCpu->vmm.s.iLastGZRc = VINF_SUCCESS;
+ break;
+
+ /*
+ * Shouldn't happen.
+ */
+ default:
+ AssertMsgFailed(("%#x\n", enmOperation));
+ pGVCpu->vmm.s.iLastGZRc = VERR_NOT_SUPPORTED;
+ break;
+ }
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+}
+
+
+/**
+ * Validates a session or VM session argument.
+ *
+ * @returns true / false accordingly.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pClaimedSession The session claim to validate.
+ * @param pSession The session argument.
+ */
+DECLINLINE(bool) vmmR0IsValidSession(PGVM pGVM, PSUPDRVSESSION pClaimedSession, PSUPDRVSESSION pSession)
+{
+ /* This must be set! */
+ if (!pSession)
+ return false;
+
+ /* Only one out of the two. */
+ if (pGVM && pClaimedSession)
+ return false;
+ if (pGVM)
+ pClaimedSession = pGVM->pSession;
+ return pClaimedSession == pSession;
+}
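+/* Note: vmmR0IsValidSession is used by the VMMR0_DO_INTNET_xxx (and, when enabled,
+ VMMR0_DO_PCIRAW_REQ) cases below to match the request's pSession against the caller's. */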
+
+
+/**
+ * VMMR0EntryEx worker function, either called directly or, whenever possible,
+ * called through a longjmp so we can exit safely on failure.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu Virtual CPU ID argument. Must be NIL_VMCPUID if pGVM
+ * is NULL, and may be NIL_VMCPUID if it isn't.
+ * @param enmOperation Which operation to execute.
+ * @param pReqHdr This points to a SUPVMMR0REQHDR packet. Optional.
+ * The support driver validates this if it's present.
+ * @param u64Arg Some simple constant argument.
+ * @param pSession The session of the caller.
+ *
+ * @remarks Assume called with interrupts _enabled_.
+ */
+static int vmmR0EntryExWorker(PGVM pGVM, VMCPUID idCpu, VMMR0OPERATION enmOperation,
+ PSUPVMMR0REQHDR pReqHdr, uint64_t u64Arg, PSUPDRVSESSION pSession)
+{
+ /*
+ * Validate pGVM and idCpu for consistency and validity.
+ */
+ if (pGVM != NULL)
+ {
+ if (RT_LIKELY(((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0))
+ { /* likely */ }
+ else
+ {
+ SUPR0Printf("vmmR0EntryExWorker: Invalid pGVM=%p! (op=%d)\n", pGVM, enmOperation);
+ return VERR_INVALID_POINTER;
+ }
+
+ if (RT_LIKELY(idCpu == NIL_VMCPUID || idCpu < pGVM->cCpus))
+ { /* likely */ }
+ else
+ {
+ SUPR0Printf("vmmR0EntryExWorker: Invalid idCpu %#x (cCpus=%#x)\n", idCpu, pGVM->cCpus);
+ return VERR_INVALID_PARAMETER;
+ }
+
+ if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
+ && pGVM->enmVMState <= VMSTATE_TERMINATED
+ && pGVM->pSession == pSession
+ && pGVM->pSelf == pGVM))
+ { /* likely */ }
+ else
+ {
+ SUPR0Printf("vmmR0EntryExWorker: Invalid pGVM=%p:{.enmVMState=%d, .cCpus=%#x, .pSession=%p(==%p), .pSelf=%p(==%p)}! (op=%d)\n",
+ pGVM, pGVM->enmVMState, pGVM->cCpus, pGVM->pSession, pSession, pGVM->pSelf, pGVM, enmOperation);
+ return VERR_INVALID_POINTER;
+ }
+ }
+ else if (RT_LIKELY(idCpu == NIL_VMCPUID))
+ { /* likely */ }
+ else
+ {
+ SUPR0Printf("vmmR0EntryExWorker: Invalid idCpu=%u\n", idCpu);
+ return VERR_INVALID_PARAMETER;
+ }
+
+ /*
+ * SMAP fun.
+ */
+ VMM_CHECK_SMAP_SETUP();
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+
+ /*
+ * Process the request.
+ */
+ int rc;
+ switch (enmOperation)
+ {
+ /*
+ * GVM requests
+ */
+ case VMMR0_DO_GVMM_CREATE_VM:
+ if (pGVM == NULL && u64Arg == 0 && idCpu == NIL_VMCPUID)
+ rc = GVMMR0CreateVMReq((PGVMMCREATEVMREQ)pReqHdr, pSession);
+ else
+ rc = VERR_INVALID_PARAMETER;
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_DESTROY_VM:
+ if (pReqHdr == NULL && u64Arg == 0)
+ rc = GVMMR0DestroyVM(pGVM);
+ else
+ rc = VERR_INVALID_PARAMETER;
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_REGISTER_VMCPU:
+ if (pGVM != NULL)
+ rc = GVMMR0RegisterVCpu(pGVM, idCpu);
+ else
+ rc = VERR_INVALID_PARAMETER;
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_DEREGISTER_VMCPU:
+ if (pGVM != NULL)
+ rc = GVMMR0DeregisterVCpu(pGVM, idCpu);
+ else
+ rc = VERR_INVALID_PARAMETER;
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_SCHED_HALT:
+ if (pReqHdr)
+ return VERR_INVALID_PARAMETER;
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ rc = GVMMR0SchedHaltReq(pGVM, idCpu, u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_SCHED_WAKE_UP:
+ if (pReqHdr || u64Arg)
+ return VERR_INVALID_PARAMETER;
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ rc = GVMMR0SchedWakeUp(pGVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_SCHED_POKE:
+ if (pReqHdr || u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GVMMR0SchedPoke(pGVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_SCHED_WAKE_UP_AND_POKE_CPUS:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GVMMR0SchedWakeUpAndPokeCpusReq(pGVM, (PGVMMSCHEDWAKEUPANDPOKECPUSREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_SCHED_POLL:
+ if (pReqHdr || u64Arg > 1)
+ return VERR_INVALID_PARAMETER;
+ rc = GVMMR0SchedPoll(pGVM, idCpu, !!u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_QUERY_STATISTICS:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GVMMR0QueryStatisticsReq(pGVM, (PGVMMQUERYSTATISTICSSREQ)pReqHdr, pSession);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_RESET_STATISTICS:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GVMMR0ResetStatisticsReq(pGVM, (PGVMMRESETSTATISTICSSREQ)pReqHdr, pSession);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ /*
+ * Initialize the R0 part of a VM instance.
+ */
+ case VMMR0_DO_VMMR0_INIT:
+ rc = vmmR0InitVM(pGVM, RT_LODWORD(u64Arg), RT_HIDWORD(u64Arg));
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ /*
+ * Does EMT specific ring-0 init.
+ */
+ case VMMR0_DO_VMMR0_INIT_EMT:
+ rc = vmmR0InitVMEmt(pGVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ /*
+ * Terminate the R0 part of a VM instance.
+ */
+ case VMMR0_DO_VMMR0_TERM:
+ rc = VMMR0TermVM(pGVM, 0 /*idCpu*/);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ /*
+ * Attempt to enable HM mode and check the current setting.
+ */
+ case VMMR0_DO_HM_ENABLE:
+ rc = HMR0EnableAllCpus(pGVM);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ /*
+ * Setup the hardware accelerated session.
+ */
+ case VMMR0_DO_HM_SETUP_VM:
+ rc = HMR0SetupVM(pGVM);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ /*
+ * PGM wrappers.
+ */
+ case VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ rc = PGMR0PhysAllocateHandyPages(pGVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_PGM_FLUSH_HANDY_PAGES:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ rc = PGMR0PhysFlushHandyPages(pGVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_PGM_ALLOCATE_LARGE_HANDY_PAGE:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ rc = PGMR0PhysAllocateLargeHandyPage(pGVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_PGM_PHYS_SETUP_IOMMU:
+ if (idCpu != 0)
+ return VERR_INVALID_CPU_ID;
+ rc = PGMR0PhysSetupIoMmu(pGVM);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_PGM_POOL_GROW:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ rc = PGMR0PoolGrow(pGVM);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ /*
+ * GMM wrappers.
+ */
+ case VMMR0_DO_GMM_INITIAL_RESERVATION:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0InitialReservationReq(pGVM, idCpu, (PGMMINITIALRESERVATIONREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_UPDATE_RESERVATION:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0UpdateReservationReq(pGVM, idCpu, (PGMMUPDATERESERVATIONREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_ALLOCATE_PAGES:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0AllocatePagesReq(pGVM, idCpu, (PGMMALLOCATEPAGESREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_FREE_PAGES:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0FreePagesReq(pGVM, idCpu, (PGMMFREEPAGESREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_FREE_LARGE_PAGE:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0FreeLargePageReq(pGVM, idCpu, (PGMMFREELARGEPAGEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_QUERY_HYPERVISOR_MEM_STATS:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0QueryHypervisorMemoryStatsReq((PGMMMEMSTATSREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_QUERY_MEM_STATS:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0QueryMemoryStatsReq(pGVM, idCpu, (PGMMMEMSTATSREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_BALLOONED_PAGES:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0BalloonedPagesReq(pGVM, idCpu, (PGMMBALLOONEDPAGESREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_MAP_UNMAP_CHUNK:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0MapUnmapChunkReq(pGVM, (PGMMMAPUNMAPCHUNKREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_SEED_CHUNK:
+ if (pReqHdr)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0SeedChunk(pGVM, idCpu, (RTR3PTR)u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_REGISTER_SHARED_MODULE:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0RegisterSharedModuleReq(pGVM, idCpu, (PGMMREGISTERSHAREDMODULEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_UNREGISTER_SHARED_MODULE:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0UnregisterSharedModuleReq(pGVM, idCpu, (PGMMUNREGISTERSHAREDMODULEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_RESET_SHARED_MODULES:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ if ( u64Arg
+ || pReqHdr)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0ResetSharedModules(pGVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+#ifdef VBOX_WITH_PAGE_SHARING
+ case VMMR0_DO_GMM_CHECK_SHARED_MODULES:
+ {
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ if ( u64Arg
+ || pReqHdr)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0CheckSharedModules(pGVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+ }
+#endif
+
+#if defined(VBOX_STRICT) && HC_ARCH_BITS == 64
+ case VMMR0_DO_GMM_FIND_DUPLICATE_PAGE:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0FindDuplicatePageReq(pGVM, (PGMMFINDDUPLICATEPAGEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+#endif
+
+ case VMMR0_DO_GMM_QUERY_STATISTICS:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0QueryStatisticsReq(pGVM, (PGMMQUERYSTATISTICSSREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_RESET_STATISTICS:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0ResetStatisticsReq(pGVM, (PGMMRESETSTATISTICSSREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ /*
+ * A quick GCFGM mock-up.
+ */
+ /** @todo GCFGM with proper access control, ring-3 management interface and all that. */
+ case VMMR0_DO_GCFGM_SET_VALUE:
+ case VMMR0_DO_GCFGM_QUERY_VALUE:
+ {
+ if (pGVM || !pReqHdr || u64Arg || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ PGCFGMVALUEREQ pReq = (PGCFGMVALUEREQ)pReqHdr;
+ if (pReq->Hdr.cbReq != sizeof(*pReq))
+ return VERR_INVALID_PARAMETER;
+ if (enmOperation == VMMR0_DO_GCFGM_SET_VALUE)
+ {
+ rc = GVMMR0SetConfig(pReq->pSession, &pReq->szName[0], pReq->u64Value);
+ //if (rc == VERR_CFGM_VALUE_NOT_FOUND)
+ // rc = GMMR0SetConfig(pReq->pSession, &pReq->szName[0], pReq->u64Value);
+ }
+ else
+ {
+ rc = GVMMR0QueryConfig(pReq->pSession, &pReq->szName[0], &pReq->u64Value);
+ //if (rc == VERR_CFGM_VALUE_NOT_FOUND)
+ // rc = GMMR0QueryConfig(pReq->pSession, &pReq->szName[0], &pReq->u64Value);
+ }
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+ }
+
+ /*
+ * PDM Wrappers.
+ */
+ case VMMR0_DO_PDM_DRIVER_CALL_REQ_HANDLER:
+ {
+ if (!pReqHdr || u64Arg || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = PDMR0DriverCallReqHandler(pGVM, (PPDMDRIVERCALLREQHANDLERREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+ }
+
+ case VMMR0_DO_PDM_DEVICE_CREATE:
+ {
+ if (!pReqHdr || u64Arg || idCpu != 0)
+ return VERR_INVALID_PARAMETER;
+ rc = PDMR0DeviceCreateReqHandler(pGVM, (PPDMDEVICECREATEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+ }
+
+ case VMMR0_DO_PDM_DEVICE_GEN_CALL:
+ {
+ if (!pReqHdr || u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = PDMR0DeviceGenCallReqHandler(pGVM, (PPDMDEVICEGENCALLREQ)pReqHdr, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+ }
+
+ /** @todo Remove this once all devices have been converted to the new style! @bugref{9218} */
+ case VMMR0_DO_PDM_DEVICE_COMPAT_SET_CRITSECT:
+ {
+ if (!pReqHdr || u64Arg || idCpu != 0)
+ return VERR_INVALID_PARAMETER;
+ rc = PDMR0DeviceCompatSetCritSectReqHandler(pGVM, (PPDMDEVICECOMPATSETCRITSECTREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+ }
+
+ /*
+ * Requests to the internal networking service.
+ */
+ case VMMR0_DO_INTNET_OPEN:
+ {
+ PINTNETOPENREQ pReq = (PINTNETOPENREQ)pReqHdr;
+ if (u64Arg || !pReq || !vmmR0IsValidSession(pGVM, pReq->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0OpenReq(pSession, pReq);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+ }
+
+ case VMMR0_DO_INTNET_IF_CLOSE:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pGVM, ((PINTNETIFCLOSEREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfCloseReq(pSession, (PINTNETIFCLOSEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+
+ case VMMR0_DO_INTNET_IF_GET_BUFFER_PTRS:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pGVM, ((PINTNETIFGETBUFFERPTRSREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfGetBufferPtrsReq(pSession, (PINTNETIFGETBUFFERPTRSREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_INTNET_IF_SET_PROMISCUOUS_MODE:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pGVM, ((PINTNETIFSETPROMISCUOUSMODEREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfSetPromiscuousModeReq(pSession, (PINTNETIFSETPROMISCUOUSMODEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_INTNET_IF_SET_MAC_ADDRESS:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pGVM, ((PINTNETIFSETMACADDRESSREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfSetMacAddressReq(pSession, (PINTNETIFSETMACADDRESSREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_INTNET_IF_SET_ACTIVE:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pGVM, ((PINTNETIFSETACTIVEREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfSetActiveReq(pSession, (PINTNETIFSETACTIVEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_INTNET_IF_SEND:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pGVM, ((PINTNETIFSENDREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfSendReq(pSession, (PINTNETIFSENDREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_INTNET_IF_WAIT:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pGVM, ((PINTNETIFWAITREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfWaitReq(pSession, (PINTNETIFWAITREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_INTNET_IF_ABORT_WAIT:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pGVM, ((PINTNETIFWAITREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfAbortWaitReq(pSession, (PINTNETIFABORTWAITREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+#if 0 //def VBOX_WITH_PCI_PASSTHROUGH
+ /*
+ * Requests to host PCI driver service.
+ */
+ case VMMR0_DO_PCIRAW_REQ:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pGVM, ((PPCIRAWSENDREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = PciRawR0ProcessReq(pGVM, pSession, (PPCIRAWSENDREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+#endif
+
+ /*
+ * NEM requests.
+ */
+#ifdef VBOX_WITH_NEM_R0
+# if defined(RT_ARCH_AMD64) && defined(RT_OS_WINDOWS)
+ case VMMR0_DO_NEM_INIT_VM:
+ if (u64Arg || pReqHdr || idCpu != 0)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0InitVM(pGVM);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_INIT_VM_PART_2:
+ if (u64Arg || pReqHdr || idCpu != 0)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0InitVMPart2(pGVM);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_MAP_PAGES:
+ if (u64Arg || pReqHdr || idCpu == NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0MapPages(pGVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_UNMAP_PAGES:
+ if (u64Arg || pReqHdr || idCpu == NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0UnmapPages(pGVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_EXPORT_STATE:
+ if (u64Arg || pReqHdr || idCpu == NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0ExportState(pGVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_IMPORT_STATE:
+ if (pReqHdr || idCpu == NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0ImportState(pGVM, idCpu, u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_QUERY_CPU_TICK:
+ if (u64Arg || pReqHdr || idCpu == NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0QueryCpuTick(pGVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_RESUME_CPU_TICK_ON_ALL:
+ if (pReqHdr || idCpu == NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0ResumeCpuTickOnAll(pGVM, idCpu, u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_UPDATE_STATISTICS:
+ if (u64Arg || pReqHdr)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0UpdateStatistics(pGVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+
+# if 1 && defined(DEBUG_bird)
+ case VMMR0_DO_NEM_EXPERIMENT:
+ if (pReqHdr)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0DoExperiment(pGVM, idCpu, u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+# endif
+# endif
+#endif
+
+ /*
+ * IOM requests.
+ */
+ case VMMR0_DO_IOM_GROW_IO_PORTS:
+ {
+ if (pReqHdr || idCpu != 0)
+ return VERR_INVALID_PARAMETER;
+ rc = IOMR0IoPortGrowRegistrationTables(pGVM, u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+ }
+
+ case VMMR0_DO_IOM_GROW_IO_PORT_STATS:
+ {
+ if (pReqHdr || idCpu != 0)
+ return VERR_INVALID_PARAMETER;
+ rc = IOMR0IoPortGrowStatisticsTable(pGVM, u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+ }
+
+ case VMMR0_DO_IOM_GROW_MMIO_REGS:
+ {
+ if (pReqHdr || idCpu != 0)
+ return VERR_INVALID_PARAMETER;
+ rc = IOMR0MmioGrowRegistrationTables(pGVM, u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+ }
+
+ case VMMR0_DO_IOM_GROW_MMIO_STATS:
+ {
+ if (pReqHdr || idCpu != 0)
+ return VERR_INVALID_PARAMETER;
+ rc = IOMR0MmioGrowStatisticsTable(pGVM, u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+ }
+
+ case VMMR0_DO_IOM_SYNC_STATS_INDICES:
+ {
+ if (pReqHdr || idCpu != 0)
+ return VERR_INVALID_PARAMETER;
+ rc = IOMR0IoPortSyncStatisticsIndices(pGVM);
+ if (RT_SUCCESS(rc))
+ rc = IOMR0MmioSyncStatisticsIndices(pGVM);
+ VMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ break;
+ }
+
+ /*
+ * For profiling.
+ */
+ case VMMR0_DO_NOP:
+ case VMMR0_DO_SLOW_NOP:
+ return VINF_SUCCESS;
+
+ /*
+ * For testing Ring-0 APIs invoked in this environment.
+ */
+ case VMMR0_DO_TESTS:
+ /** @todo make new test */
+ return VINF_SUCCESS;
+
+ default:
+ /*
+ * We're returning VERR_NOT_SUPPORTED here so we've got something other
+ * than the -1 which the interrupt gate glue code might return.
+ */
+ Log(("operation %#x is not supported\n", enmOperation));
+ return VERR_NOT_SUPPORTED;
+ }
+ return rc;
+}
+
+
+/**
+ * Argument for vmmR0EntryExWrapper containing the arguments for VMMR0EntryEx.
+ */
+typedef struct VMMR0ENTRYEXARGS
+{
+ PGVM pGVM; /**< The global (ring-0) VM structure. */
+ VMCPUID idCpu; /**< The virtual CPU ID. */
+ VMMR0OPERATION enmOperation; /**< Which operation to execute. */
+ PSUPVMMR0REQHDR pReq; /**< Pointer to the SUPVMMR0REQHDR packet. Optional. */
+ uint64_t u64Arg; /**< Some simple constant argument. */
+ PSUPDRVSESSION pSession; /**< The session of the caller. */
+} VMMR0ENTRYEXARGS;
+/** Pointer to a vmmR0EntryExWrapper argument package. */
+typedef VMMR0ENTRYEXARGS *PVMMR0ENTRYEXARGS;
+
+/**
+ * This is just a longjmp wrapper function for VMMR0EntryEx calls.
+ *
+ * @returns VBox status code.
+ * @param pvArgs The argument package (VMMR0ENTRYEXARGS).
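+ * @remarks The arguments are packed into a single VMMR0ENTRYEXARGS structure so that
+ * they can be passed through vmmR0CallRing3SetJmpEx as one user pointer.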
+ */
+static DECLCALLBACK(int) vmmR0EntryExWrapper(void *pvArgs)
+{
+ return vmmR0EntryExWorker(((PVMMR0ENTRYEXARGS)pvArgs)->pGVM,
+ ((PVMMR0ENTRYEXARGS)pvArgs)->idCpu,
+ ((PVMMR0ENTRYEXARGS)pvArgs)->enmOperation,
+ ((PVMMR0ENTRYEXARGS)pvArgs)->pReq,
+ ((PVMMR0ENTRYEXARGS)pvArgs)->u64Arg,
+ ((PVMMR0ENTRYEXARGS)pvArgs)->pSession);
+}
+
+
+/**
+ * The Ring 0 entry point, called by the support library (SUP).
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu Virtual CPU ID argument. Must be NIL_VMCPUID if pVM
+ * is NIL_RTR0PTR, and may be NIL_VMCPUID if it isn't.
+ * @param enmOperation Which operation to execute.
+ * @param pReq Pointer to the SUPVMMR0REQHDR packet. Optional.
+ * @param u64Arg Some simple constant argument.
+ * @param pSession The session of the caller.
+ * @remarks Assume called with interrupts _enabled_.
+ */
+VMMR0DECL(int) VMMR0EntryEx(PGVM pGVM, PVMCC pVM, VMCPUID idCpu, VMMR0OPERATION enmOperation,
+ PSUPVMMR0REQHDR pReq, uint64_t u64Arg, PSUPDRVSESSION pSession)
+{
+ /*
+ * Requests that should only happen on the EMT thread will be
+ * wrapped in a setjmp so we can assert without causing trouble.
+ */
+ if ( pVM != NULL
+ && pGVM != NULL
+ && pVM == pGVM /** @todo drop pGVM */
+ && idCpu < pGVM->cCpus
+ && pGVM->pSession == pSession
+ && pGVM->pSelf == pVM)
+ {
+ switch (enmOperation)
+ {
+ /* These might/will be called before VMMR3Init. */
+ case VMMR0_DO_GMM_INITIAL_RESERVATION:
+ case VMMR0_DO_GMM_UPDATE_RESERVATION:
+ case VMMR0_DO_GMM_ALLOCATE_PAGES:
+ case VMMR0_DO_GMM_FREE_PAGES:
+ case VMMR0_DO_GMM_BALLOONED_PAGES:
+ /* On the mac we might not have a valid jmp buf, so check these as well. */
+ case VMMR0_DO_VMMR0_INIT:
+ case VMMR0_DO_VMMR0_TERM:
+
+ case VMMR0_DO_PDM_DEVICE_CREATE:
+ case VMMR0_DO_PDM_DEVICE_GEN_CALL:
+ case VMMR0_DO_IOM_GROW_IO_PORTS:
+ case VMMR0_DO_IOM_GROW_IO_PORT_STATS:
+ {
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ RTNATIVETHREAD hNativeThread = RTThreadNativeSelf();
+ if (RT_LIKELY( pGVCpu->hEMT == hNativeThread
+ && pGVCpu->hNativeThreadR0 == hNativeThread))
+ {
+ if (!pGVCpu->vmm.s.CallRing3JmpBufR0.pvSavedStack)
+ break;
+
+ /** @todo validate this EMT claim... GVM knows. */
+ VMMR0ENTRYEXARGS Args;
+ Args.pGVM = pGVM;
+ Args.idCpu = idCpu;
+ Args.enmOperation = enmOperation;
+ Args.pReq = pReq;
+ Args.u64Arg = u64Arg;
+ Args.pSession = pSession;
+ return vmmR0CallRing3SetJmpEx(&pGVCpu->vmm.s.CallRing3JmpBufR0, vmmR0EntryExWrapper, &Args);
+ }
+ return VERR_VM_THREAD_NOT_EMT;
+ }
+
+ default:
+ case VMMR0_DO_PGM_POOL_GROW:
+ break;
+ }
+ }
+ return vmmR0EntryExWorker(pGVM, idCpu, enmOperation, pReq, u64Arg, pSession);
+}
+
+
+/**
+ * Checks whether we've armed the ring-0 long jump machinery.
+ *
+ * @returns @c true / @c false
+ * @param pVCpu The cross context virtual CPU structure.
+ * @thread EMT
+ * @sa VMMIsLongJumpArmed
+ */
+VMMR0_INT_DECL(bool) VMMR0IsLongJumpArmed(PVMCPUCC pVCpu)
+{
+#ifdef RT_ARCH_X86
+ return pVCpu->vmm.s.CallRing3JmpBufR0.eip
+ && !pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call;
+#else
+ return pVCpu->vmm.s.CallRing3JmpBufR0.rip
+ && !pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call;
+#endif
+}
+
+
+/**
+ * Checks whether we've done a ring-3 long jump.
+ *
+ * @returns @c true / @c false
+ * @param pVCpu The cross context virtual CPU structure.
+ * @thread EMT
+ */
+VMMR0_INT_DECL(bool) VMMR0IsInRing3LongJump(PVMCPUCC pVCpu)
+{
+ return pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call;
+}
+
+
+/**
+ * Internal R0 logger worker: Flush logger.
+ *
+ * @param pLogger The logger instance to flush.
+ * @remark This function must be exported!
+ */
+VMMR0DECL(void) vmmR0LoggerFlush(PRTLOGGER pLogger)
+{
+#ifdef LOG_ENABLED
+ /*
+ * Convert the pLogger into a VM handle and 'call' back to Ring-3.
+ * (This code is a bit paranoid.)
+ */
+ PVMMR0LOGGER pR0Logger = (PVMMR0LOGGER)((uintptr_t)pLogger - RT_UOFFSETOF(VMMR0LOGGER, Logger));
+ if ( !VALID_PTR(pR0Logger)
+ || !VALID_PTR(pR0Logger + 1)
+ || pLogger->u32Magic != RTLOGGER_MAGIC)
+ {
+# ifdef DEBUG
+ SUPR0Printf("vmmR0LoggerFlush: pLogger=%p!\n", pLogger);
+# endif
+ return;
+ }
+ if (pR0Logger->fFlushingDisabled)
+ return; /* quietly */
+
+ PVMCC pVM = pR0Logger->pVM;
+ if ( !VALID_PTR(pVM)
+ || pVM->pSelf != pVM)
+ {
+# ifdef DEBUG
+ SUPR0Printf("vmmR0LoggerFlush: pVM=%p! pSelf=%p! pLogger=%p\n", pVM, pVM->pSelf, pLogger);
+# endif
+ return;
+ }
+
+ PVMCPUCC pVCpu = VMMGetCpu(pVM);
+ if (pVCpu)
+ {
+ /*
+ * Check that the jump buffer is armed.
+ */
+# ifdef RT_ARCH_X86
+ if ( !pVCpu->vmm.s.CallRing3JmpBufR0.eip
+ || pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call)
+# else
+ if ( !pVCpu->vmm.s.CallRing3JmpBufR0.rip
+ || pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call)
+# endif
+ {
+# ifdef DEBUG
+ SUPR0Printf("vmmR0LoggerFlush: Jump buffer isn't armed!\n");
+# endif
+ return;
+ }
+ VMMRZCallRing3(pVM, pVCpu, VMMCALLRING3_VMM_LOGGER_FLUSH, 0);
+ }
+# ifdef DEBUG
+ else
+ SUPR0Printf("vmmR0LoggerFlush: invalid VCPU context!\n");
+# endif
+#else
+ NOREF(pLogger);
+#endif /* LOG_ENABLED */
+}
+
+#ifdef LOG_ENABLED
+
+/**
+ * Disables flushing of the ring-0 debug log.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(void) VMMR0LogFlushDisable(PVMCPUCC pVCpu)
+{
+ if (pVCpu->vmm.s.pR0LoggerR0)
+ pVCpu->vmm.s.pR0LoggerR0->fFlushingDisabled = true;
+ if (pVCpu->vmm.s.pR0RelLoggerR0)
+ pVCpu->vmm.s.pR0RelLoggerR0->fFlushingDisabled = true;
+}
+
+
+/**
+ * Enables flushing of the ring-0 debug log.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(void) VMMR0LogFlushEnable(PVMCPUCC pVCpu)
+{
+ if (pVCpu->vmm.s.pR0LoggerR0)
+ pVCpu->vmm.s.pR0LoggerR0->fFlushingDisabled = false;
+ if (pVCpu->vmm.s.pR0RelLoggerR0)
+ pVCpu->vmm.s.pR0RelLoggerR0->fFlushingDisabled = false;
+}
+
+
+/**
+ * Checks whether log flushing is disabled.
+ *
+ * @returns true if flushing is disabled (or there is no ring-0 logger), false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(bool) VMMR0IsLogFlushDisabled(PVMCPUCC pVCpu)
+{
+ if (pVCpu->vmm.s.pR0LoggerR0)
+ return pVCpu->vmm.s.pR0LoggerR0->fFlushingDisabled;
+ if (pVCpu->vmm.s.pR0RelLoggerR0)
+ return pVCpu->vmm.s.pR0RelLoggerR0->fFlushingDisabled;
+ return true;
+}
+
+#endif /* LOG_ENABLED */
+
+/**
+ * Override RTLogRelGetDefaultInstanceEx so we can do LogRel to VBox.log from EMTs in ring-0.
+ */
+DECLEXPORT(PRTLOGGER) RTLogRelGetDefaultInstanceEx(uint32_t fFlagsAndGroup)
+{
+ PGVMCPU pGVCpu = GVMMR0GetGVCpuByEMT(NIL_RTNATIVETHREAD);
+ if (pGVCpu)
+ {
+ PVMCPUCC pVCpu = pGVCpu;
+ if (RT_VALID_PTR(pVCpu))
+ {
+ PVMMR0LOGGER pVmmLogger = pVCpu->vmm.s.pR0RelLoggerR0;
+ if (RT_VALID_PTR(pVmmLogger))
+ {
+ if ( pVmmLogger->fCreated
+ && pVmmLogger->pVM == pGVCpu->pGVM)
+ {
+ if (pVmmLogger->Logger.fFlags & RTLOGFLAGS_DISABLED)
+ return NULL;
+ uint16_t const fFlags = RT_LO_U16(fFlagsAndGroup);
+ uint16_t const iGroup = RT_HI_U16(fFlagsAndGroup);
+ if ( iGroup != UINT16_MAX
+ && ( ( pVmmLogger->Logger.afGroups[iGroup < pVmmLogger->Logger.cGroups ? iGroup : 0]
+ & (fFlags | (uint32_t)RTLOGGRPFLAGS_ENABLED))
+ != (fFlags | (uint32_t)RTLOGGRPFLAGS_ENABLED)))
+ return NULL;
+ return &pVmmLogger->Logger;
+ }
+ }
+ }
+ }
+ return SUPR0GetDefaultLogRelInstanceEx(fFlagsAndGroup);
+}
+
+
+/**
+ * Jump back to ring-3 if we're the EMT and the longjmp is armed.
+ *
+ * @returns true if the breakpoint should be hit, false if it should be ignored.
+ */
+DECLEXPORT(bool) RTCALL RTAssertShouldPanic(void)
+{
+#if 0
+ return true;
+#else
+ PVMCC pVM = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD);
+ if (pVM)
+ {
+ PVMCPUCC pVCpu = VMMGetCpu(pVM);
+
+ if (pVCpu)
+ {
+#ifdef RT_ARCH_X86
+ if ( pVCpu->vmm.s.CallRing3JmpBufR0.eip
+ && !pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call)
+#else
+ if ( pVCpu->vmm.s.CallRing3JmpBufR0.rip
+ && !pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call)
+#endif
+ {
+ int rc = VMMRZCallRing3(pVM, pVCpu, VMMCALLRING3_VM_R0_ASSERTION, 0);
+ return RT_FAILURE_NP(rc);
+ }
+ }
+ }
+#ifdef RT_OS_LINUX
+ return true;
+#else
+ return false;
+#endif
+#endif
+}
+
+
+/**
+ * Override this so we can push it up to ring-3.
+ *
+ * @param pszExpr Expression. Can be NULL.
+ * @param uLine Location line number.
+ * @param pszFile Location file name.
+ * @param pszFunction Location function name.
+ */
+DECLEXPORT(void) RTCALL RTAssertMsg1Weak(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
+{
+ /*
+ * To the log.
+ */
+ LogAlways(("\n!!R0-Assertion Failed!!\n"
+ "Expression: %s\n"
+ "Location : %s(%d) %s\n",
+ pszExpr, pszFile, uLine, pszFunction));
+
+ /*
+ * To the global VMM buffer.
+ */
+ PVMCC pVM = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD);
+ if (pVM)
+ RTStrPrintf(pVM->vmm.s.szRing0AssertMsg1, sizeof(pVM->vmm.s.szRing0AssertMsg1),
+ "\n!!R0-Assertion Failed!!\n"
+ "Expression: %.*s\n"
+ "Location : %s(%d) %s\n",
+ sizeof(pVM->vmm.s.szRing0AssertMsg1) / 4 * 3, pszExpr,
+ pszFile, uLine, pszFunction);
+
+ /*
+ * Continue the normal way.
+ */
+ RTAssertMsg1(pszExpr, uLine, pszFile, pszFunction);
+}
+
+
+/**
+ * Callback for RTLogFormatV which writes to the ring-3 log port.
+ * See PFNLOGOUTPUT() for details.
+ */
+static DECLCALLBACK(size_t) rtLogOutput(void *pv, const char *pachChars, size_t cbChars)
+{
+ for (size_t i = 0; i < cbChars; i++)
+ {
+ LogAlways(("%c", pachChars[i])); NOREF(pachChars);
+ }
+
+ NOREF(pv);
+ return cbChars;
+}
+
+
+/**
+ * Override this so we can push it up to ring-3.
+ *
+ * @param pszFormat The format string.
+ * @param va Arguments.
+ */
+DECLEXPORT(void) RTCALL RTAssertMsg2WeakV(const char *pszFormat, va_list va)
+{
+ va_list vaCopy;
+
+ /*
+ * Push the message to the loggers.
+ */
+ PRTLOGGER pLog = RTLogGetDefaultInstance(); /* Don't initialize it here... */
+ if (pLog)
+ {
+ va_copy(vaCopy, va);
+ RTLogFormatV(rtLogOutput, pLog, pszFormat, vaCopy);
+ va_end(vaCopy);
+ }
+ pLog = RTLogRelGetDefaultInstance();
+ if (pLog)
+ {
+ va_copy(vaCopy, va);
+ RTLogFormatV(rtLogOutput, pLog, pszFormat, vaCopy);
+ va_end(vaCopy);
+ }
+
+ /*
+ * Push it to the global VMM buffer.
+ */
+ PVMCC pVM = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD);
+ if (pVM)
+ {
+ va_copy(vaCopy, va);
+ RTStrPrintfV(pVM->vmm.s.szRing0AssertMsg2, sizeof(pVM->vmm.s.szRing0AssertMsg2), pszFormat, vaCopy);
+ va_end(vaCopy);
+ }
+
+ /*
+ * Continue the normal way.
+ */
+ RTAssertMsg2V(pszFormat, va);
+}
+
diff --git a/src/VBox/VMM/VMMR0/VMMR0.def b/src/VBox/VMM/VMMR0/VMMR0.def
new file mode 100644
index 00000000..5ef46607
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/VMMR0.def
@@ -0,0 +1,120 @@
+; $Id: VMMR0.def $
+;; @file
+; VMM Ring 0 DLL - Definition file.
+
+;
+; Copyright (C) 2006-2020 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+LIBRARY VMMR0.r0
+EXPORTS
+ ; data
+
+ ; code
+ GIMGetMmio2Regions
+ PDMCritSectEnter
+ PDMCritSectEnterDebug
+ PDMCritSectIsOwner
+ PDMCritSectLeave
+ PDMHCCritSectScheduleExitEvent
+ PDMCritSectTryEnter
+ PDMCritSectTryEnterDebug
+ PDMQueueAlloc
+ PDMQueueInsert
+ PGMHandlerPhysicalPageTempOff
+ PGMShwMakePageWritable
+ PGMPhysSimpleWriteGCPhys
+ PGMPhysSimpleReadGCPtr
+ PGMPhysSimpleWriteGCPtr
+ PGMPhysReadGCPtr
+ PGMPhysWriteGCPtr
+ PGMPhysSimpleDirtyWriteGCPtr
+ PDMR0DeviceRegisterModule
+ PDMR0DeviceDeregisterModule
+ IOMMmioResetRegion
+ IOMMmioMapMmio2Page
+ RTLogDefaultInstance
+ RTLogDefaultInstanceEx
+ RTLogRelGetDefaultInstance
+ RTLogRelGetDefaultInstanceEx
+ RTLogLogger
+ RTLogLoggerEx
+ RTLogLoggerExV
+ RTTimeMilliTS
+ RTTraceBufAddMsgF
+ RTTraceBufAddPos
+ RTTraceBufAddPosMsgF
+ TMTimerFromMilli
+ TMTimerFromMicro
+ TMTimerFromNano
+ TMTimerGet
+ TMTimerGetFreq
+ TMTimerIsActive
+ TMTimerIsLockOwner
+ TMTimerLock
+ TMTimerSet
+ TMTimerSetRelative
+ TMTimerSetMillies
+ TMTimerSetMicro
+ TMTimerSetNano
+ TMTimerSetFrequencyHint
+ TMTimerStop
+ TMTimerUnlock
+ VMMGetSvnRev
+ vmmR0LoggerFlush
+ vmmR0LoggerWrapper
+ VMSetError
+ VMSetErrorV
+
+ ; Internal Networking
+ IntNetR0Open
+ IntNetR0IfClose
+ IntNetR0IfGetBufferPtrs
+ IntNetR0IfSetPromiscuousMode
+ IntNetR0IfSetMacAddress
+ IntNetR0IfSetActive
+ IntNetR0IfSend
+ IntNetR0IfWait
+
+ ; Network Shaper
+ PDMNsAllocateBandwidth
+
+ ; runtime
+ RTAssertMsg1Weak
+ RTAssertMsg2Weak
+ RTAssertShouldPanic
+ RTCrc32
+ RTOnceSlow
+ RTTimeNanoTSLegacySyncInvarNoDelta
+ RTTimeNanoTSLegacySyncInvarWithDelta
+ RTTimeNanoTSLegacyAsync
+ RTTimeNanoTSLFenceSyncInvarNoDelta
+ RTTimeNanoTSLFenceSyncInvarWithDelta
+ RTTimeNanoTSLFenceAsync
+ RTTimeSystemNanoTS
+ RTTimeNanoTS
+ ASMMultU64ByU32DivByU32 ; not-os2
+ ASMAtomicXchgU8 ; not-x86
+ ASMAtomicXchgU16 ; not-x86
+ ASMBitFirstSet ; not-x86
+ ASMNopPause ; not-x86
+ nocrt_memchr
+ nocrt_memcmp
+ nocrt_memcpy
+ memcpy=nocrt_memcpy ; not-os2
+ nocrt_memmove
+ nocrt_memset
+ memset=nocrt_memset ; not-os2
+ nocrt_strcpy
+ nocrt_strcmp
+ nocrt_strchr
+ nocrt_strlen
+
diff --git a/src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm b/src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm
new file mode 100644
index 00000000..9d330417
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm
@@ -0,0 +1,491 @@
+; $Id: VMMR0JmpA-amd64.asm $
+;; @file
+; VMM - R0 SetJmp / LongJmp routines for AMD64.
+;
+
+;
+; Copyright (C) 2006-2020 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+;*******************************************************************************
+;* Header Files *
+;*******************************************************************************
+%define RT_ASM_WITH_SEH64
+%include "VBox/asmdefs.mac"
+%include "VMMInternal.mac"
+%include "VBox/err.mac"
+%include "VBox/param.mac"
+
+
+;*******************************************************************************
+;* Defined Constants And Macros *
+;*******************************************************************************
+%define RESUME_MAGIC 07eadf00dh
+%define STACK_PADDING 0eeeeeeeeeeeeeeeeh
+
+;; Workaround for the Linux 4.6 fast/slow syscall stack depth difference.
+%ifdef VMM_R0_SWITCH_STACK
+ %define STACK_FUZZ_SIZE 0
+%else
+ %define STACK_FUZZ_SIZE 128
+%endif
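+;; @note When not switching stacks, the fuzz is consumed in the .resume path of
+;; vmmR0CallRing3SetJmp below, which accepts a resumed stack pointer within
+;; +/- STACK_FUZZ_SIZE of the recorded SpCheck value.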
+
+
+BEGINCODE
+
+
+;;
+; The setjmp variant used for calling Ring-3.
+;
+; This differs from the normal setjmp in that it will resume VMMRZCallRing3 if we're
+; in the middle of a ring-3 call. Another difference is the function pointer and
+; argument. This has to do with resuming code and the stack frame of the caller.
+;
+; @returns VINF_SUCCESS on success or whatever is passed to vmmR0CallRing3LongJmp.
+; @param pJmpBuf msc:rcx gcc:rdi x86:[esp+0x04] Our jmp_buf.
+; @param pfn msc:rdx gcc:rsi x86:[esp+0x08] The function to be called when not resuming.
+; @param pvUser1 msc:r8 gcc:rdx x86:[esp+0x0c] The argument of that function.
+; @param pvUser2 msc:r9 gcc:rcx x86:[esp+0x10] The argument of that function.
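+; @remark VMMR0EntryFast (VMMR0.cpp) invokes this as
+; vmmR0CallRing3SetJmp(&pGVCpu->vmm.s.CallRing3JmpBufR0, HMR0RunGuestCode, pGVM, pGVCpu).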
+;
+BEGINPROC vmmR0CallRing3SetJmp
+GLOBALNAME vmmR0CallRing3SetJmp2
+GLOBALNAME vmmR0CallRing3SetJmpEx
+ ;
+ ; Save the registers.
+ ;
+ push rbp
+ SEH64_PUSH_xBP
+ mov rbp, rsp
+ SEH64_SET_FRAME_xBP 0
+ %ifdef ASM_CALL64_MSC
+ sub rsp, 30h + STACK_FUZZ_SIZE ; (10h is used by resume (??), 20h for callee spill area)
+ SEH64_ALLOCATE_STACK 30h + STACK_FUZZ_SIZE
+SEH64_END_PROLOGUE
+ mov r11, rdx ; pfn
+ mov rdx, rcx ; pJmpBuf;
+ %else
+ sub rsp, 10h + STACK_FUZZ_SIZE ; (10h is used by resume (??))
+ SEH64_ALLOCATE_STACK 10h + STACK_FUZZ_SIZE
+SEH64_END_PROLOGUE
+ mov r8, rdx ; pvUser1 (save it like MSC)
+ mov r9, rcx ; pvUser2 (save it like MSC)
+ mov r11, rsi ; pfn
+ mov rdx, rdi ; pJmpBuf
+ %endif
+ mov [xDX + VMMR0JMPBUF.rbx], rbx
+ %ifdef ASM_CALL64_MSC
+ mov [xDX + VMMR0JMPBUF.rsi], rsi
+ mov [xDX + VMMR0JMPBUF.rdi], rdi
+ %endif
+ mov [xDX + VMMR0JMPBUF.rbp], rbp
+ mov [xDX + VMMR0JMPBUF.r12], r12
+ mov [xDX + VMMR0JMPBUF.r13], r13
+ mov [xDX + VMMR0JMPBUF.r14], r14
+ mov [xDX + VMMR0JMPBUF.r15], r15
+ mov xAX, [rbp + 8] ; (not really necessary, except for validity check)
+ mov [xDX + VMMR0JMPBUF.rip], xAX
+ %ifdef ASM_CALL64_MSC
+ lea r10, [rsp + 20h] ; must save the spill area
+ %else
+ lea r10, [rsp]
+ %endif
+ mov [xDX + VMMR0JMPBUF.rsp], r10
+ %ifdef RT_OS_WINDOWS
+ movdqa [xDX + VMMR0JMPBUF.xmm6], xmm6
+ movdqa [xDX + VMMR0JMPBUF.xmm7], xmm7
+ movdqa [xDX + VMMR0JMPBUF.xmm8], xmm8
+ movdqa [xDX + VMMR0JMPBUF.xmm9], xmm9
+ movdqa [xDX + VMMR0JMPBUF.xmm10], xmm10
+ movdqa [xDX + VMMR0JMPBUF.xmm11], xmm11
+ movdqa [xDX + VMMR0JMPBUF.xmm12], xmm12
+ movdqa [xDX + VMMR0JMPBUF.xmm13], xmm13
+ movdqa [xDX + VMMR0JMPBUF.xmm14], xmm14
+ movdqa [xDX + VMMR0JMPBUF.xmm15], xmm15
+ %endif
+ pushf
+ pop xAX
+ mov [xDX + VMMR0JMPBUF.rflags], xAX
+
+ ;
+ ; If we're not in a ring-3 call, call pfn and return.
+ ;
+ test byte [xDX + VMMR0JMPBUF.fInRing3Call], 1
+ jnz .resume
+
+ %ifdef VMM_R0_SWITCH_STACK
+ mov r15, [xDX + VMMR0JMPBUF.pvSavedStack]
+ test r15, r15
+ jz .entry_error
+ %ifdef VBOX_STRICT
+ cmp dword [r15], 0h
+ jne .entry_error
+ mov rdi, r15
+ mov rcx, VMM_STACK_SIZE / 8
+ mov rax, qword 0eeeeeeeffeeeeeeeh
+ repne stosq
+ mov [rdi - 10h], rbx
+ %endif
+ lea r15, [r15 + VMM_STACK_SIZE - 40h]
+ mov rsp, r15 ; Switch stack!
+ %endif ; VMM_R0_SWITCH_STACK
+
+ mov r12, rdx ; Save pJmpBuf.
+ %ifdef ASM_CALL64_MSC
+ mov rcx, r8 ; pvUser -> arg0
+ mov rdx, r9
+ %else
+ mov rdi, r8 ; pvUser -> arg0
+ mov rsi, r9
+ %endif
+ call r11
+ mov rdx, r12 ; Restore pJmpBuf
+
+ %ifdef VMM_R0_SWITCH_STACK
+ %ifdef VBOX_STRICT
+ mov r15, [xDX + VMMR0JMPBUF.pvSavedStack]
+ mov dword [r15], 0h ; Reset the marker
+ %endif
+ %endif
+
+ ;
+ ; Return like in the long jump but clear eip, no shortcuts here.
+ ;
+.proper_return:
+%ifdef RT_OS_WINDOWS
+ movdqa xmm6, [xDX + VMMR0JMPBUF.xmm6 ]
+ movdqa xmm7, [xDX + VMMR0JMPBUF.xmm7 ]
+ movdqa xmm8, [xDX + VMMR0JMPBUF.xmm8 ]
+ movdqa xmm9, [xDX + VMMR0JMPBUF.xmm9 ]
+ movdqa xmm10, [xDX + VMMR0JMPBUF.xmm10]
+ movdqa xmm11, [xDX + VMMR0JMPBUF.xmm11]
+ movdqa xmm12, [xDX + VMMR0JMPBUF.xmm12]
+ movdqa xmm13, [xDX + VMMR0JMPBUF.xmm13]
+ movdqa xmm14, [xDX + VMMR0JMPBUF.xmm14]
+ movdqa xmm15, [xDX + VMMR0JMPBUF.xmm15]
+%endif
+ mov rbx, [xDX + VMMR0JMPBUF.rbx]
+%ifdef ASM_CALL64_MSC
+ mov rsi, [xDX + VMMR0JMPBUF.rsi]
+ mov rdi, [xDX + VMMR0JMPBUF.rdi]
+%endif
+ mov r12, [xDX + VMMR0JMPBUF.r12]
+ mov r13, [xDX + VMMR0JMPBUF.r13]
+ mov r14, [xDX + VMMR0JMPBUF.r14]
+ mov r15, [xDX + VMMR0JMPBUF.r15]
+ mov rbp, [xDX + VMMR0JMPBUF.rbp]
+ and qword [xDX + VMMR0JMPBUF.rip], byte 0 ; used for valid check.
+ mov rsp, [xDX + VMMR0JMPBUF.rsp]
+ push qword [xDX + VMMR0JMPBUF.rflags]
+ popf
+ leave
+ ret
+
+.entry_error:
+ mov eax, VERR_VMM_SET_JMP_ERROR
+ jmp .proper_return
+
+.stack_overflow:
+ mov eax, VERR_VMM_SET_JMP_STACK_OVERFLOW
+ jmp .proper_return
+
+ ;
+ ; Aborting resume.
+ ; Note! No need to restore XMM registers here since we haven't touched them yet.
+ ;
+.bad:
+ and qword [xDX + VMMR0JMPBUF.rip], byte 0 ; used for valid check.
+ mov rbx, [xDX + VMMR0JMPBUF.rbx]
+ %ifdef ASM_CALL64_MSC
+ mov rsi, [xDX + VMMR0JMPBUF.rsi]
+ mov rdi, [xDX + VMMR0JMPBUF.rdi]
+ %endif
+ mov r12, [xDX + VMMR0JMPBUF.r12]
+ mov r13, [xDX + VMMR0JMPBUF.r13]
+ mov r14, [xDX + VMMR0JMPBUF.r14]
+ mov r15, [xDX + VMMR0JMPBUF.r15]
+ mov eax, VERR_VMM_SET_JMP_ABORTED_RESUME
+ leave
+ ret
+
+ ;
+ ; Resume the VMMRZCallRing3 call.
+ ;
+.resume:
+ %ifndef VMM_R0_SWITCH_STACK
+ ; Sanity check the incoming stack pointer, applying fuzz if needed.
+ sub r10, [xDX + VMMR0JMPBUF.SpCheck]
+ jz .resume_stack_checked_out
+ add r10, STACK_FUZZ_SIZE ; plus/minus STACK_FUZZ_SIZE is fine.
+ cmp r10, STACK_FUZZ_SIZE * 2
+ ja .bad
+
+ mov r10, [xDX + VMMR0JMPBUF.SpCheck]
+ mov [xDX + VMMR0JMPBUF.rsp], r10 ; Must be updated in case of another long jump (used for the save calculation).
+
+.resume_stack_checked_out:
+ mov ecx, [xDX + VMMR0JMPBUF.cbSavedStack]
+ cmp rcx, VMM_STACK_SIZE
+ ja .bad
+ test rcx, 7
+ jnz .bad
+ mov rdi, [xDX + VMMR0JMPBUF.SpCheck]
+ sub rdi, [xDX + VMMR0JMPBUF.SpResume]
+ cmp rcx, rdi
+ jne .bad
+ %endif
+
+%ifdef VMM_R0_SWITCH_STACK
+ ; Switch stack.
+ mov rsp, [xDX + VMMR0JMPBUF.SpResume]
+%else
+ ; Restore the stack.
+ mov ecx, [xDX + VMMR0JMPBUF.cbSavedStack]
+ shr ecx, 3
+ mov rsi, [xDX + VMMR0JMPBUF.pvSavedStack]
+ mov rdi, [xDX + VMMR0JMPBUF.SpResume]
+ mov rsp, rdi
+ rep movsq
+%endif ; !VMM_R0_SWITCH_STACK
+ mov byte [xDX + VMMR0JMPBUF.fInRing3Call], 0
+
+ ;
+ ; Continue where we left off.
+ ;
+%ifdef VBOX_STRICT
+ pop rax ; magic
+ cmp rax, RESUME_MAGIC
+ je .magic_ok
+ mov ecx, 0123h
+ mov [ecx], edx
+.magic_ok:
+%endif
+%ifdef RT_OS_WINDOWS
+ movdqa xmm6, [rsp + 000h]
+ movdqa xmm7, [rsp + 010h]
+ movdqa xmm8, [rsp + 020h]
+ movdqa xmm9, [rsp + 030h]
+ movdqa xmm10, [rsp + 040h]
+ movdqa xmm11, [rsp + 050h]
+ movdqa xmm12, [rsp + 060h]
+ movdqa xmm13, [rsp + 070h]
+ movdqa xmm14, [rsp + 080h]
+ movdqa xmm15, [rsp + 090h]
+ add rsp, 0a0h
+%endif
+ popf
+ pop rbx
+%ifdef ASM_CALL64_MSC
+ pop rsi
+ pop rdi
+%endif
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ pop rbp
+ xor eax, eax ; VINF_SUCCESS
+ ret
+ENDPROC vmmR0CallRing3SetJmp
+
+
+;;
+; Worker for VMMRZCallRing3.
+; This will save the stack and registers.
+;
+; @param pJmpBuf msc:rcx gcc:rdi x86:[ebp+8] Pointer to the jump buffer.
+; @param rc msc:rdx gcc:rsi x86:[ebp+c] The return code.
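+; @remark Copies the current stack into pJmpBuf->pvSavedStack (unless VMM_R0_SWITCH_STACK
+; is defined), sets fInRing3Call, and then returns rc through the frame saved by
+; vmmR0CallRing3SetJmp.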
+;
+BEGINPROC vmmR0CallRing3LongJmp
+ ;
+ ; Save the registers on the stack.
+ ;
+ push rbp
+ SEH64_PUSH_xBP
+ mov rbp, rsp
+ SEH64_SET_FRAME_xBP 0
+ push r15
+ SEH64_PUSH_GREG r15
+ push r14
+ SEH64_PUSH_GREG r14
+ push r13
+ SEH64_PUSH_GREG r13
+ push r12
+ SEH64_PUSH_GREG r12
+%ifdef ASM_CALL64_MSC
+ push rdi
+ SEH64_PUSH_GREG rdi
+ push rsi
+ SEH64_PUSH_GREG rsi
+%endif
+ push rbx
+ SEH64_PUSH_GREG rbx
+ pushf
+ SEH64_ALLOCATE_STACK 8
+%ifdef RT_OS_WINDOWS
+ sub rsp, 0a0h
+ SEH64_ALLOCATE_STACK 0a0h
+ movdqa [rsp + 000h], xmm6
+ movdqa [rsp + 010h], xmm7
+ movdqa [rsp + 020h], xmm8
+ movdqa [rsp + 030h], xmm9
+ movdqa [rsp + 040h], xmm10
+ movdqa [rsp + 050h], xmm11
+ movdqa [rsp + 060h], xmm12
+ movdqa [rsp + 070h], xmm13
+ movdqa [rsp + 080h], xmm14
+ movdqa [rsp + 090h], xmm15
+%endif
+%ifdef VBOX_STRICT
+ push RESUME_MAGIC
+ SEH64_ALLOCATE_STACK 8
+%endif
+SEH64_END_PROLOGUE
+
+ ;
+ ; Normalize the parameters.
+ ;
+%ifdef ASM_CALL64_MSC
+ mov eax, edx ; rc
+ mov rdx, rcx ; pJmpBuf
+%else
+ mov rdx, rdi ; pJmpBuf
+ mov eax, esi ; rc
+%endif
+
+ ;
+ ; Is the jump buffer armed?
+ ;
+ cmp qword [xDX + VMMR0JMPBUF.rip], byte 0
+ je .nok
+
+ ;
+ ; Sanity checks.
+ ;
+ mov rdi, [xDX + VMMR0JMPBUF.pvSavedStack]
+ test rdi, rdi ; darwin may set this to 0.
+ jz .nok
+ mov [xDX + VMMR0JMPBUF.SpResume], rsp
+ %ifndef VMM_R0_SWITCH_STACK
+ mov rsi, rsp
+ mov rcx, [xDX + VMMR0JMPBUF.rsp]
+ sub rcx, rsi
+
+ ; two sanity checks on the size.
+ cmp rcx, VMM_STACK_SIZE ; check max size.
+ jnbe .nok
+
+ ;
+ ; Copy the stack
+ ;
+ test ecx, 7 ; check alignment
+ jnz .nok
+ mov [xDX + VMMR0JMPBUF.cbSavedStack], ecx
+ shr ecx, 3
+ rep movsq
+
+ %endif ; !VMM_R0_SWITCH_STACK
+
+ ; Save a PC and return PC here to assist unwinding.
+.unwind_point:
+ lea rcx, [.unwind_point wrt RIP]
+ mov [xDX + VMMR0JMPBUF.SavedEipForUnwind], rcx
+ mov rcx, [xDX + VMMR0JMPBUF.rbp]
+ lea rcx, [rcx + 8]
+ mov [xDX + VMMR0JMPBUF.UnwindRetPcLocation], rcx
+ mov rcx, [rcx]
+ mov [xDX + VMMR0JMPBUF.UnwindRetPcValue], rcx
+
+ ; Save RSP & RBP to enable stack dumps
+ mov rcx, rbp
+ mov [xDX + VMMR0JMPBUF.SavedEbp], rcx
+ sub rcx, 8
+ mov [xDX + VMMR0JMPBUF.SavedEsp], rcx
+
+ ; store the last pieces of info.
+ mov rcx, [xDX + VMMR0JMPBUF.rsp]
+ mov [xDX + VMMR0JMPBUF.SpCheck], rcx
+ mov byte [xDX + VMMR0JMPBUF.fInRing3Call], 1
+
+ ;
+ ; Do the long jump.
+ ;
+%ifdef RT_OS_WINDOWS
+ movdqa xmm6, [xDX + VMMR0JMPBUF.xmm6 ]
+ movdqa xmm7, [xDX + VMMR0JMPBUF.xmm7 ]
+ movdqa xmm8, [xDX + VMMR0JMPBUF.xmm8 ]
+ movdqa xmm9, [xDX + VMMR0JMPBUF.xmm9 ]
+ movdqa xmm10, [xDX + VMMR0JMPBUF.xmm10]
+ movdqa xmm11, [xDX + VMMR0JMPBUF.xmm11]
+ movdqa xmm12, [xDX + VMMR0JMPBUF.xmm12]
+ movdqa xmm13, [xDX + VMMR0JMPBUF.xmm13]
+ movdqa xmm14, [xDX + VMMR0JMPBUF.xmm14]
+ movdqa xmm15, [xDX + VMMR0JMPBUF.xmm15]
+%endif
+ mov rbx, [xDX + VMMR0JMPBUF.rbx]
+%ifdef ASM_CALL64_MSC
+ mov rsi, [xDX + VMMR0JMPBUF.rsi]
+ mov rdi, [xDX + VMMR0JMPBUF.rdi]
+%endif
+ mov r12, [xDX + VMMR0JMPBUF.r12]
+ mov r13, [xDX + VMMR0JMPBUF.r13]
+ mov r14, [xDX + VMMR0JMPBUF.r14]
+ mov r15, [xDX + VMMR0JMPBUF.r15]
+ mov rbp, [xDX + VMMR0JMPBUF.rbp]
+ mov rsp, [xDX + VMMR0JMPBUF.rsp]
+ push qword [xDX + VMMR0JMPBUF.rflags]
+ popf
+ leave
+ ret
+
+ ;
+ ; Failure
+ ;
+.nok:
+%ifdef VBOX_STRICT
+ pop rax ; magic
+ cmp rax, RESUME_MAGIC
+ je .magic_ok
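+ ; Bad magic: deliberately write to a bogus address (0123h) so strict builds crash loudly here.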
+ mov ecx, 0123h
+ mov [rcx], edx
+.magic_ok:
+%endif
+ mov eax, VERR_VMM_LONG_JMP_ERROR
+%ifdef RT_OS_WINDOWS
+ add rsp, 0a0h ; skip XMM registers since they are unmodified.
+%endif
+ popf
+ pop rbx
+%ifdef ASM_CALL64_MSC
+ pop rsi
+ pop rdi
+%endif
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ leave
+ ret
+ENDPROC vmmR0CallRing3LongJmp
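Stripped of the register shuffling, the long-jump path above does the following: verify that the buffer is armed, snapshot the stack between the current RSP and the RSP recorded at setjmp time, note a few addresses for the unwinder, mark the buffer as being in a ring-3 call, and finally restore the setjmp-time register state so execution resumes as if vmmR0CallRing3SetJmp had just returned rc. A self-contained C model of that control flow follows; the struct and constants are local stand-ins for VMMR0JMPBUF, VMM_STACK_SIZE and VERR_VMM_LONG_JMP_ERROR, and the final register restore naturally has to stay in assembly.

    #include <stdint.h>
    #include <string.h>

    typedef struct SKETCHJMPBUF
    {
        uintptr_t rip, rsp, SpResume, SpCheck;
        void     *pvSavedStack;
        uint32_t  cbSavedStack;
        uint8_t   fInRing3Call;
    } SKETCHJMPBUF;

    #define SKETCH_STACK_SIZE   0x4000u     /* stand-in for VMM_STACK_SIZE */
    #define SKETCH_ERR_LONGJMP  (-1)        /* stand-in for VERR_VMM_LONG_JMP_ERROR */

    static int longJmpSketch(SKETCHJMPBUF *pJmpBuf, uintptr_t uCurrentSp, int rc)
    {
        if (!pJmpBuf->rip || !pJmpBuf->pvSavedStack)        /* buffer armed? */
            return SKETCH_ERR_LONGJMP;

        uintptr_t cbToSave = pJmpBuf->rsp - uCurrentSp;     /* stack used since setjmp */
        if (cbToSave > SKETCH_STACK_SIZE || (cbToSave & 7)) /* max size + alignment checks */
            return SKETCH_ERR_LONGJMP;

        pJmpBuf->SpResume     = uCurrentSp;
        pJmpBuf->cbSavedStack = (uint32_t)cbToSave;
        memcpy(pJmpBuf->pvSavedStack, (const void *)uCurrentSp, cbToSave);  /* the rep movsq */

        pJmpBuf->SpCheck      = pJmpBuf->rsp;
        pJmpBuf->fInRing3Call = 1;
        return rc;      /* the assembly now restores registers and "returns" rc from setjmp */
    }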
+
+
+;;
+; Internal R0 logger worker: Logger wrapper.
+;
+; @cproto VMMR0DECL(void) vmmR0LoggerWrapper(const char *pszFormat, ...)
+;
+BEGINPROC_EXPORTED vmmR0LoggerWrapper
+SEH64_END_PROLOGUE
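+ ; Should never be called on this architecture; the C code uses C99 variadic macros for logging, so just trap.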
+ int3
+ int3
+ int3
+ ret
+ENDPROC vmmR0LoggerWrapper
+
diff --git a/src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm b/src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm
new file mode 100644
index 00000000..8ec64213
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm
@@ -0,0 +1,401 @@
+; $Id: VMMR0JmpA-x86.asm $
+;; @file
+; VMM - R0 SetJmp / LongJmp routines for X86.
+;
+
+;
+; Copyright (C) 2006-2020 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+;*******************************************************************************
+;* Header Files *
+;*******************************************************************************
+%include "VBox/asmdefs.mac"
+%include "VMMInternal.mac"
+%include "VBox/err.mac"
+%include "VBox/param.mac"
+
+
+;*******************************************************************************
+;* Defined Constants And Macros *
+;*******************************************************************************
+%define RESUME_MAGIC 07eadf00dh
+%define STACK_PADDING 0eeeeeeeeh
+
+
+; For vmmR0LoggerWrapper. (The other architectures use C99 variadic macros.)
+extern NAME(RTLogLogger)
+
+
+BEGINCODE
+
+
+;;
+; The setjmp variant used for calling Ring-3.
+;
+; This differs from the normal setjmp in that it will resume VMMRZCallRing3 if we're
+; in the middle of a ring-3 call. Another difference is the function pointer and
+; arguments; this has to do with resuming the code and the stack frame of the caller.
+;
+; @returns VINF_SUCCESS on success or whatever is passed to vmmR0CallRing3LongJmp.
+; @param pJmpBuf msc:rcx gcc:rdi x86:[esp+0x04] Our jmp_buf.
+; @param pfn msc:rdx gcc:rsi x86:[esp+0x08] The function to be called when not resuming.
+; @param pvUser1 msc:r8 gcc:rdx x86:[esp+0x0c] The argument of that function.
+; @param pvUser2 msc:r9 gcc:rcx x86:[esp+0x10] The argument of that function.
+;
+BEGINPROC vmmR0CallRing3SetJmp
+GLOBALNAME vmmR0CallRing3SetJmp2
+GLOBALNAME vmmR0CallRing3SetJmpEx
+ ;
+ ; Save the registers.
+ ;
+ mov edx, [esp + 4h] ; pJmpBuf
+ mov [xDX + VMMR0JMPBUF.ebx], ebx
+ mov [xDX + VMMR0JMPBUF.esi], esi
+ mov [xDX + VMMR0JMPBUF.edi], edi
+ mov [xDX + VMMR0JMPBUF.ebp], ebp
+ mov xAX, [esp]
+ mov [xDX + VMMR0JMPBUF.eip], xAX
+ lea ecx, [esp + 4] ; (used in resume)
+ mov [xDX + VMMR0JMPBUF.esp], ecx
+ pushf
+ pop xAX
+ mov [xDX + VMMR0JMPBUF.eflags], xAX
+
+ ;
+ ; If we're not in a ring-3 call, call pfn and return.
+ ;
+ test byte [xDX + VMMR0JMPBUF.fInRing3Call], 1
+ jnz .resume
+
+ mov ebx, edx ; pJmpBuf -> ebx (persistent reg)
+%ifdef VMM_R0_SWITCH_STACK
+ mov esi, [ebx + VMMR0JMPBUF.pvSavedStack]
+ test esi, esi
+ jz .entry_error
+ %ifdef VBOX_STRICT
+ cmp dword [esi], 0h
+ jne .entry_error
+ mov edx, esi
+ mov edi, esi
+ mov ecx, VMM_STACK_SIZE / 4
+ mov eax, STACK_PADDING
+ repne stosd
+ %endif
+ lea esi, [esi + VMM_STACK_SIZE - 32]
+ mov [esi + 1ch], dword 0deadbeefh ; Marker 1.
+ mov [esi + 18h], ebx ; Save pJmpBuf pointer.
+ mov [esi + 14h], dword 00c00ffeeh ; Marker 2.
+ mov [esi + 10h], dword 0f00dbeefh ; Marker 3.
+ mov edx, [esp + 10h] ; pvArg2
+ mov ecx, [esp + 0ch] ; pvArg1
+ mov eax, [esp + 08h] ; pfn
+ %if 1 ; Change to 0 to eat up some extra stack - handy for finding paths using lots of stack.
+ %define FRAME_OFFSET 0
+ %else
+ %define FRAME_OFFSET 1024
+ %endif
+ mov [esi - FRAME_OFFSET + 04h], edx
+ mov [esi - FRAME_OFFSET ], ecx
+ lea esp, [esi - FRAME_OFFSET] ; Switch stack!
+ call eax
+ and dword [esi + 1ch], byte 0 ; reset marker.
+
+ %ifdef VBOX_STRICT
+ ; Calc stack usage and check for overflows.
+ mov edi, [ebx + VMMR0JMPBUF.pvSavedStack]
+ cmp dword [edi], STACK_PADDING ; Check for obvious stack overflow.
+ jne .stack_overflow
+ mov esi, eax ; save eax
+ mov eax, STACK_PADDING
+ mov ecx, VMM_STACK_SIZE / 4
+ cld
+ repe scasd
+ shl ecx, 2 ; *4
+ cmp ecx, VMM_STACK_SIZE - 64 ; Less than 64 bytes left -> overflow as well.
+ mov eax, esi ; restore eax in case of overflow (esi remains used)
+ jae .stack_overflow_almost
+
+ ; Update stack usage statistics.
+ cmp ecx, [ebx + VMMR0JMPBUF.cbUsedMax] ; New max usage?
+ jle .no_used_max
+ mov [ebx + VMMR0JMPBUF.cbUsedMax], ecx
+.no_used_max:
+ ; To simplify the average stuff, just historize before we hit div errors.
+ inc dword [ebx + VMMR0JMPBUF.cUsedTotal]
+ test [ebx + VMMR0JMPBUF.cUsedTotal], dword 0c0000000h
+ jz .no_historize
+ mov dword [ebx + VMMR0JMPBUF.cUsedTotal], 2
+ mov edi, [ebx + VMMR0JMPBUF.cbUsedAvg]
+ mov [ebx + VMMR0JMPBUF.cbUsedTotal], edi
+ mov dword [ebx + VMMR0JMPBUF.cbUsedTotal + 4], 0
+.no_historize:
+ add [ebx + VMMR0JMPBUF.cbUsedTotal], ecx
+ adc dword [ebx + VMMR0JMPBUF.cbUsedTotal + 4], 0
+ mov eax, [ebx + VMMR0JMPBUF.cbUsedTotal]
+ mov edx, [ebx + VMMR0JMPBUF.cbUsedTotal + 4]
+ mov edi, [ebx + VMMR0JMPBUF.cUsedTotal]
+ div edi
+ mov [ebx + VMMR0JMPBUF.cbUsedAvg], eax
+
+ mov eax, esi ; restore eax (final, esi released)
+
+ mov edi, [ebx + VMMR0JMPBUF.pvSavedStack]
+ mov dword [edi], 0h ; Reset the overflow marker.
+ %endif ; VBOX_STRICT
+
+%else ; !VMM_R0_SWITCH_STACK
+ mov ecx, [esp + 0ch] ; pvArg1
+ mov edx, [esp + 10h] ; pvArg2
+ mov eax, [esp + 08h] ; pfn
+ sub esp, 12 ; align the stack on a 16-byte boundary.
+ mov [esp ], ecx
+ mov [esp + 04h], edx
+ call eax
+%endif ; !VMM_R0_SWITCH_STACK
+ mov edx, ebx ; pJmpBuf -> edx (volatile reg)
+
+ ;
+ ; Return like in the long jump but clear eip; no shortcuts here.
+ ;
+.proper_return:
+ mov ebx, [xDX + VMMR0JMPBUF.ebx]
+ mov esi, [xDX + VMMR0JMPBUF.esi]
+ mov edi, [xDX + VMMR0JMPBUF.edi]
+ mov ebp, [xDX + VMMR0JMPBUF.ebp]
+ mov xCX, [xDX + VMMR0JMPBUF.eip]
+ and dword [xDX + VMMR0JMPBUF.eip], byte 0 ; used for valid check.
+ mov esp, [xDX + VMMR0JMPBUF.esp]
+ push dword [xDX + VMMR0JMPBUF.eflags]
+ popf
+ jmp xCX
+
+.entry_error:
+ mov eax, VERR_VMM_SET_JMP_ERROR
+ jmp .proper_return
+
+.stack_overflow:
+ mov eax, VERR_VMM_SET_JMP_STACK_OVERFLOW
+ mov edx, ebx
+ jmp .proper_return
+
+.stack_overflow_almost:
+ mov eax, VERR_VMM_SET_JMP_STACK_OVERFLOW
+ mov edx, ebx
+ jmp .proper_return
+
+ ;
+ ; Aborting resume.
+ ;
+.bad:
+ and dword [xDX + VMMR0JMPBUF.eip], byte 0 ; used for valid check.
+ mov edi, [xDX + VMMR0JMPBUF.edi]
+ mov esi, [xDX + VMMR0JMPBUF.esi]
+ mov ebx, [xDX + VMMR0JMPBUF.ebx]
+ mov eax, VERR_VMM_SET_JMP_ABORTED_RESUME
+ ret
+
+ ;
+ ; Resume the VMMRZCallRing3 call.
+ ;
+.resume:
+ ; Sanity checks.
+%ifdef VMM_R0_SWITCH_STACK
+ mov eax, [xDX + VMMR0JMPBUF.pvSavedStack]
+ %ifdef RT_STRICT
+ cmp dword [eax], STACK_PADDING
+ %endif
+ lea eax, [eax + VMM_STACK_SIZE - 32]
+ cmp dword [eax + 1ch], 0deadbeefh ; Marker 1.
+ jne .bad
+ %ifdef RT_STRICT
+ cmp [esi + 18h], edx ; The saved pJmpBuf pointer.
+ jne .bad
+ cmp dword [esi + 14h], 00c00ffeeh ; Marker 2.
+ jne .bad
+ cmp dword [esi + 10h], 0f00dbeefh ; Marker 3.
+ jne .bad
+ %endif
+%else ; !VMM_R0_SWITCH_STACK
+ cmp ecx, [xDX + VMMR0JMPBUF.SpCheck]
+ jne .bad
+.espCheck_ok:
+ mov ecx, [xDX + VMMR0JMPBUF.cbSavedStack]
+ cmp ecx, VMM_STACK_SIZE
+ ja .bad
+ test ecx, 3
+ jnz .bad
+ mov edi, [xDX + VMMR0JMPBUF.esp]
+ sub edi, [xDX + VMMR0JMPBUF.SpResume]
+ cmp ecx, edi
+ jne .bad
+%endif
+
+%ifdef VMM_R0_SWITCH_STACK
+ ; Switch stack.
+ mov esp, [xDX + VMMR0JMPBUF.SpResume]
+%else
+ ; Restore the stack.
+ mov ecx, [xDX + VMMR0JMPBUF.cbSavedStack]
+ shr ecx, 2
+ mov esi, [xDX + VMMR0JMPBUF.pvSavedStack]
+ mov edi, [xDX + VMMR0JMPBUF.SpResume]
+ mov esp, edi
+ rep movsd
+%endif ; !VMM_R0_SWITCH_STACK
+ mov byte [xDX + VMMR0JMPBUF.fInRing3Call], 0
+
+ ;
+ ; Continue where we left off.
+ ;
+%ifdef VBOX_STRICT
+ pop eax ; magic
+ cmp eax, RESUME_MAGIC
+ je .magic_ok
+ mov ecx, 0123h
+ mov [ecx], edx
+.magic_ok:
+%endif
+ popf
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ xor eax, eax ; VINF_SUCCESS
+ ret
+ENDPROC vmmR0CallRing3SetJmp
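In VMM_R0_SWITCH_STACK strict builds the routine above paints the private stack with STACK_PADDING before switching to it, and after the call scans from the bottom (the repe scasd) for the first modified dword to find the high-water mark. A clobbered bottom dword or less than 64 bytes of headroom is treated as an overflow; otherwise the usage is folded into cbUsedMax and a running average. A small C model of that accounting follows; names are local to the sketch and the 32-bit wrap handling of the totals is omitted.

    #include <stdint.h>

    #define STACK_PADDING   0xeeeeeeeeu
    #define STACK_SIZE      0x4000u             /* stand-in for VMM_STACK_SIZE */

    typedef struct STACKSTATS
    {
        uint32_t cbUsedMax;
        uint64_t cbUsedTotal;
        uint32_t cUsedTotal;
        uint32_t cbUsedAvg;
    } STACKSTATS;

    /* Fill the private stack before switching to it (the strict prologue). */
    static void paintStack(uint32_t *pau32Stack)
    {
        for (uint32_t i = 0; i < STACK_SIZE / 4; i++)
            pau32Stack[i] = STACK_PADDING;
    }

    /* After the call: find the high-water mark and update the statistics.
       Returns -1 where the assembly jumps to .stack_overflow(_almost). */
    static int updateStackStats(STACKSTATS *pStats, const uint32_t *pau32Stack)
    {
        if (pau32Stack[0] != STACK_PADDING)             /* bottom dword clobbered */
            return -1;

        uint32_t cbUnused = 0;
        while (cbUnused < STACK_SIZE && pau32Stack[cbUnused / 4] == STACK_PADDING)
            cbUnused += 4;
        if (cbUnused < 64)                              /* almost out of stack */
            return -1;

        uint32_t cbUsed = STACK_SIZE - cbUnused;
        if (cbUsed > pStats->cbUsedMax)
            pStats->cbUsedMax = cbUsed;
        pStats->cUsedTotal  += 1;
        pStats->cbUsedTotal += cbUsed;
        pStats->cbUsedAvg    = (uint32_t)(pStats->cbUsedTotal / pStats->cUsedTotal);
        return 0;
    }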
+
+
+;;
+; Worker for VMMRZCallRing3.
+; This will save the stack and registers.
+;
+; @param pJmpBuf msc:rcx gcc:rdi x86:[ebp+8] Pointer to the jump buffer.
+; @param rc msc:rdx gcc:rsi x86:[ebp+c] The return code.
+;
+BEGINPROC vmmR0CallRing3LongJmp
+ ;
+ ; Save the registers on the stack.
+ ;
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ push ebx
+ pushf
+%ifdef VBOX_STRICT
+ push RESUME_MAGIC
+%endif
+
+ ;
+ ; Load parameters.
+ ;
+ mov edx, [ebp + 08h] ; pJmpBuf
+ mov eax, [ebp + 0ch] ; rc
+
+ ;
+ ; Is the jump buffer armed?
+ ;
+ cmp dword [xDX + VMMR0JMPBUF.eip], byte 0
+ je .nok
+
+ ;
+ ; Sanity checks.
+ ;
+ mov edi, [xDX + VMMR0JMPBUF.pvSavedStack]
+ test edi, edi ; Darwin may set this to 0.
+ jz .nok
+ mov [xDX + VMMR0JMPBUF.SpResume], esp
+%ifndef VMM_R0_SWITCH_STACK
+ mov esi, esp
+ mov ecx, [xDX + VMMR0JMPBUF.esp]
+ sub ecx, esi
+
+ ; two sanity checks on the size.
+ cmp ecx, VMM_STACK_SIZE ; check max size.
+ jnbe .nok
+
+ ;
+ ; Copy the stack.
+ ;
+ test ecx, 3 ; check alignment
+ jnz .nok
+ mov [xDX + VMMR0JMPBUF.cbSavedStack], ecx
+ shr ecx, 2
+ rep movsd
+%endif ; !VMM_R0_SWITCH_STACK
+
+ ; Save a PC here to assist unwinding.
+.unwind_point:
+ mov dword [xDX + VMMR0JMPBUF.SavedEipForUnwind], .unwind_point
+ mov ecx, [xDX + VMMR0JMPBUF.ebp]
+ lea ecx, [ecx + 4]
+ mov [xDX + VMMR0JMPBUF.UnwindRetPcLocation], ecx
+
+ ; Save ESP & EBP to enable stack dumps
+ mov ecx, ebp
+ mov [xDX + VMMR0JMPBUF.SavedEbp], ecx
+ sub ecx, 4
+ mov [xDX + VMMR0JMPBUF.SavedEsp], ecx
+
+ ; store the last pieces of info.
+ mov ecx, [xDX + VMMR0JMPBUF.esp]
+ mov [xDX + VMMR0JMPBUF.SpCheck], ecx
+ mov byte [xDX + VMMR0JMPBUF.fInRing3Call], 1
+
+ ;
+ ; Do the long jump.
+ ;
+ mov ebx, [xDX + VMMR0JMPBUF.ebx]
+ mov esi, [xDX + VMMR0JMPBUF.esi]
+ mov edi, [xDX + VMMR0JMPBUF.edi]
+ mov ebp, [xDX + VMMR0JMPBUF.ebp]
+ mov ecx, [xDX + VMMR0JMPBUF.eip]
+ mov [xDX + VMMR0JMPBUF.UnwindRetPcValue], ecx
+ mov esp, [xDX + VMMR0JMPBUF.esp]
+ push dword [xDX + VMMR0JMPBUF.eflags]
+ popf
+ jmp ecx
+
+ ;
+ ; Failure
+ ;
+.nok:
+%ifdef VBOX_STRICT
+ pop eax ; magic
+ cmp eax, RESUME_MAGIC
+ je .magic_ok
+ mov ecx, 0123h
+ mov [ecx], edx
+.magic_ok:
+%endif
+ popf
+ pop ebx
+ pop esi
+ pop edi
+ mov eax, VERR_VMM_LONG_JMP_ERROR
+ leave
+ ret
+ENDPROC vmmR0CallRing3LongJmp
+
+
+;;
+; Internal R0 logger worker: Logger wrapper.
+;
+; @cproto VMMR0DECL(void) vmmR0LoggerWrapper(const char *pszFormat, ...)
+;
+EXPORTEDNAME vmmR0LoggerWrapper
+ push 0 ; assumes we're the wrapper for a default instance.
+ call NAME(RTLogLogger)
+ add esp, byte 4
+ ret
+ENDPROC vmmR0LoggerWrapper
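The x86 wrapper above simply pushes a NULL logger pointer in front of the caller's arguments and forwards to RTLogLogger, i.e. everything goes to the default logger instance. The rough C equivalent, assuming the usual IPRT va_list variant RTLogLoggerV, would be:

    #include <stdarg.h>
    #include <iprt/log.h>

    /* Approximate C rendition of the wrapper above (sketch, not the build glue). */
    static void loggerWrapperSketch(const char *pszFormat, ...)
    {
        va_list va;
        va_start(va, pszFormat);
        RTLogLoggerV(NULL /* default instance */, pszFormat, va);
        va_end(va);
    }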
+
diff --git a/src/VBox/VMM/VMMR0/VMMR0TripleFaultHack.cpp b/src/VBox/VMM/VMMR0/VMMR0TripleFaultHack.cpp
new file mode 100644
index 00000000..d286ec70
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/VMMR0TripleFaultHack.cpp
@@ -0,0 +1,209 @@
+/* $Id: VMMR0TripleFaultHack.cpp $ */
+/** @file
+ * VMM - Host Context Ring 0, Triple Fault Debugging Hack.
+ *
+ * Only use this when desperate. May not work on all systems, esp. newer ones,
+ * since it requires BIOS support for the warm reset vector at 0467h.
+ */
+
+/*
+ * Copyright (C) 2011-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_VMM
+#include <VBox/vmm/vmm.h>
+#include "VMMInternal.h"
+#include <VBox/param.h>
+
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/assert.h>
+#include <iprt/memobj.h>
+#include <iprt/mem.h>
+#include <iprt/string.h>
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+static RTR0MEMOBJ g_hMemPage0;
+static RTR0MEMOBJ g_hMapPage0;
+static uint8_t *g_pbPage0;
+
+static RTR0MEMOBJ g_hMemLowCore;
+static RTR0MEMOBJ g_hMapLowCore;
+static uint8_t *g_pbLowCore;
+static RTHCPHYS g_HCPhysLowCore;
+
+/** @name For restoring memory we've overwritten.
+ * @{ */
+static uint32_t g_u32SavedVector;
+static uint16_t g_u16SavedCadIndicator;
+static void *g_pvSavedLowCore;
+/** @} */
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+/* VMMR0TripleFaultHackA.asm */
+DECLASM(void) vmmR0TripleFaultHackStart(void);
+DECLASM(void) vmmR0TripleFaultHackEnd(void);
+DECLASM(void) vmmR0TripleFaultHackTripleFault(void);
+
+
+/**
+ * Initializes the triple fault / boot hack.
+ *
+ * Always call vmmR0TripleFaultHackTerm to clean up, even when this call fails.
+ *
+ * @returns VBox status code.
+ */
+int vmmR0TripleFaultHackInit(void)
+{
+ /*
+ * Map the first page.
+ */
+ int rc = RTR0MemObjEnterPhys(&g_hMemPage0, 0, PAGE_SIZE, RTMEM_CACHE_POLICY_DONT_CARE);
+ AssertRCReturn(rc, rc);
+ rc = RTR0MemObjMapKernel(&g_hMapPage0, g_hMemPage0, (void *)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE);
+ AssertRCReturn(rc, rc);
+ g_pbPage0 = (uint8_t *)RTR0MemObjAddress(g_hMapPage0);
+ LogRel(("0040:0067 = %04x:%04x\n", RT_MAKE_U16(g_pbPage0[0x467+2], g_pbPage0[0x467+3]), RT_MAKE_U16(g_pbPage0[0x467+0], g_pbPage0[0x467+1]) ));
+
+ /*
+ * Allocate some "low core" memory. If that fails, just grab some memory.
+ */
+ //rc = RTR0MemObjAllocPhys(&g_hMemLowCore, PAGE_SIZE, _1M - 1);
+ //__debugbreak();
+ rc = RTR0MemObjEnterPhys(&g_hMemLowCore, 0x7000, PAGE_SIZE, RTMEM_CACHE_POLICY_DONT_CARE);
+ AssertRCReturn(rc, rc);
+ rc = RTR0MemObjMapKernel(&g_hMapLowCore, g_hMemLowCore, (void *)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE);
+ AssertRCReturn(rc, rc);
+ g_pbLowCore = (uint8_t *)RTR0MemObjAddress(g_hMapLowCore);
+ g_HCPhysLowCore = RTR0MemObjGetPagePhysAddr(g_hMapLowCore, 0);
+ LogRel(("Low core at %RHp mapped at %p\n", g_HCPhysLowCore, g_pbLowCore));
+
+ /*
+ * Save memory we'll be overwriting.
+ */
+ g_pvSavedLowCore = RTMemAlloc(PAGE_SIZE);
+ AssertReturn(g_pvSavedLowCore, VERR_NO_MEMORY);
+ memcpy(g_pvSavedLowCore, g_pbLowCore, PAGE_SIZE);
+
+ g_u32SavedVector = RT_MAKE_U32_FROM_U8(g_pbPage0[0x467], g_pbPage0[0x467+1], g_pbPage0[0x467+2], g_pbPage0[0x467+3]);
+ g_u16SavedCadIndicator = RT_MAKE_U16(g_pbPage0[0x472], g_pbPage0[0x472+1]);
+
+ /*
+ * Install the code.
+ */
+ size_t cbCode = (uintptr_t)&vmmR0TripleFaultHackEnd - (uintptr_t)&vmmR0TripleFaultHackStart;
+ AssertLogRelReturn(cbCode <= PAGE_SIZE, VERR_OUT_OF_RANGE);
+ memcpy(g_pbLowCore, &vmmR0TripleFaultHackStart, cbCode);
+
+ g_pbPage0[0x467+0] = 0x00;
+ g_pbPage0[0x467+1] = 0x70;
+ g_pbPage0[0x467+2] = 0x00;
+ g_pbPage0[0x467+3] = 0x00;
+
+ g_pbPage0[0x472+0] = 0x34;
+ g_pbPage0[0x472+1] = 0x12;
+
+ /*
+ * Configure the status port and CMOS shutdown command.
+ */
+ uint32_t fSaved = ASMIntDisableFlags();
+
+ ASMOutU8(0x70, 0x0f);
+ ASMOutU8(0x71, 0x0a);
+
+ ASMOutU8(0x70, 0x05);
+ ASMInU8(0x71);
+
+ ASMReloadCR3();
+ ASMWriteBackAndInvalidateCaches();
+
+ ASMSetFlags(fSaved);
+
+#if 1 /* For testing & debugging. */
+ vmmR0TripleFaultHackTripleFault();
+#endif
+
+ return VINF_SUCCESS;
+}
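The init routine leans on two legacy BIOS conventions: the warm-reset vector, a real-mode far pointer at 0040:0067 (physical 0x467) that the BIOS jumps through when the CMOS shutdown status byte (register 0Fh) holds 0Ah, and the flag word at 0x472 where 0x1234 requests a warm boot without a memory test. The byte stores above encode the far pointer 0000:7000, i.e. the low-core page the hack code was copied to. A sketch of the same encoding in one helper; pbPage0 is assumed to be a mapping of physical page 0 as in the function above.

    #include <stdint.h>

    /* Encode a real-mode far pointer (offset first, then segment, little-endian)
       into the warm-reset vector and flag a warm boot.  Sketch only. */
    static void setWarmResetVector(uint8_t *pbPage0, uint16_t uSegment, uint16_t offTarget)
    {
        pbPage0[0x467 + 0] = (uint8_t)(offTarget & 0xff);
        pbPage0[0x467 + 1] = (uint8_t)(offTarget >> 8);
        pbPage0[0x467 + 2] = (uint8_t)(uSegment & 0xff);
        pbPage0[0x467 + 3] = (uint8_t)(uSegment >> 8);

        pbPage0[0x472 + 0] = 0x34;      /* 0x1234 = warm boot, skip the memory test */
        pbPage0[0x472 + 1] = 0x12;
    }

    /* The code above amounts to setWarmResetVector(g_pbPage0, 0x0000, 0x7000),
       plus writing 0Ah to CMOS register 0Fh so the BIOS resumes via 0x467. */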
+
+
+/**
+ * Try to undo the harm done by the init function.
+ *
+ * This may leave the system in an unstable state since we might have been
+ * hijacking memory below 1MB that is in use by the kernel.
+ */
+void vmmR0TripleFaultHackTerm(void)
+{
+ /*
+ * Restore overwritten memory.
+ */
+ if ( g_pvSavedLowCore
+ && g_pbLowCore)
+ memcpy(g_pbLowCore, g_pvSavedLowCore, PAGE_SIZE);
+
+ if (g_pbPage0)
+ {
+ g_pbPage0[0x467+0] = RT_BYTE1(g_u32SavedVector);
+ g_pbPage0[0x467+1] = RT_BYTE2(g_u32SavedVector);
+ g_pbPage0[0x467+2] = RT_BYTE3(g_u32SavedVector);
+ g_pbPage0[0x467+3] = RT_BYTE4(g_u32SavedVector);
+
+ g_pbPage0[0x472+0] = RT_BYTE1(g_u16SavedCadIndicator);
+ g_pbPage0[0x472+1] = RT_BYTE2(g_u16SavedCadIndicator);
+ }
+
+ /*
+ * Fix the CMOS.
+ */
+ if (g_pvSavedLowCore)
+ {
+ uint32_t fSaved = ASMIntDisableFlags();
+
+ ASMOutU8(0x70, 0x0f);
+ ASMOutU8(0x71, 0x0a);
+
+ ASMOutU8(0x70, 0x00);
+ ASMInU8(0x71);
+
+ ASMReloadCR3();
+ ASMWriteBackAndInvalidateCaches();
+
+ ASMSetFlags(fSaved);
+ }
+
+ /*
+ * Release resources.
+ */
+ RTMemFree(g_pvSavedLowCore);
+ g_pvSavedLowCore = NULL;
+
+ RTR0MemObjFree(g_hMemLowCore, true /*fFreeMappings*/);
+ g_hMemLowCore = NIL_RTR0MEMOBJ;
+ g_hMapLowCore = NIL_RTR0MEMOBJ;
+ g_pbLowCore = NULL;
+ g_HCPhysLowCore = NIL_RTHCPHYS;
+
+ RTR0MemObjFree(g_hMemPage0, true /*fFreeMappings*/);
+ g_hMemPage0 = NIL_RTR0MEMOBJ;
+ g_hMapPage0 = NIL_RTR0MEMOBJ;
+ g_pbPage0 = NULL;
+}
+
diff --git a/src/VBox/VMM/VMMR0/VMMR0TripleFaultHackA.asm b/src/VBox/VMM/VMMR0/VMMR0TripleFaultHackA.asm
new file mode 100644
index 00000000..cca91654
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/VMMR0TripleFaultHackA.asm
@@ -0,0 +1,264 @@
+; $Id: VMMR0TripleFaultHackA.asm $
+;; @file
+; VMM - Host Context Ring 0, Assembly Code for The Triple Fault Debugging Hack.
+;
+
+;
+; Copyright (C) 2011-2020 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+;*******************************************************************************
+;* Header Files *
+;*******************************************************************************
+%include "VBox/asmdefs.mac"
+
+
+BEGINCODE
+GLOBALNAME vmmR0TripleFaultHackStart
+%define CALC_ADDR(a_Addr) ( (a_Addr) - NAME(vmmR0TripleFaultHackStart) + 07000h )
+
+
+BITS 16
+BEGINPROC vmmR0TripleFaultHack
+ ; Set up stack.
+ cli ; paranoia
+ mov sp, 0ffffh
+ mov ax, cs
+ mov ss, ax
+ mov ds, ax
+ mov es, ax
+ cld ; paranoia
+
+ COM_INIT
+
+ ; Beep and say hello to the post-reset world.
+ call NAME(vmmR0TripleFaultHackBeep)
+ mov si, CALC_ADDR(.s_szHello)
+ call NAME(vmmR0TripleFaultHackPrint)
+
+.forever:
+ hlt
+ jmp .forever
+
+.s_szHello:
+ db 'Hello post-reset world', 0ah, 0dh, 0
+ENDPROC vmmR0TripleFaultHack
+
+;; ds:si = zero terminated string.
+BEGINPROC vmmR0TripleFaultHackPrint
+ push eax
+ push esi
+
+.outer_loop:
+ lodsb
+ cmp al, 0
+ je .done
+ call NAME(vmmR0TripleFaultHackPrintCh)
+ jmp .outer_loop
+
+.done:
+ pop esi
+ pop eax
+ ret
+ENDPROC vmmR0TripleFaultHackPrint
+
+
+;; al = char to print
+BEGINPROC vmmR0TripleFaultHackPrintCh
+ push eax
+ push edx
+ push ecx
+ mov ah, al ; save char.
+
+ ; Wait for status.
+ mov ecx, _1G
+ mov dx, VBOX_UART_BASE + 5
+.pre_status:
+ in al, dx
+ test al, 20h
+ jnz .put_char
+ dec ecx
+ jnz .pre_status
+
+ ; Write the character.
+.put_char:
+ mov al, ah
+ mov dx, VBOX_UART_BASE
+ out dx, al
+
+ ; Wait for status.
+ mov ecx, _1G
+ mov dx, VBOX_UART_BASE + 5
+.post_status:
+ in al, dx
+ test al, 20h
+ jnz .done
+ dec ecx
+ jnz .post_status
+
+.done:
+ pop ecx
+ pop edx
+ pop eax
+ ret
+ENDPROC vmmR0TripleFaultHackPrintCh
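The character output routine is a classic polled 16550 write: spin on the line status register at base+5 until the transmit-holding-register-empty bit (20h) is set, write the byte to the data register at the base port, then wait for the bit again, with a huge loop counter as a bail-out so a dead UART cannot hang the hack forever. The same sequence in C, with hypothetical inb/outb helpers and COM1 assumed for VBOX_UART_BASE:

    #include <stdint.h>

    #define UART_BASE       0x3f8u          /* assumed COM1; the asm uses VBOX_UART_BASE */
    #define UART_LSR_THRE   0x20u           /* transmit holding register empty */

    extern uint8_t inb(uint16_t uPort);     /* hypothetical port I/O helpers */
    extern void    outb(uint16_t uPort, uint8_t bValue);

    static void uartPutCh(uint8_t bCh)
    {
        uint32_t i;
        for (i = 1u << 30; i > 0 && !(inb(UART_BASE + 5) & UART_LSR_THRE); i--)
            /* wait for the transmitter to go idle (bounded) */;

        outb(UART_BASE, bCh);               /* write the character */

        for (i = 1u << 30; i > 0 && !(inb(UART_BASE + 5) & UART_LSR_THRE); i--)
            /* wait for it to drain before returning (bounded) */;
    }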
+
+;;
+; Make a ~440 Hz beep.
+BEGINPROC vmmR0TripleFaultHackBeep
+ push eax
+ push edx
+ push ecx
+
+ ; Program PIT channel 2 for a square wave and gate the speaker on.
+ mov al, 10110110b
+ out 43h, al
+ mov ax, 0a79h ; A = 440
+ out 42h, al
+ shr ax, 8
+ out 42h, al
+
+ in al, 61h
+ or al, 3
+ out 61h, al
+
+ ; delay
+ mov ecx, _1G
+.delay:
+ inc ecx
+ dec ecx
+ dec ecx
+ jnz .delay
+
+ ; shut up speaker.
+ in al, 61h
+ and al, 11111100b
+ out 61h, al
+
+.done:
+ pop ecx
+ pop edx
+ pop eax
+ ret
+ENDPROC vmmR0TripleFaultHackBeep
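The beep routine programs PIT channel 2 in square-wave mode with divisor 0a79h, which against the 1.193182 MHz input clock gives roughly 445 Hz (near enough to concert A), gates the speaker on through port 61h bits 0-1, burns some time, and gates it off again. A C sketch of the same sequence, reusing the hypothetical port helpers from the previous example:

    #include <stdint.h>

    extern uint8_t inb(uint16_t uPort);     /* hypothetical port I/O helpers */
    extern void    outb(uint16_t uPort, uint8_t bValue);

    static void pitBeep(uint32_t uFreqHz, void (*pfnDelay)(void))
    {
        uint16_t uDivisor = (uint16_t)(1193182u / uFreqHz);

        outb(0x43, 0xb6);                           /* channel 2, lobyte/hibyte, mode 3 (square wave) */
        outb(0x42, (uint8_t)(uDivisor & 0xff));
        outb(0x42, (uint8_t)(uDivisor >> 8));

        outb(0x61, inb(0x61) | 0x03);               /* speaker gate + data on */
        pfnDelay();                                 /* the asm just spins for a while */
        outb(0x61, (uint8_t)(inb(0x61) & ~0x03));   /* speaker off */
    }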
+
+
+GLOBALNAME vmmR0TripleFaultHackEnd
+
+
+
+
+;;;
+;;;
+;;;
+;;;
+;;;
+
+
+
+BITS ARCH_BITS
+
+BEGINPROC vmmR0TripleFaultHackKbdWait
+ push xAX
+
+.check_status:
+ in al, 64h
+ test al, 1 ; KBD_STAT_OBF
+ jnz .read_data_and_status
+ test al, 2 ; KBD_STAT_IBF
+ jnz .check_status
+
+ pop xAX
+ ret
+
+.read_data_and_status:
+ in al, 60h
+ jmp .check_status
+ENDPROC vmmR0TripleFaultHackKbdWait
+
+
+BEGINPROC vmmR0TripleFaultHackKbdRead
+ out 64h, al ; Write the command.
+
+.check_status:
+ in al, 64h
+ test al, 1 ; KBD_STAT_OBF
+ jz .check_status
+
+ in al, 60h ; Read the data.
+ ret
+ENDPROC vmmR0TripleFaultHackKbdRead
+
+
+BEGINPROC vmmR0TripleFaultHackKbdWrite
+ out 64h, al ; Write the command.
+ call NAME(vmmR0TripleFaultHackKbdWait)
+
+ xchg al, ah
+ out 60h, al ; Write the data.
+ call NAME(vmmR0TripleFaultHackKbdWait)
+ xchg al, ah
+
+ ret
+ENDPROC vmmR0TripleFaultHackKbdWrite
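The three keyboard-controller helpers implement the standard 8042 handshake: status port 64h bit 0 (OBF) means a byte is waiting at data port 60h, bit 1 (IBF) means the controller has not yet consumed the last byte written to it; commands go to 64h and their data bytes to 60h. A hedged C restatement of the wait/read/write trio, again with hypothetical inb/outb helpers:

    #include <stdint.h>

    #define KBD_STAT_OBF    0x01u       /* output buffer full: data waiting at port 0x60 */
    #define KBD_STAT_IBF    0x02u       /* input buffer full: controller still busy */

    extern uint8_t inb(uint16_t uPort);     /* hypothetical port I/O helpers */
    extern void    outb(uint16_t uPort, uint8_t bValue);

    /* Drain stale output and wait until the controller accepts a new byte. */
    static void kbdWait(void)
    {
        for (;;)
        {
            uint8_t bStatus = inb(0x64);
            if (bStatus & KBD_STAT_OBF)
            {
                (void)inb(0x60);            /* read and discard pending data */
                continue;
            }
            if (!(bStatus & KBD_STAT_IBF))
                return;
        }
    }

    /* Issue a controller command and read its one-byte reply (e.g. 0d0h = read output port). */
    static uint8_t kbdReadCmd(uint8_t bCmd)
    {
        outb(0x64, bCmd);
        while (!(inb(0x64) & KBD_STAT_OBF))
            /* wait for the reply */;
        return inb(0x60);
    }

    /* Issue a controller command followed by one data byte (e.g. 0d1h = write output port). */
    static void kbdWriteCmd(uint8_t bCmd, uint8_t bData)
    {
        outb(0x64, bCmd);
        kbdWait();
        outb(0x60, bData);
        kbdWait();
    }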
+
+
+
+BEGINPROC vmmR0TripleFaultHackTripleFault
+ push xAX
+ push xSI
+
+ xor eax, eax
+ push xAX
+ push xAX
+ push xAX
+ push xAX
+
+ COM_CHAR 'B'
+ COM_CHAR 'y'
+ COM_CHAR 'e'
+ COM_CHAR '!'
+ COM_CHAR 0ah
+ COM_CHAR 0dh
+
+
+ ;call NAME(vmmR0TripleFaultHackBeep32)
+%if 1
+ lidt [xSP]
+%elif 0
+ in al, 92h
+ or al, 1
+ out 92h, al
+ in al, 92h
+ cli
+ hlt
+%else
+ mov al, 0d0h ; KBD_CCMD_READ_OUTPORT
+ call NAME(vmmR0TripleFaultHackKbdRead)
+ mov ah, 0feh
+ and ah, al
+ mov al, 0d1h ; KBD_CCMD_WRITE_OUTPORT
+ call NAME(vmmR0TripleFaultHackKbdWrite)
+ cli
+ hlt
+%endif
+ int3
+
+ pop xAX
+ pop xAX
+ pop xAX
+ pop xAX
+
+ pop xSI
+ pop xAX
+ ret
+ENDPROC vmmR0TripleFaultHackTripleFault
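The enabled branch of the trigger pushes a block of zeros, loads it as the IDT (zero base, zero limit) and then executes int3; with no usable IDT the #BP cannot be delivered, which escalates to a double fault and then to a triple fault and a CPU reset, after which the BIOS warm-reset path jumps to the code installed at physical 0x7000. The disabled branches show two other classic reset methods: port 92h bit 0 and pulsing the 8042 output-port reset line. A compact C sketch of the null-IDT variant, assuming GCC/clang inline assembly:

    #include <stdint.h>

    /* Load an empty IDT and raise an exception so it escalates to a triple
       fault.  This resets the machine; it is a sketch of the %if 1 path above. */
    static void tripleFaultViaNullIdt(void)
    {
        struct
        {
            uint16_t  cbLimit;
            uintptr_t uBase;
        } __attribute__((packed)) NullIdtr = { 0, 0 };

        __asm__ __volatile__("lidt %0\n\t"
                             "int3"
                             : : "m" (NullIdtr));
    }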
+