path: root/src/VBox/VMM/VMMR0
author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-06 03:01:46 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-06 03:01:46 +0000
commit     f8fe689a81f906d1b91bb3220acde2a4ecb14c5b (patch)
tree       26484e9d7e2c67806c2d1760196ff01aaa858e8c /src/VBox/VMM/VMMR0
parent     Initial commit. (diff)
download   virtualbox-f8fe689a81f906d1b91bb3220acde2a4ecb14c5b.tar.xz
           virtualbox-f8fe689a81f906d1b91bb3220acde2a4ecb14c5b.zip

Adding upstream version 6.0.4-dfsg. (upstream/6.0.4-dfsg, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/VBox/VMM/VMMR0')
-rw-r--r--  src/VBox/VMM/VMMR0/CPUMR0.cpp                 1009
-rw-r--r--  src/VBox/VMM/VMMR0/CPUMR0A.asm                 358
-rw-r--r--  src/VBox/VMM/VMMR0/EMR0.cpp                     60
-rw-r--r--  src/VBox/VMM/VMMR0/GIMR0.cpp                   117
-rw-r--r--  src/VBox/VMM/VMMR0/GIMR0Hv.cpp                 182
-rw-r--r--  src/VBox/VMM/VMMR0/GIMR0Kvm.cpp                119
-rw-r--r--  src/VBox/VMM/VMMR0/GMMR0.cpp                  5445
-rw-r--r--  src/VBox/VMM/VMMR0/GMMR0Internal.h              92
-rw-r--r--  src/VBox/VMM/VMMR0/GVMMR0.cpp                 3106
-rw-r--r--  src/VBox/VMM/VMMR0/GVMMR0Internal.h             69
-rw-r--r--  src/VBox/VMM/VMMR0/HMR0.cpp                   2005
-rw-r--r--  src/VBox/VMM/VMMR0/HMR0A.asm                  2184
-rw-r--r--  src/VBox/VMM/VMMR0/HMSVMR0.cpp                8232
-rw-r--r--  src/VBox/VMM/VMMR0/HMSVMR0.h                    99
-rw-r--r--  src/VBox/VMM/VMMR0/HMVMXR0.cpp               13777
-rw-r--r--  src/VBox/VMM/VMMR0/HMVMXR0.h                    85
-rw-r--r--  src/VBox/VMM/VMMR0/Makefile.kup                  0
-rw-r--r--  src/VBox/VMM/VMMR0/NEMR0Native-win.cpp        2628
-rw-r--r--  src/VBox/VMM/VMMR0/PDMR0Device.cpp             861
-rw-r--r--  src/VBox/VMM/VMMR0/PDMR0Driver.cpp              64
-rw-r--r--  src/VBox/VMM/VMMR0/PGMR0.cpp                   660
-rw-r--r--  src/VBox/VMM/VMMR0/PGMR0Bth.h                   25
-rw-r--r--  src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp         170
-rw-r--r--  src/VBox/VMM/VMMR0/TRPMR0.cpp                  107
-rw-r--r--  src/VBox/VMM/VMMR0/TRPMR0A.asm                 155
-rw-r--r--  src/VBox/VMM/VMMR0/VMMR0.cpp                  2861
-rw-r--r--  src/VBox/VMM/VMMR0/VMMR0.def                   117
-rw-r--r--  src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm         491
-rw-r--r--  src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm           401
-rw-r--r--  src/VBox/VMM/VMMR0/VMMR0TripleFaultHack.cpp    209
-rw-r--r--  src/VBox/VMM/VMMR0/VMMR0TripleFaultHackA.asm   264
31 files changed, 45952 insertions, 0 deletions
diff --git a/src/VBox/VMM/VMMR0/CPUMR0.cpp b/src/VBox/VMM/VMMR0/CPUMR0.cpp
new file mode 100644
index 00000000..e7afcbac
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/CPUMR0.cpp
@@ -0,0 +1,1009 @@
+/* $Id: CPUMR0.cpp $ */
+/** @file
+ * CPUM - Host Context Ring 0.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_CPUM
+#include <VBox/vmm/cpum.h>
+#include "CPUMInternal.h"
+#include <VBox/vmm/vm.h>
+#include <VBox/err.h>
+#include <VBox/log.h>
+#include <VBox/vmm/hm.h>
+#include <iprt/assert.h>
+#include <iprt/asm-amd64-x86.h>
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+# include <iprt/mem.h>
+# include <iprt/memobj.h>
+# include <VBox/apic.h>
+#endif
+#include <iprt/x86.h>
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+/**
+ * Local APIC mappings.
+ */
+typedef struct CPUMHOSTLAPIC
+{
+ /** Indicates that the entry is in use and has valid data. */
+ bool fEnabled;
+ /** Whether it's operating in X2APIC mode (EXTD). */
+ bool fX2Apic;
+ /** The APIC version number. */
+ uint32_t uVersion;
+ /** The physical address of the APIC registers. */
+ RTHCPHYS PhysBase;
+ /** The memory object created by entering the physical address (RTR0MemObjEnterPhys). */
+ RTR0MEMOBJ hMemObj;
+ /** The mapping object for hMemObj. */
+ RTR0MEMOBJ hMapObj;
+ /** The mapping address of the APIC registers.
+ * @remarks Different CPUs may use the same physical address to map their
+ * APICs, so this pointer is only valid when on the CPU owning the
+ * APIC. */
+ void *pv;
+} CPUMHOSTLAPIC;
+#endif
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+static CPUMHOSTLAPIC g_aLApics[RTCPUSET_MAX_CPUS];
+#endif
+
+/**
+ * CPUID bits to unify among all cores.
+ */
+static struct
+{
+ uint32_t uLeaf; /**< Leaf to check. */
+ uint32_t uEcx; /**< which bits in ecx to unify between CPUs. */
+ uint32_t uEdx; /**< which bits in edx to unify between CPUs. */
+}
+const g_aCpuidUnifyBits[] =
+{
+ {
+ 0x00000001,
+ X86_CPUID_FEATURE_ECX_CX16 | X86_CPUID_FEATURE_ECX_MONITOR,
+ X86_CPUID_FEATURE_EDX_CX8
+ }
+};
+
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+static int cpumR0MapLocalApics(void);
+static void cpumR0UnmapLocalApics(void);
+#endif
+static int cpumR0SaveHostDebugState(PVMCPU pVCpu);
+
+
+/**
+ * Does the Ring-0 CPU initialization once during module load.
+ * XXX Host-CPU hot-plugging?
+ */
+VMMR0_INT_DECL(int) CPUMR0ModuleInit(void)
+{
+ int rc = VINF_SUCCESS;
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+ rc = cpumR0MapLocalApics();
+#endif
+ return rc;
+}
+
+
+/**
+ * Terminate the module.
+ */
+VMMR0_INT_DECL(int) CPUMR0ModuleTerm(void)
+{
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+ cpumR0UnmapLocalApics();
+#endif
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Check the CPUID features of this particular CPU and disable relevant features
+ * for the guest which do not exist on this CPU. We have seen systems where the
+ * X86_CPUID_FEATURE_ECX_MONITOR feature flag is only set on some host CPUs, see
+ * @bugref{5436}.
+ *
+ * @note This function might be called simultaneously on more than one CPU!
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 Pointer to the VM structure.
+ * @param pvUser2 Ignored.
+ */
+static DECLCALLBACK(void) cpumR0CheckCpuid(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PVM pVM = (PVM)pvUser1;
+
+ NOREF(idCpu); NOREF(pvUser2);
+ for (uint32_t i = 0; i < RT_ELEMENTS(g_aCpuidUnifyBits); i++)
+ {
+ /* Note! Cannot use cpumCpuIdGetLeaf from here because we're not
+ necessarily in the VM process context. So, we use the
+ legacy arrays as temporary storage. */
+
+ uint32_t uLeaf = g_aCpuidUnifyBits[i].uLeaf;
+ PCPUMCPUID pLegacyLeaf;
+ if (uLeaf < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmStd))
+ pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdPatmStd[uLeaf];
+ else if (uLeaf - UINT32_C(0x80000000) < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmExt))
+ pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdPatmExt[uLeaf - UINT32_C(0x80000000)];
+ else if (uLeaf - UINT32_C(0xc0000000) < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmCentaur))
+ pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdPatmCentaur[uLeaf - UINT32_C(0xc0000000)];
+ else
+ continue;
+
+ uint32_t eax, ebx, ecx, edx;
+ ASMCpuIdExSlow(uLeaf, 0, 0, 0, &eax, &ebx, &ecx, &edx);
+
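+ /* AND with (this CPU's bits | ~unify-mask): clears any unify-candidate bit this CPU lacks, leaves all other bits untouched. */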
+ ASMAtomicAndU32(&pLegacyLeaf->uEcx, ecx | ~g_aCpuidUnifyBits[i].uEcx);
+ ASMAtomicAndU32(&pLegacyLeaf->uEdx, edx | ~g_aCpuidUnifyBits[i].uEdx);
+ }
+}
+
+
+/**
+ * Does Ring-0 CPUM initialization.
+ *
+ * This is mainly to check that the Host CPU mode is compatible
+ * with VBox.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) CPUMR0InitVM(PVM pVM)
+{
+ LogFlow(("CPUMR0Init: %p\n", pVM));
+
+ /*
+ * Check CR0 & CR4 flags.
+ */
+ uint32_t u32CR0 = ASMGetCR0();
+ if ((u32CR0 & (X86_CR0_PE | X86_CR0_PG)) != (X86_CR0_PE | X86_CR0_PG)) /* a bit paranoid perhaps.. */
+ {
+ Log(("CPUMR0Init: PE or PG not set. cr0=%#x\n", u32CR0));
+ return VERR_UNSUPPORTED_CPU_MODE;
+ }
+
+ /*
+ * Check for sysenter and syscall usage.
+ */
+ if (ASMHasCpuId())
+ {
+ /*
+ * SYSENTER/SYSEXIT
+ *
+ * Intel docs claim you should test both the flag and family, model &
+ * stepping because some Pentium Pro CPUs have the SEP cpuid flag set,
+ * but don't support it. AMD CPUs may support this feature in legacy
+ * mode, but they've banned it from long mode. Since we switch to 32-bit
+ * mode when entering raw-mode context the feature would become
+ * accessible again on AMD CPUs, so we have to check regardless of
+ * host bitness.
+ */
+ uint32_t u32CpuVersion;
+ uint32_t u32Dummy;
+ uint32_t fFeatures; /* (Used further down to check for MSRs, so don't clobber.) */
+ ASMCpuId(1, &u32CpuVersion, &u32Dummy, &u32Dummy, &fFeatures);
+ uint32_t const u32Family = u32CpuVersion >> 8;
+ uint32_t const u32Model = (u32CpuVersion >> 4) & 0xF;
+ uint32_t const u32Stepping = u32CpuVersion & 0xF;
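+ /* Accept SEP unless this is an early Intel P6 (family 6, model < 3, stepping < 3), which sets the flag without implementing SYSENTER. */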
+ if ( (fFeatures & X86_CPUID_FEATURE_EDX_SEP)
+ && ( u32Family != 6 /* (> pentium pro) */
+ || u32Model >= 3
+ || u32Stepping >= 3
+ || !ASMIsIntelCpu())
+ )
+ {
+ /*
+ * Read the MSR and see if it's in use or not.
+ */
+ uint32_t u32 = ASMRdMsr_Low(MSR_IA32_SYSENTER_CS);
+ if (u32)
+ {
+ pVM->cpum.s.fHostUseFlags |= CPUM_USE_SYSENTER;
+ Log(("CPUMR0Init: host uses sysenter cs=%08x%08x\n", ASMRdMsr_High(MSR_IA32_SYSENTER_CS), u32));
+ }
+ }
+
+ /*
+ * SYSCALL/SYSRET
+ *
+ * This feature is indicated by the SEP bit returned in EDX by CPUID
+ * function 0x80000001. Intel CPUs only support this feature in
+ * long mode. Since we're not running 64-bit guests in raw-mode there
+ * are no issues with 32-bit intel hosts.
+ */
+ uint32_t cExt = 0;
+ ASMCpuId(0x80000000, &cExt, &u32Dummy, &u32Dummy, &u32Dummy);
+ if (ASMIsValidExtRange(cExt))
+ {
+ uint32_t fExtFeaturesEDX = ASMCpuId_EDX(0x80000001);
+ if (fExtFeaturesEDX & X86_CPUID_EXT_FEATURE_EDX_SYSCALL)
+ {
+#ifdef RT_ARCH_X86
+ if (!ASMIsIntelCpu())
+#endif
+ {
+ uint64_t fEfer = ASMRdMsr(MSR_K6_EFER);
+ if (fEfer & MSR_K6_EFER_SCE)
+ {
+ pVM->cpum.s.fHostUseFlags |= CPUM_USE_SYSCALL;
+ Log(("CPUMR0Init: host uses syscall\n"));
+ }
+ }
+ }
+ }
+
+ /*
+ * Copy MSR_IA32_ARCH_CAPABILITIES bits over into the host feature structure.
+ */
+ pVM->cpum.s.HostFeatures.fArchRdclNo = 0;
+ pVM->cpum.s.HostFeatures.fArchIbrsAll = 0;
+ pVM->cpum.s.HostFeatures.fArchRsbOverride = 0;
+ pVM->cpum.s.HostFeatures.fArchVmmNeedNotFlushL1d = 0;
+ uint32_t const cStdRange = ASMCpuId_EAX(0);
+ if ( ASMIsValidStdRange(cStdRange)
+ && cStdRange >= 7)
+ {
+ uint32_t fEdxFeatures = ASMCpuId_EDX(7);
+ if ( (fEdxFeatures & X86_CPUID_STEXT_FEATURE_EDX_ARCHCAP)
+ && (fFeatures & X86_CPUID_FEATURE_EDX_MSR))
+ {
+ uint64_t const fArchVal = ASMRdMsr(MSR_IA32_ARCH_CAPABILITIES);
+ pVM->cpum.s.HostFeatures.fArchRdclNo = RT_BOOL(fArchVal & MSR_IA32_ARCH_CAP_F_RDCL_NO);
+ pVM->cpum.s.HostFeatures.fArchIbrsAll = RT_BOOL(fArchVal & MSR_IA32_ARCH_CAP_F_IBRS_ALL);
+ pVM->cpum.s.HostFeatures.fArchRsbOverride = RT_BOOL(fArchVal & MSR_IA32_ARCH_CAP_F_RSBO);
+ pVM->cpum.s.HostFeatures.fArchVmmNeedNotFlushL1d = RT_BOOL(fArchVal & MSR_IA32_ARCH_CAP_F_VMM_NEED_NOT_FLUSH_L1D);
+ }
+ else
+ pVM->cpum.s.HostFeatures.fArchCap = 0;
+ }
+
+ /*
+ * Unify/cross check some CPUID feature bits on all available CPU cores
+ * and threads. We've seen CPUs where the monitor support differed.
+ *
+ * Because the hyper heap isn't always mapped into ring-0, we cannot
+ * access it from a RTMpOnAll callback. We use the legacy CPUID arrays
+ * as temp ring-0 accessible memory instead, ASSUMING that they're all
+ * up to date when we get here.
+ */
+ RTMpOnAll(cpumR0CheckCpuid, pVM, NULL);
+
+ for (uint32_t i = 0; i < RT_ELEMENTS(g_aCpuidUnifyBits); i++)
+ {
+ bool fIgnored;
+ uint32_t uLeaf = g_aCpuidUnifyBits[i].uLeaf;
+ PCPUMCPUIDLEAF pLeaf = cpumCpuIdGetLeafEx(pVM, uLeaf, 0, &fIgnored);
+ if (pLeaf)
+ {
+ PCPUMCPUID pLegacyLeaf;
+ if (uLeaf < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmStd))
+ pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdPatmStd[uLeaf];
+ else if (uLeaf - UINT32_C(0x80000000) < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmExt))
+ pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdPatmExt[uLeaf - UINT32_C(0x80000000)];
+ else if (uLeaf - UINT32_C(0xc0000000) < RT_ELEMENTS(pVM->cpum.s.aGuestCpuIdPatmCentaur))
+ pLegacyLeaf = &pVM->cpum.s.aGuestCpuIdPatmCentaur[uLeaf - UINT32_C(0xc0000000)];
+ else
+ continue;
+
+ pLeaf->uEcx = pLegacyLeaf->uEcx;
+ pLeaf->uEdx = pLegacyLeaf->uEdx;
+ }
+ }
+
+ }
+
+
+ /*
+ * Check if debug registers are armed.
+ * This ASSUMES that DR7.GD is not set, or that it's handled transparently!
+ */
+ uint32_t u32DR7 = ASMGetDR7();
+ if (u32DR7 & X86_DR7_ENABLED_MASK)
+ {
+ for (VMCPUID i = 0; i < pVM->cCpus; i++)
+ pVM->aCpus[i].cpum.s.fUseFlags |= CPUM_USE_DEBUG_REGS_HOST;
+ Log(("CPUMR0Init: host uses debug registers (dr7=%x)\n", u32DR7));
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Trap handler for device-not-available fault (\#NM).
+ * Device not available, FP or (F)WAIT instruction.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if the guest FPU state is loaded.
+ * @retval VINF_EM_RAW_GUEST_TRAP if it is a guest trap.
+ * @retval VINF_CPUM_HOST_CR0_MODIFIED if we modified the host CR0.
+ *
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(int) CPUMR0Trap07Handler(PVM pVM, PVMCPU pVCpu)
+{
+ Assert(pVM->cpum.s.HostFeatures.fFxSaveRstor);
+ Assert(ASMGetCR4() & X86_CR4_OSFXSR);
+
+ /* If the FPU state has already been loaded, then it's a guest trap. */
+ if (CPUMIsGuestFPUStateActive(pVCpu))
+ {
+ Assert( ((pVCpu->cpum.s.Guest.cr0 & (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
+ || ((pVCpu->cpum.s.Guest.cr0 & (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS | X86_CR0_EM)));
+ return VINF_EM_RAW_GUEST_TRAP;
+ }
+
+ /*
+ * There are two basic actions:
+ * 1. Save host fpu and restore guest fpu.
+ * 2. Generate guest trap.
+ *
+ * When entering the hypervisor we'll always enable MP (for proper wait
+ * trapping) and TS (for intercepting all fpu/mmx/sse stuff). The EM flag
+ * is taken from the guest OS in order to get proper SSE handling.
+ *
+ *
+ * Actions taken depending on the guest CR0 flags:
+ *
+ * 3 2 1
+ * TS | EM | MP | FPUInstr | WAIT :: VMM Action
+ * ------------------------------------------------------------------------
+ * 0 | 0 | 0 | Exec | Exec :: Clear TS & MP, Save HC, Load GC.
+ * 0 | 0 | 1 | Exec | Exec :: Clear TS, Save HC, Load GC.
+ * 0 | 1 | 0 | #NM | Exec :: Clear TS & MP, Save HC, Load GC.
+ * 0 | 1 | 1 | #NM | Exec :: Clear TS, Save HC, Load GC.
+ * 1 | 0 | 0 | #NM | Exec :: Clear MP, Save HC, Load GC. (EM is already cleared.)
+ * 1 | 0 | 1 | #NM | #NM :: Go to guest taking trap there.
+ * 1 | 1 | 0 | #NM | Exec :: Clear MP, Save HC, Load GC. (EM is already set.)
+ * 1 | 1 | 1 | #NM | #NM :: Go to guest taking trap there.
+ */
+
+ switch (pVCpu->cpum.s.Guest.cr0 & (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS))
+ {
+ case X86_CR0_MP | X86_CR0_TS:
+ case X86_CR0_MP | X86_CR0_TS | X86_CR0_EM:
+ return VINF_EM_RAW_GUEST_TRAP;
+ default:
+ break;
+ }
+
+ return CPUMR0LoadGuestFPU(pVM, pVCpu);
+}
+
+
+/**
+ * Saves the host-FPU/XMM state (if necessary) and (always) loads the guest-FPU
+ * state into the CPU.
+ *
+ * @returns VINF_SUCCESS on success, host CR0 unmodified.
+ * @returns VINF_CPUM_HOST_CR0_MODIFIED on success when the host CR0 was
+ * modified and VT-x needs to update the value in the VMCS.
+ *
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(int) CPUMR0LoadGuestFPU(PVM pVM, PVMCPU pVCpu)
+{
+ int rc = VINF_SUCCESS;
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_USED_FPU_GUEST));
+ Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_SYNC_FPU_STATE));
+
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.s.Guest))
+ {
+ Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE));
+
+ /* Save the host state if necessary. */
+ if (!(pVCpu->cpum.s.fUseFlags & CPUM_USED_FPU_HOST))
+ rc = cpumRZSaveHostFPUState(&pVCpu->cpum.s);
+
+ /* Restore the state on entry as we need to be in 64-bit mode to access the full state. */
+ pVCpu->cpum.s.fUseFlags |= CPUM_SYNC_FPU_STATE;
+
+ Assert( (pVCpu->cpum.s.fUseFlags & (CPUM_USED_FPU_HOST | CPUM_USED_FPU_SINCE_REM))
+ == (CPUM_USED_FPU_HOST | CPUM_USED_FPU_SINCE_REM));
+ }
+ else
+#endif
+ {
+ if (!pVM->cpum.s.HostFeatures.fLeakyFxSR)
+ {
+ Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE));
+ rc = cpumR0SaveHostRestoreGuestFPUState(&pVCpu->cpum.s);
+ }
+ else
+ {
+ Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE) || (pVCpu->cpum.s.fUseFlags & CPUM_USED_FPU_HOST));
+ /** @todo r=ramshankar: Can't we use a cached value here
+ * instead of reading the MSR? host EFER doesn't usually
+ * change. */
+ uint64_t uHostEfer = ASMRdMsr(MSR_K6_EFER);
+ if (!(uHostEfer & MSR_K6_EFER_FFXSR))
+ rc = cpumR0SaveHostRestoreGuestFPUState(&pVCpu->cpum.s);
+ else
+ {
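+ /* Temporarily clear MSR_K6_EFER_FFXSR (fast FXSAVE/FXRSTOR), or fxsave/fxrstor would skip the XMM registers; interrupts are disabled around the switch. */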
+ RTCCUINTREG const uSavedFlags = ASMIntDisableFlags();
+ pVCpu->cpum.s.fUseFlags |= CPUM_USED_MANUAL_XMM_RESTORE;
+ ASMWrMsr(MSR_K6_EFER, uHostEfer & ~MSR_K6_EFER_FFXSR);
+ rc = cpumR0SaveHostRestoreGuestFPUState(&pVCpu->cpum.s);
+ ASMWrMsr(MSR_K6_EFER, uHostEfer | MSR_K6_EFER_FFXSR);
+ ASMSetFlags(uSavedFlags);
+ }
+ }
+ Assert( (pVCpu->cpum.s.fUseFlags & (CPUM_USED_FPU_GUEST | CPUM_USED_FPU_HOST | CPUM_USED_FPU_SINCE_REM))
+ == (CPUM_USED_FPU_GUEST | CPUM_USED_FPU_HOST | CPUM_USED_FPU_SINCE_REM));
+ }
+ return rc;
+}
+
+
+/**
+ * Saves the guest FPU/XMM state if needed, restores the host FPU/XMM state as
+ * needed.
+ *
+ * @returns true if we saved the guest state.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(bool) CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(PVMCPU pVCpu)
+{
+ bool fSavedGuest;
+ Assert(pVCpu->CTX_SUFF(pVM)->cpum.s.HostFeatures.fFxSaveRstor);
+ Assert(ASMGetCR4() & X86_CR4_OSFXSR);
+ if (pVCpu->cpum.s.fUseFlags & (CPUM_USED_FPU_GUEST | CPUM_USED_FPU_HOST))
+ {
+ fSavedGuest = RT_BOOL(pVCpu->cpum.s.fUseFlags & CPUM_USED_FPU_GUEST);
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.s.Guest))
+ {
+ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_FPU_GUEST)
+ {
+ Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_SYNC_FPU_STATE));
+ HMR0SaveFPUState(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->cpum.s.Guest);
+ }
+ else
+ pVCpu->cpum.s.fUseFlags &= ~CPUM_SYNC_FPU_STATE;
+ cpumR0RestoreHostFPUState(&pVCpu->cpum.s);
+ }
+ else
+#endif
+ {
+ if (!(pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE))
+ cpumR0SaveGuestRestoreHostFPUState(&pVCpu->cpum.s);
+ else
+ {
+ /* Temporarily clear MSR_K6_EFER_FFXSR or else we'll be unable to
+ save/restore the XMM state with fxsave/fxrstor. */
+ uint64_t uHostEfer = ASMRdMsr(MSR_K6_EFER);
+ if (uHostEfer & MSR_K6_EFER_FFXSR)
+ {
+ RTCCUINTREG const uSavedFlags = ASMIntDisableFlags();
+ ASMWrMsr(MSR_K6_EFER, uHostEfer & ~MSR_K6_EFER_FFXSR);
+ cpumR0SaveGuestRestoreHostFPUState(&pVCpu->cpum.s);
+ ASMWrMsr(MSR_K6_EFER, uHostEfer | MSR_K6_EFER_FFXSR);
+ ASMSetFlags(uSavedFlags);
+ }
+ else
+ cpumR0SaveGuestRestoreHostFPUState(&pVCpu->cpum.s);
+ pVCpu->cpum.s.fUseFlags &= ~CPUM_USED_MANUAL_XMM_RESTORE;
+ }
+ }
+ }
+ else
+ fSavedGuest = false;
+ Assert(!( pVCpu->cpum.s.fUseFlags
+ & (CPUM_USED_FPU_GUEST | CPUM_USED_FPU_HOST | CPUM_SYNC_FPU_STATE | CPUM_USED_MANUAL_XMM_RESTORE)));
+ return fSavedGuest;
+}
+
+
+/**
+ * Saves the host debug state, setting CPUM_USED_HOST_DEBUG_STATE and loading
+ * DR7 with safe values.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int cpumR0SaveHostDebugState(PVMCPU pVCpu)
+{
+ /*
+ * Save the host state.
+ */
+ pVCpu->cpum.s.Host.dr0 = ASMGetDR0();
+ pVCpu->cpum.s.Host.dr1 = ASMGetDR1();
+ pVCpu->cpum.s.Host.dr2 = ASMGetDR2();
+ pVCpu->cpum.s.Host.dr3 = ASMGetDR3();
+ pVCpu->cpum.s.Host.dr6 = ASMGetDR6();
+ /** @todo dr7 might already have been changed to 0x400; don't care right now as it's harmless. */
+ pVCpu->cpum.s.Host.dr7 = ASMGetDR7();
+
+ /* Preemption paranoia. */
+ ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_USED_DEBUG_REGS_HOST);
+
+ /*
+ * Make sure DR7 is harmless or else we could trigger breakpoints when
+ * loading guest or hypervisor DRx values later.
+ */
+ if (pVCpu->cpum.s.Host.dr7 != X86_DR7_INIT_VAL)
+ ASMSetDR7(X86_DR7_INIT_VAL);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Saves the guest DRx state residing in host registers and restore the host
+ * register values.
+ *
+ * The guest DRx state is only saved if CPUMR0LoadGuestDebugState was called,
+ * since it's assumed that we're shadowing the guest DRx register values
+ * accurately when using the combined hypervisor debug register values
+ * (CPUMR0LoadHyperDebugState).
+ *
+ * @returns true if either guest or hypervisor debug registers were loaded.
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param fDr6 Whether to include DR6 or not.
+ * @thread EMT(pVCpu)
+ */
+VMMR0_INT_DECL(bool) CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(PVMCPU pVCpu, bool fDr6)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ bool const fDrXLoaded = RT_BOOL(pVCpu->cpum.s.fUseFlags & (CPUM_USED_DEBUG_REGS_GUEST | CPUM_USED_DEBUG_REGS_HYPER));
+
+ /*
+ * Do we need to save the guest DRx registers loaded into host registers?
+ * (DR7 and DR6 (if fDr6 is true) are left to the caller.)
+ */
+ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_DEBUG_REGS_GUEST)
+ {
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.s.Guest))
+ {
+ uint64_t uDr6 = pVCpu->cpum.s.Guest.dr[6];
+ HMR0SaveDebugState(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->cpum.s.Guest);
+ if (!fDr6)
+ pVCpu->cpum.s.Guest.dr[6] = uDr6;
+ }
+ else
+#endif
+ {
+ pVCpu->cpum.s.Guest.dr[0] = ASMGetDR0();
+ pVCpu->cpum.s.Guest.dr[1] = ASMGetDR1();
+ pVCpu->cpum.s.Guest.dr[2] = ASMGetDR2();
+ pVCpu->cpum.s.Guest.dr[3] = ASMGetDR3();
+ if (fDr6)
+ pVCpu->cpum.s.Guest.dr[6] = ASMGetDR6();
+ }
+ }
+ ASMAtomicAndU32(&pVCpu->cpum.s.fUseFlags, ~( CPUM_USED_DEBUG_REGS_GUEST | CPUM_USED_DEBUG_REGS_HYPER
+ | CPUM_SYNC_DEBUG_REGS_GUEST | CPUM_SYNC_DEBUG_REGS_HYPER));
+
+ /*
+ * Restore the host's debug state. DR0-3, DR6 and only then DR7!
+ */
+ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_DEBUG_REGS_HOST)
+ {
+ /* A bit of paranoia first... */
+ uint64_t uCurDR7 = ASMGetDR7();
+ if (uCurDR7 != X86_DR7_INIT_VAL)
+ ASMSetDR7(X86_DR7_INIT_VAL);
+
+ ASMSetDR0(pVCpu->cpum.s.Host.dr0);
+ ASMSetDR1(pVCpu->cpum.s.Host.dr1);
+ ASMSetDR2(pVCpu->cpum.s.Host.dr2);
+ ASMSetDR3(pVCpu->cpum.s.Host.dr3);
+ /** @todo consider only updating if they differ, esp. DR6. Need to figure how
+ * expensive DRx reads are over DRx writes. */
+ ASMSetDR6(pVCpu->cpum.s.Host.dr6);
+ ASMSetDR7(pVCpu->cpum.s.Host.dr7);
+
+ ASMAtomicAndU32(&pVCpu->cpum.s.fUseFlags, ~CPUM_USED_DEBUG_REGS_HOST);
+ }
+
+ return fDrXLoaded;
+}
+
+
+/**
+ * Saves the guest DRx state if it resides in host registers.
+ *
+ * This does NOT clear any use flags, so the host registers remain loaded with
+ * the guest DRx state upon return. The purpose is only to make sure the values
+ * in the CPU context structure are up to date.
+ *
+ * @returns true if the host registers contains guest values, false if not.
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param fDr6 Whether to include DR6 or not.
+ * @thread EMT(pVCpu)
+ */
+VMMR0_INT_DECL(bool) CPUMR0DebugStateMaybeSaveGuest(PVMCPU pVCpu, bool fDr6)
+{
+ /*
+ * Do we need to save the guest DRx registers loaded into host registers?
+ * (DR7 and DR6 (if fDr6 is true) are left to the caller.)
+ */
+ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_DEBUG_REGS_GUEST)
+ {
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.s.Guest))
+ {
+ uint64_t uDr6 = pVCpu->cpum.s.Guest.dr[6];
+ HMR0SaveDebugState(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->cpum.s.Guest);
+ if (!fDr6)
+ pVCpu->cpum.s.Guest.dr[6] = uDr6;
+ }
+ else
+#endif
+ {
+ pVCpu->cpum.s.Guest.dr[0] = ASMGetDR0();
+ pVCpu->cpum.s.Guest.dr[1] = ASMGetDR1();
+ pVCpu->cpum.s.Guest.dr[2] = ASMGetDR2();
+ pVCpu->cpum.s.Guest.dr[3] = ASMGetDR3();
+ if (fDr6)
+ pVCpu->cpum.s.Guest.dr[6] = ASMGetDR6();
+ }
+ return true;
+ }
+ return false;
+}
+
+
+/**
+ * Lazily sync in the debug state.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param fDr6 Whether to include DR6 or not.
+ * @thread EMT(pVCpu)
+ */
+VMMR0_INT_DECL(void) CPUMR0LoadGuestDebugState(PVMCPU pVCpu, bool fDr6)
+{
+ /*
+ * Save the host state and disarm all host BPs.
+ */
+ cpumR0SaveHostDebugState(pVCpu);
+ Assert(ASMGetDR7() == X86_DR7_INIT_VAL);
+
+ /*
+ * Activate the guest state DR0-3.
+ * DR7 and DR6 (if fDr6 is true) are left to the caller.
+ */
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.s.Guest))
+ ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_SYNC_DEBUG_REGS_GUEST); /* Postpone it to the world switch. */
+ else
+#endif
+ {
+ ASMSetDR0(pVCpu->cpum.s.Guest.dr[0]);
+ ASMSetDR1(pVCpu->cpum.s.Guest.dr[1]);
+ ASMSetDR2(pVCpu->cpum.s.Guest.dr[2]);
+ ASMSetDR3(pVCpu->cpum.s.Guest.dr[3]);
+ if (fDr6)
+ ASMSetDR6(pVCpu->cpum.s.Guest.dr[6]);
+
+ ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_USED_DEBUG_REGS_GUEST);
+ }
+}
+
+
+/**
+ * Lazily sync in the hypervisor debug state.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param fDr6 Whether to include DR6 or not.
+ * @thread EMT(pVCpu)
+ */
+VMMR0_INT_DECL(void) CPUMR0LoadHyperDebugState(PVMCPU pVCpu, bool fDr6)
+{
+ /*
+ * Save the host state and disarm all host BPs.
+ */
+ cpumR0SaveHostDebugState(pVCpu);
+ Assert(ASMGetDR7() == X86_DR7_INIT_VAL);
+
+ /*
+ * Make sure the hypervisor values are up to date.
+ */
+ CPUMRecalcHyperDRx(pVCpu, UINT8_MAX /* no loading, please */, true);
+
+ /*
+ * Activate the hypervisor state DR0-3.
+ * DR7 and DR6 (if fDr6 is true) are left to the caller.
+ */
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.s.Guest))
+ ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_SYNC_DEBUG_REGS_HYPER); /* Postpone it. */
+ else
+#endif
+ {
+ ASMSetDR0(pVCpu->cpum.s.Hyper.dr[0]);
+ ASMSetDR1(pVCpu->cpum.s.Hyper.dr[1]);
+ ASMSetDR2(pVCpu->cpum.s.Hyper.dr[2]);
+ ASMSetDR3(pVCpu->cpum.s.Hyper.dr[3]);
+ if (fDr6)
+ ASMSetDR6(X86_DR6_INIT_VAL);
+
+ ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_USED_DEBUG_REGS_HYPER);
+ }
+}
+
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+
+/**
+ * Per-CPU callback that probes the CPU for APIC support.
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 Ignored.
+ * @param pvUser2 Ignored.
+ */
+static DECLCALLBACK(void) cpumR0MapLocalApicCpuProber(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ NOREF(pvUser1); NOREF(pvUser2);
+ int iCpu = RTMpCpuIdToSetIndex(idCpu);
+ AssertReturnVoid(iCpu >= 0 && (unsigned)iCpu < RT_ELEMENTS(g_aLApics));
+
+ /*
+ * Check for APIC support.
+ */
+ uint32_t uMaxLeaf, u32EBX, u32ECX, u32EDX;
+ ASMCpuId(0, &uMaxLeaf, &u32EBX, &u32ECX, &u32EDX);
+ if ( ( ASMIsIntelCpuEx(u32EBX, u32ECX, u32EDX)
+ || ASMIsAmdCpuEx(u32EBX, u32ECX, u32EDX)
+ || ASMIsViaCentaurCpuEx(u32EBX, u32ECX, u32EDX)
+ || ASMIsShanghaiCpuEx(u32EBX, u32ECX, u32EDX))
+ && ASMIsValidStdRange(uMaxLeaf))
+ {
+ uint32_t uDummy;
+ ASMCpuId(1, &uDummy, &u32EBX, &u32ECX, &u32EDX);
+ if ( (u32EDX & X86_CPUID_FEATURE_EDX_APIC)
+ && (u32EDX & X86_CPUID_FEATURE_EDX_MSR))
+ {
+ /*
+ * Safe to access the MSR. Read it and calc the BASE (a little complicated).
+ */
+ uint64_t u64ApicBase = ASMRdMsr(MSR_IA32_APICBASE);
+ uint64_t u64Mask = MSR_IA32_APICBASE_BASE_MIN;
+
+ /* see Intel Manual: Local APIC Status and Location: MAXPHYADDR default is bit 36 */
+ uint32_t uMaxExtLeaf;
+ ASMCpuId(0x80000000, &uMaxExtLeaf, &u32EBX, &u32ECX, &u32EDX);
+ if ( uMaxExtLeaf >= UINT32_C(0x80000008)
+ && ASMIsValidExtRange(uMaxExtLeaf))
+ {
+ uint32_t u32PhysBits;
+ ASMCpuId(0x80000008, &u32PhysBits, &u32EBX, &u32ECX, &u32EDX);
+ u32PhysBits &= 0xff;
+ u64Mask = ((UINT64_C(1) << u32PhysBits) - 1) & UINT64_C(0xfffffffffffff000);
+ }
+
+ AssertCompile(sizeof(g_aLApics[iCpu].PhysBase) == sizeof(u64ApicBase));
+ g_aLApics[iCpu].PhysBase = u64ApicBase & u64Mask;
+ g_aLApics[iCpu].fEnabled = RT_BOOL(u64ApicBase & MSR_IA32_APICBASE_EN);
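+ /* x2APIC mode is signalled by both EXTD and EN being set in the APIC base MSR. */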
+ g_aLApics[iCpu].fX2Apic = (u64ApicBase & (MSR_IA32_APICBASE_EXTD | MSR_IA32_APICBASE_EN))
+ == (MSR_IA32_APICBASE_EXTD | MSR_IA32_APICBASE_EN);
+ }
+ }
+}
+
+
+
+/**
+ * Per-CPU callback that verifies our APIC expectations.
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 Ignored.
+ * @param pvUser2 Ignored.
+ */
+static DECLCALLBACK(void) cpumR0MapLocalApicCpuChecker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ NOREF(pvUser1); NOREF(pvUser2);
+
+ int iCpu = RTMpCpuIdToSetIndex(idCpu);
+ AssertReturnVoid(iCpu >= 0 && (unsigned)iCpu < RT_ELEMENTS(g_aLApics));
+ if (!g_aLApics[iCpu].fEnabled)
+ return;
+
+ /*
+ * 0x0X 82489 external APIC
+ * 0x1X Local APIC
+ * 0x2X..0xFF reserved
+ */
+ uint32_t uApicVersion;
+ if (g_aLApics[iCpu].fX2Apic)
+ uApicVersion = ApicX2RegRead32(APIC_REG_VERSION);
+ else
+ uApicVersion = ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_VERSION);
+ if ((APIC_REG_VERSION_GET_VER(uApicVersion) & 0xF0) == 0x10)
+ {
+ g_aLApics[iCpu].uVersion = uApicVersion;
+
+#if 0 /* enable if you need it. */
+ if (g_aLApics[iCpu].fX2Apic)
+ SUPR0Printf("CPUM: X2APIC %02u - ver %#010x, lint0=%#07x lint1=%#07x pc=%#07x thmr=%#07x cmci=%#07x\n",
+ iCpu, uApicVersion,
+ ApicX2RegRead32(APIC_REG_LVT_LINT0), ApicX2RegRead32(APIC_REG_LVT_LINT1),
+ ApicX2RegRead32(APIC_REG_LVT_PC), ApicX2RegRead32(APIC_REG_LVT_THMR),
+ ApicX2RegRead32(APIC_REG_LVT_CMCI));
+ else
+ {
+ SUPR0Printf("CPUM: APIC %02u at %RGp (mapped at %p) - ver %#010x, lint0=%#07x lint1=%#07x pc=%#07x thmr=%#07x cmci=%#07x\n",
+ iCpu, g_aLApics[iCpu].PhysBase, g_aLApics[iCpu].pv, uApicVersion,
+ ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_LINT0), ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_LINT1),
+ ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_PC), ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_THMR),
+ ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_CMCI));
+ if (uApicVersion & 0x80000000)
+ {
+ uint32_t uExtFeatures = ApicRegRead(g_aLApics[iCpu].pv, 0x400);
+ uint32_t cEiLvt = (uExtFeatures >> 16) & 0xff;
+ SUPR0Printf("CPUM: APIC %02u: ExtSpace available. extfeat=%08x eilvt[0..3]=%08x %08x %08x %08x\n",
+ iCpu,
+ ApicRegRead(g_aLApics[iCpu].pv, 0x400),
+ cEiLvt >= 1 ? ApicRegRead(g_aLApics[iCpu].pv, 0x500) : 0,
+ cEiLvt >= 2 ? ApicRegRead(g_aLApics[iCpu].pv, 0x510) : 0,
+ cEiLvt >= 3 ? ApicRegRead(g_aLApics[iCpu].pv, 0x520) : 0,
+ cEiLvt >= 4 ? ApicRegRead(g_aLApics[iCpu].pv, 0x530) : 0);
+ }
+ }
+#endif
+ }
+ else
+ {
+ g_aLApics[iCpu].fEnabled = false;
+ g_aLApics[iCpu].fX2Apic = false;
+ SUPR0Printf("VBox/CPUM: Unsupported APIC version %#x (iCpu=%d)\n", uApicVersion, iCpu);
+ }
+}
+
+
+/**
+ * Map the MMIO page of each local APIC in the system.
+ */
+static int cpumR0MapLocalApics(void)
+{
+ /*
+ * Check that we'll always stay within the array bounds.
+ */
+ if (RTMpGetArraySize() > RT_ELEMENTS(g_aLApics))
+ {
+ LogRel(("CPUM: Too many real CPUs/cores/threads - %u, max %u\n", RTMpGetArraySize(), RT_ELEMENTS(g_aLApics)));
+ return VERR_TOO_MANY_CPUS;
+ }
+
+ /*
+ * Create mappings for all online CPUs we think have legacy APICs.
+ */
+ int rc = RTMpOnAll(cpumR0MapLocalApicCpuProber, NULL, NULL);
+
+ for (unsigned iCpu = 0; RT_SUCCESS(rc) && iCpu < RT_ELEMENTS(g_aLApics); iCpu++)
+ {
+ if (g_aLApics[iCpu].fEnabled && !g_aLApics[iCpu].fX2Apic)
+ {
+ rc = RTR0MemObjEnterPhys(&g_aLApics[iCpu].hMemObj, g_aLApics[iCpu].PhysBase,
+ PAGE_SIZE, RTMEM_CACHE_POLICY_MMIO);
+ if (RT_SUCCESS(rc))
+ {
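+ /* pvFixed = (void *)-1 below lets the kernel pick a suitable mapping address. */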
+ rc = RTR0MemObjMapKernel(&g_aLApics[iCpu].hMapObj, g_aLApics[iCpu].hMemObj, (void *)-1,
+ PAGE_SIZE, RTMEM_PROT_READ | RTMEM_PROT_WRITE);
+ if (RT_SUCCESS(rc))
+ {
+ g_aLApics[iCpu].pv = RTR0MemObjAddress(g_aLApics[iCpu].hMapObj);
+ continue;
+ }
+ RTR0MemObjFree(g_aLApics[iCpu].hMemObj, true /* fFreeMappings */);
+ }
+ g_aLApics[iCpu].fEnabled = false;
+ }
+ g_aLApics[iCpu].pv = NULL;
+ }
+
+ /*
+ * Check the APICs.
+ */
+ if (RT_SUCCESS(rc))
+ rc = RTMpOnAll(cpumR0MapLocalApicCpuChecker, NULL, NULL);
+
+ if (RT_FAILURE(rc))
+ {
+ cpumR0UnmapLocalApics();
+ return rc;
+ }
+
+#ifdef LOG_ENABLED
+ /*
+ * Log the result (pretty useless, requires enabling CPUM in VBoxDrv
+ * and !VBOX_WITH_R0_LOGGING).
+ */
+ if (LogIsEnabled())
+ {
+ uint32_t cEnabled = 0;
+ uint32_t cX2Apics = 0;
+ for (unsigned iCpu = 0; iCpu < RT_ELEMENTS(g_aLApics); iCpu++)
+ if (g_aLApics[iCpu].fEnabled)
+ {
+ cEnabled++;
+ cX2Apics += g_aLApics[iCpu].fX2Apic;
+ }
+ Log(("CPUM: %u APICs, %u X2APICs\n", cEnabled, cX2Apics));
+ }
+#endif
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Unmap the Local APIC of all host CPUs.
+ */
+static void cpumR0UnmapLocalApics(void)
+{
+ for (unsigned iCpu = RT_ELEMENTS(g_aLApics); iCpu-- > 0;)
+ {
+ if (g_aLApics[iCpu].pv)
+ {
+ RTR0MemObjFree(g_aLApics[iCpu].hMapObj, true /* fFreeMappings */);
+ RTR0MemObjFree(g_aLApics[iCpu].hMemObj, true /* fFreeMappings */);
+ g_aLApics[iCpu].hMapObj = NIL_RTR0MEMOBJ;
+ g_aLApics[iCpu].hMemObj = NIL_RTR0MEMOBJ;
+ g_aLApics[iCpu].fEnabled = false;
+ g_aLApics[iCpu].fX2Apic = false;
+ g_aLApics[iCpu].pv = NULL;
+ }
+ }
+}
+
+
+/**
+ * Updates CPUMCPU::pvApicBase and CPUMCPU::fX2Apic prior to world switch.
+ *
+ * Writes the Local APIC mapping address of the current host CPU to CPUMCPU so
+ * the world switchers can access the APIC registers for the purpose of
+ * disabling and re-enabling the NMIs. Must be called with disabled preemption
+ * or disabled interrupts!
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param iHostCpuSet The CPU set index of the current host CPU.
+ */
+VMMR0_INT_DECL(void) CPUMR0SetLApic(PVMCPU pVCpu, uint32_t iHostCpuSet)
+{
+ Assert(iHostCpuSet <= RT_ELEMENTS(g_aLApics));
+ pVCpu->cpum.s.pvApicBase = g_aLApics[iHostCpuSet].pv;
+ pVCpu->cpum.s.fX2Apic = g_aLApics[iHostCpuSet].fX2Apic;
+// Log6(("CPUMR0SetLApic: pvApicBase=%p fX2Apic=%d\n", g_aLApics[idxCpu].pv, g_aLApics[idxCpu].fX2Apic));
+}
+
+#endif /* VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI */
+
diff --git a/src/VBox/VMM/VMMR0/CPUMR0A.asm b/src/VBox/VMM/VMMR0/CPUMR0A.asm
new file mode 100644
index 00000000..b0d1eef3
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/CPUMR0A.asm
@@ -0,0 +1,358 @@
+; $Id: CPUMR0A.asm $
+;; @file
+; CPUM - Ring-0 Assembly Routines (supporting HM and IEM).
+;
+
+;
+; Copyright (C) 2006-2019 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+
+;*******************************************************************************
+;* Header Files *
+;*******************************************************************************
+%define RT_ASM_WITH_SEH64
+%include "iprt/asmdefs.mac"
+%include "VBox/asmdefs.mac"
+%include "VBox/vmm/vm.mac"
+%include "VBox/err.mac"
+%include "VBox/vmm/stam.mac"
+%include "CPUMInternal.mac"
+%include "iprt/x86.mac"
+%include "VBox/vmm/cpum.mac"
+
+
+BEGINCODE
+
+;;
+; Makes sure the EMTs have an FPU state associated with them on hosts where we're
+; allowed to use it in ring-0 too.
+;
+; This ensures that we don't have to allocate the state lazily while trying to execute
+; guest code with preemption disabled or worse.
+;
+; @cproto VMMR0_INT_DECL(void) CPUMR0RegisterVCpuThread(PVMCPU pVCpu);
+;
+BEGINPROC CPUMR0RegisterVCpuThread
+ push xBP
+ SEH64_PUSH_xBP
+ mov xBP, xSP
+ SEH64_SET_FRAME_xBP 0
+SEH64_END_PROLOGUE
+
+%ifdef VMM_R0_TOUCH_FPU
+ movdqa xmm0, xmm0 ; hope this is harmless.
+%endif
+
+.return:
+ xor eax, eax ; paranoia
+ leave
+ ret
+ENDPROC CPUMR0RegisterVCpuThread
+
+
+%ifdef VMM_R0_TOUCH_FPU
+;;
+; Touches the host FPU state.
+;
+; @uses nothing (well, maybe cr0)
+;
+ %ifndef RT_ASM_WITH_SEH64 ; workaround for yasm 1.3.0 bug (error: prologue -1 bytes, must be <256)
+ALIGNCODE(16)
+ %endif
+BEGINPROC CPUMR0TouchHostFpu
+ push xBP
+ SEH64_PUSH_xBP
+ mov xBP, xSP
+ SEH64_SET_FRAME_xBP 0
+SEH64_END_PROLOGUE
+
+ movdqa xmm0, xmm0 ; Hope this is harmless.
+
+ leave
+ ret
+ENDPROC CPUMR0TouchHostFpu
+%endif ; VMM_R0_TOUCH_FPU
+
+
+;;
+; Saves the host FPU/SSE/AVX state and restores the guest FPU/SSE/AVX state.
+;
+; @returns VINF_SUCCESS (0) or VINF_CPUM_HOST_CR0_MODIFIED. (EAX)
+; @param pCpumCpu x86:[ebp+8] gcc:rdi msc:rcx CPUMCPU pointer
+;
+; @remarks 64-bit Windows drivers shouldn't use AVX registers without saving+loading:
+; https://msdn.microsoft.com/en-us/library/windows/hardware/ff545910%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396
+; However, the compiler docs have a different idea:
+; https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
+; We'll go with the former for now.
+;
+%ifndef RT_ASM_WITH_SEH64 ; workaround for yasm 1.3.0 bug (error: prologue -1 bytes, must be <256)
+ALIGNCODE(16)
+%endif
+BEGINPROC cpumR0SaveHostRestoreGuestFPUState
+ push xBP
+ SEH64_PUSH_xBP
+ mov xBP, xSP
+ SEH64_SET_FRAME_xBP 0
+SEH64_END_PROLOGUE
+
+ ;
+ ; Prologue - xAX+xDX must be free for XSAVE/XRSTOR input.
+ ;
+%ifdef RT_ARCH_AMD64
+ %ifdef RT_OS_WINDOWS
+ mov r11, rcx
+ %else
+ mov r11, rdi
+ %endif
+ %define pCpumCpu r11
+ %define pXState r10
+%else
+ push ebx
+ push esi
+ mov ebx, dword [ebp + 8]
+ %define pCpumCpu ebx
+ %define pXState esi
+%endif
+
+ pushf ; The darwin kernel can get upset or upset things if an
+ cli ; interrupt occurs while we're doing fxsave/fxrstor/cr0.
+
+ ;
+ ; Save the host state.
+ ;
+ test dword [pCpumCpu + CPUMCPU.fUseFlags], CPUM_USED_FPU_HOST
+ jnz .already_saved_host
+
+ CPUMRZ_TOUCH_FPU_CLEAR_CR0_FPU_TRAPS_SET_RC xCX, xAX, pCpumCpu ; xCX is the return value for VT-x; xAX is scratch.
+
+ CPUMR0_SAVE_HOST
+
+%ifdef VBOX_WITH_KERNEL_USING_XMM
+ jmp .load_guest
+%endif
+.already_saved_host:
+%ifdef VBOX_WITH_KERNEL_USING_XMM
+ ; If we didn't save the host state, we must save the non-volatile XMM registers.
+ mov pXState, [pCpumCpu + CPUMCPU.Host.pXStateR0]
+ stmxcsr [pXState + X86FXSTATE.MXCSR]
+ movdqa [pXState + X86FXSTATE.xmm6 ], xmm6
+ movdqa [pXState + X86FXSTATE.xmm7 ], xmm7
+ movdqa [pXState + X86FXSTATE.xmm8 ], xmm8
+ movdqa [pXState + X86FXSTATE.xmm9 ], xmm9
+ movdqa [pXState + X86FXSTATE.xmm10], xmm10
+ movdqa [pXState + X86FXSTATE.xmm11], xmm11
+ movdqa [pXState + X86FXSTATE.xmm12], xmm12
+ movdqa [pXState + X86FXSTATE.xmm13], xmm13
+ movdqa [pXState + X86FXSTATE.xmm14], xmm14
+ movdqa [pXState + X86FXSTATE.xmm15], xmm15
+
+ ;
+ ; Load the guest state.
+ ;
+.load_guest:
+%endif
+ CPUMR0_LOAD_GUEST
+
+%ifdef VBOX_WITH_KERNEL_USING_XMM
+ ; Restore the non-volatile xmm registers. ASSUMING 64-bit host.
+ mov pXState, [pCpumCpu + CPUMCPU.Host.pXStateR0]
+ movdqa xmm6, [pXState + X86FXSTATE.xmm6]
+ movdqa xmm7, [pXState + X86FXSTATE.xmm7]
+ movdqa xmm8, [pXState + X86FXSTATE.xmm8]
+ movdqa xmm9, [pXState + X86FXSTATE.xmm9]
+ movdqa xmm10, [pXState + X86FXSTATE.xmm10]
+ movdqa xmm11, [pXState + X86FXSTATE.xmm11]
+ movdqa xmm12, [pXState + X86FXSTATE.xmm12]
+ movdqa xmm13, [pXState + X86FXSTATE.xmm13]
+ movdqa xmm14, [pXState + X86FXSTATE.xmm14]
+ movdqa xmm15, [pXState + X86FXSTATE.xmm15]
+ ldmxcsr [pXState + X86FXSTATE.MXCSR]
+%endif
+
+ or dword [pCpumCpu + CPUMCPU.fUseFlags], (CPUM_USED_FPU_GUEST | CPUM_USED_FPU_SINCE_REM | CPUM_USED_FPU_HOST)
+ popf
+
+ mov eax, ecx
+.return:
+%ifdef RT_ARCH_X86
+ pop esi
+ pop ebx
+%endif
+ leave
+ ret
+ENDPROC cpumR0SaveHostRestoreGuestFPUState
+
+
+;;
+; Saves the guest FPU/SSE/AVX state and restores the host FPU/SSE/AVX state.
+;
+; @param pCpumCpu x86:[ebp+8] gcc:rdi msc:rcx CPUMCPU pointer
+;
+; @remarks 64-bit Windows drivers shouldn't use AVX registers without saving+loading:
+; https://msdn.microsoft.com/en-us/library/windows/hardware/ff545910%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396
+; However, the compiler docs have a different idea:
+; https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
+; We'll go with the former for now.
+;
+%ifndef RT_ASM_WITH_SEH64 ; workaround for yasm 1.3.0 bug (error: prologue -1 bytes, must be <256)
+ALIGNCODE(16)
+%endif
+BEGINPROC cpumR0SaveGuestRestoreHostFPUState
+ push xBP
+ SEH64_PUSH_xBP
+ mov xBP, xSP
+ SEH64_SET_FRAME_xBP 0
+SEH64_END_PROLOGUE
+
+ ;
+ ; Prologue - xAX+xDX must be free for XSAVE/XRSTOR input.
+ ;
+%ifdef RT_ARCH_AMD64
+ %ifdef RT_OS_WINDOWS
+ mov r11, rcx
+ %else
+ mov r11, rdi
+ %endif
+ %define pCpumCpu r11
+ %define pXState r10
+%else
+ push ebx
+ push esi
+ mov ebx, dword [ebp + 8]
+ %define pCpumCpu ebx
+ %define pXState esi
+%endif
+ pushf ; The darwin kernel can get upset or upset things if an
+ cli ; interrupt occurs while we're doing fxsave/fxrstor/cr0.
+
+ %ifdef VBOX_WITH_KERNEL_USING_XMM
+ ;
+ ; Copy non-volatile XMM registers to the host state so we can use
+ ; them while saving the guest state (we've gotta do this anyway).
+ ;
+ mov pXState, [pCpumCpu + CPUMCPU.Host.pXStateR0]
+ stmxcsr [pXState + X86FXSTATE.MXCSR]
+ movdqa [pXState + X86FXSTATE.xmm6], xmm6
+ movdqa [pXState + X86FXSTATE.xmm7], xmm7
+ movdqa [pXState + X86FXSTATE.xmm8], xmm8
+ movdqa [pXState + X86FXSTATE.xmm9], xmm9
+ movdqa [pXState + X86FXSTATE.xmm10], xmm10
+ movdqa [pXState + X86FXSTATE.xmm11], xmm11
+ movdqa [pXState + X86FXSTATE.xmm12], xmm12
+ movdqa [pXState + X86FXSTATE.xmm13], xmm13
+ movdqa [pXState + X86FXSTATE.xmm14], xmm14
+ movdqa [pXState + X86FXSTATE.xmm15], xmm15
+ %endif
+
+ ;
+ ; Save the guest state if necessary.
+ ;
+ test dword [pCpumCpu + CPUMCPU.fUseFlags], CPUM_USED_FPU_GUEST
+ jz .load_only_host
+
+ %ifdef VBOX_WITH_KERNEL_USING_XMM
+ ; Load the guest XMM register values we already saved in HMR0VMXStartVMWrapXMM.
+ mov pXState, [pCpumCpu + CPUMCPU.Guest.pXStateR0]
+ movdqa xmm0, [pXState + X86FXSTATE.xmm0]
+ movdqa xmm1, [pXState + X86FXSTATE.xmm1]
+ movdqa xmm2, [pXState + X86FXSTATE.xmm2]
+ movdqa xmm3, [pXState + X86FXSTATE.xmm3]
+ movdqa xmm4, [pXState + X86FXSTATE.xmm4]
+ movdqa xmm5, [pXState + X86FXSTATE.xmm5]
+ movdqa xmm6, [pXState + X86FXSTATE.xmm6]
+ movdqa xmm7, [pXState + X86FXSTATE.xmm7]
+ movdqa xmm8, [pXState + X86FXSTATE.xmm8]
+ movdqa xmm9, [pXState + X86FXSTATE.xmm9]
+ movdqa xmm10, [pXState + X86FXSTATE.xmm10]
+ movdqa xmm11, [pXState + X86FXSTATE.xmm11]
+ movdqa xmm12, [pXState + X86FXSTATE.xmm12]
+ movdqa xmm13, [pXState + X86FXSTATE.xmm13]
+ movdqa xmm14, [pXState + X86FXSTATE.xmm14]
+ movdqa xmm15, [pXState + X86FXSTATE.xmm15]
+ ldmxcsr [pXState + X86FXSTATE.MXCSR]
+ %endif
+ CPUMR0_SAVE_GUEST
+
+ ;
+ ; Load the host state.
+ ;
+.load_only_host:
+ CPUMR0_LOAD_HOST
+
+ ; Restore the CR0 value we saved in cpumR0SaveHostRestoreGuestFPUState or
+ ; in cpumRZSaveHostFPUState.
+ mov xCX, [pCpumCpu + CPUMCPU.Host.cr0Fpu]
+ CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET xCX
+ and dword [pCpumCpu + CPUMCPU.fUseFlags], ~(CPUM_USED_FPU_GUEST | CPUM_USED_FPU_HOST)
+
+ popf
+%ifdef RT_ARCH_X86
+ pop esi
+ pop ebx
+%endif
+ leave
+ ret
+%undef pCpumCpu
+%undef pXState
+ENDPROC cpumR0SaveGuestRestoreHostFPUState
+
+
+%if ARCH_BITS == 32
+ %ifdef VBOX_WITH_64_BITS_GUESTS
+;;
+; Restores the host's FPU/SSE/AVX state from pCpumCpu->Host.
+;
+; @param pCpumCpu x86:[ebp+8] gcc:rdi msc:rcx CPUMCPU pointer
+;
+ %ifndef RT_ASM_WITH_SEH64 ; workaround for yasm 1.3.0 bug (error: prologue -1 bytes, must be <256)
+ALIGNCODE(16)
+ %endif
+BEGINPROC cpumR0RestoreHostFPUState
+ ;
+ ; Prologue - xAX+xDX must be free for XSAVE/XRSTOR input.
+ ;
+ push ebp
+ mov ebp, esp
+ push ebx
+ push esi
+ mov ebx, dword [ebp + 8]
+ %define pCpumCpu ebx
+ %define pXState esi
+
+ ;
+ ; Restore host CPU state.
+ ;
+ pushf ; The darwin kernel can get upset or upset things if an
+ cli ; interrupt occurs while we're doing fxsave/fxrstor/cr0.
+
+ CPUMR0_LOAD_HOST
+
+ ; Restore the CR0 value we saved in cpumR0SaveHostRestoreGuestFPUState or
+ ; in cpumRZSaveHostFPUState.
+ ;; @todo What about XCR0?
+ mov xCX, [pCpumCpu + CPUMCPU.Host.cr0Fpu]
+ CPUMRZ_RESTORE_CR0_IF_TS_OR_EM_SET xCX
+
+ and dword [pCpumCpu + CPUMCPU.fUseFlags], ~CPUM_USED_FPU_HOST
+ popf
+
+ pop esi
+ pop ebx
+ leave
+ ret
+ %undef pCpumCpu
+ %undef pXState
+ENDPROC cpumR0RestoreHostFPUState
+ %endif ; VBOX_WITH_64_BITS_GUESTS
+%endif ; ARCH_BITS == 32
+
diff --git a/src/VBox/VMM/VMMR0/EMR0.cpp b/src/VBox/VMM/VMMR0/EMR0.cpp
new file mode 100644
index 00000000..68efbd88
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/EMR0.cpp
@@ -0,0 +1,60 @@
+/* $Id: EMR0.cpp $ */
+/** @file
+ * EM - Host Context Ring 0.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_EM
+#include <VBox/vmm/em.h>
+#include "EMInternal.h"
+#include <VBox/vmm/vm.h>
+#include <VBox/vmm/gvm.h>
+#include <iprt/errcore.h>
+#include <VBox/log.h>
+#include <iprt/assert.h>
+#include <iprt/thread.h>
+
+
+
+/**
+ * Adjusts EM configuration options.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM structure.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) EMR0InitVM(PGVM pGVM, PVM pVM)
+{
+ /*
+ * Override ring-0 exit optimizations settings.
+ */
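+ /* Both flags require that ring-0 code can either be preempted or can reliably detect pending preemption (RTThreadPreemptIsPendingTrusty). */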
+ bool fEnabledR0 = pVM->aCpus[0].em.s.fExitOptimizationEnabled
+ && pVM->aCpus[0].em.s.fExitOptimizationEnabledR0
+ && (RTThreadPreemptIsPossible() || RTThreadPreemptIsPendingTrusty());
+ bool fEnabledR0PreemptDisabled = fEnabledR0
+ && pVM->aCpus[0].em.s.fExitOptimizationEnabledR0PreemptDisabled
+ && RTThreadPreemptIsPendingTrusty();
+ for (VMCPUID i = 0; i < pGVM->cCpus; i++)
+ {
+ pVM->aCpus[i].em.s.fExitOptimizationEnabledR0 = fEnabledR0;
+ pVM->aCpus[i].em.s.fExitOptimizationEnabledR0PreemptDisabled = fEnabledR0PreemptDisabled;
+ }
+
+ return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/VMM/VMMR0/GIMR0.cpp b/src/VBox/VMM/VMMR0/GIMR0.cpp
new file mode 100644
index 00000000..e4750911
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/GIMR0.cpp
@@ -0,0 +1,117 @@
+/* $Id: GIMR0.cpp $ */
+/** @file
+ * Guest Interface Manager (GIM) - Host Context Ring-0.
+ */
+
+/*
+ * Copyright (C) 2014-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_GIM
+#include <VBox/vmm/gim.h>
+#include "GIMInternal.h"
+#include "GIMHvInternal.h"
+#include <VBox/vmm/vm.h>
+
+#include <VBox/err.h>
+
+
+/**
+ * Does ring-0 per-VM GIM initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) GIMR0InitVM(PVM pVM)
+{
+ if (!GIMIsEnabled(pVM))
+ return VINF_SUCCESS;
+
+ switch (pVM->gim.s.enmProviderId)
+ {
+ case GIMPROVIDERID_HYPERV:
+ return gimR0HvInitVM(pVM);
+
+ case GIMPROVIDERID_KVM:
+ return gimR0KvmInitVM(pVM);
+
+ default:
+ break;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does ring-0 per-VM GIM termination.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) GIMR0TermVM(PVM pVM)
+{
+ if (!GIMIsEnabled(pVM))
+ return VINF_SUCCESS;
+
+ switch (pVM->gim.s.enmProviderId)
+ {
+ case GIMPROVIDERID_HYPERV:
+ return gimR0HvTermVM(pVM);
+
+ case GIMPROVIDERID_KVM:
+ return gimR0KvmTermVM(pVM);
+
+ default:
+ break;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Updates the paravirtualized TSC supported by the GIM provider.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if the paravirt. TSC is setup and in use.
+ * @retval VERR_GIM_NOT_ENABLED if no GIM provider is configured for this VM.
+ * @retval VERR_GIM_PVTSC_NOT_AVAILABLE if the GIM provider does not support any
+ * paravirt. TSC.
+ * @retval VERR_GIM_PVTSC_NOT_IN_USE if the GIM provider supports paravirt. TSC
+ * but the guest isn't currently using it.
+ *
+ * @param pVM The cross context VM structure.
+ * @param u64Offset The computed TSC offset.
+ *
+ * @thread EMT(pVCpu)
+ */
+VMMR0_INT_DECL(int) GIMR0UpdateParavirtTsc(PVM pVM, uint64_t u64Offset)
+{
+ switch (pVM->gim.s.enmProviderId)
+ {
+ case GIMPROVIDERID_HYPERV:
+ return gimR0HvUpdateParavirtTsc(pVM, u64Offset);
+
+ case GIMPROVIDERID_KVM:
+ return VINF_SUCCESS;
+
+ case GIMPROVIDERID_NONE:
+ return VERR_GIM_NOT_ENABLED;
+
+ default:
+ break;
+ }
+ return VERR_GIM_PVTSC_NOT_AVAILABLE;
+}
+
diff --git a/src/VBox/VMM/VMMR0/GIMR0Hv.cpp b/src/VBox/VMM/VMMR0/GIMR0Hv.cpp
new file mode 100644
index 00000000..cbf23de1
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/GIMR0Hv.cpp
@@ -0,0 +1,182 @@
+/* $Id: GIMR0Hv.cpp $ */
+/** @file
+ * Guest Interface Manager (GIM), Hyper-V - Host Context Ring-0.
+ */
+
+/*
+ * Copyright (C) 2014-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_GIM
+#include <VBox/vmm/gim.h>
+#include <VBox/vmm/tm.h>
+#include "GIMInternal.h"
+#include "GIMHvInternal.h"
+#include <VBox/vmm/vm.h>
+
+#include <VBox/err.h>
+
+#include <iprt/spinlock.h>
+
+
+#if 0
+/**
+ * Allocates and maps one physically contiguous page. The allocated page is
+ * zero'd out.
+ *
+ * @returns IPRT status code.
+ * @param pMemObj Pointer to the ring-0 memory object.
+ * @param ppVirt Where to store the virtual address of the
+ * allocation.
+ * @param pHCPhys Where to store the physical address of the
+ * allocation.
+ */
+static int gimR0HvPageAllocZ(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys)
+{
+ AssertPtr(pMemObj);
+ AssertPtr(ppVirt);
+ AssertPtr(pHCPhys);
+
+ int rc = RTR0MemObjAllocCont(pMemObj, PAGE_SIZE, false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ return rc;
+ *ppVirt = RTR0MemObjAddress(*pMemObj);
+ *pHCPhys = RTR0MemObjGetPagePhysAddr(*pMemObj, 0 /* iPage */);
+ ASMMemZero32(*ppVirt, PAGE_SIZE);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Frees and unmaps an allocated physical page.
+ *
+ * @param pMemObj Pointer to the ring-0 memory object.
+ * @param ppVirt Where to re-initialize the virtual address of
+ * allocation as 0.
+ * @param pHCPhys Where to re-initialize the physical address of the
+ * allocation as 0.
+ */
+static void gimR0HvPageFree(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys)
+{
+ AssertPtr(pMemObj);
+ AssertPtr(ppVirt);
+ AssertPtr(pHCPhys);
+ if (*pMemObj != NIL_RTR0MEMOBJ)
+ {
+ int rc = RTR0MemObjFree(*pMemObj, true /* fFreeMappings */);
+ AssertRC(rc);
+ *pMemObj = NIL_RTR0MEMOBJ;
+ *ppVirt = 0;
+ *pHCPhys = 0;
+ }
+}
+#endif
+
+/**
+ * Updates Hyper-V's reference TSC page.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ * @param u64Offset The computed TSC offset.
+ * @thread EMT.
+ */
+VMM_INT_DECL(int) gimR0HvUpdateParavirtTsc(PVM pVM, uint64_t u64Offset)
+{
+ Assert(GIMIsEnabled(pVM));
+ bool fHvTscEnabled = MSR_GIM_HV_REF_TSC_IS_ENABLED(pVM->gim.s.u.Hv.u64TscPageMsr);
+ if (RT_UNLIKELY(!fHvTscEnabled))
+ return VERR_GIM_PVTSC_NOT_ENABLED;
+
+ /** @todo this is buggy when large pages are used due to a PGM limitation, see
+ * @bugref{7532}.
+ *
+ * In any case, we do not ever update this page while the guest is
+ * running after setting it up (in ring-3, see gimR3HvEnableTscPage()) as
+ * the TSC offset is handled in the VMCS/VMCB (HM) or by trapping RDTSC
+ * (raw-mode). */
+#if 0
+ PCGIMHV pcHv = &pVM->gim.s.u.Hv;
+ PCGIMMMIO2REGION pcRegion = &pcHv->aMmio2Regions[GIM_HV_REF_TSC_PAGE_REGION_IDX];
+ PGIMHVREFTSC pRefTsc = (PGIMHVREFTSC)pcRegion->CTX_SUFF(pvPage);
+ Assert(pRefTsc);
+
+ /*
+ * Hyper-V reports the reference time in 100 nanosecond units.
+ */
+ uint64_t u64Tsc100Ns = pcHv->cTscTicksPerSecond / RT_NS_10MS;
+ int64_t i64TscOffset = (int64_t)u64Offset / u64Tsc100Ns;
+
+ /*
+ * The TSC page can be simultaneously read by other VCPUs in the guest. The
+ * spinlock is only for protecting simultaneous hypervisor writes from other
+ * EMTs.
+ */
+ RTSpinlockAcquire(pcHv->hSpinlockR0);
+ if (pRefTsc->i64TscOffset != i64TscOffset)
+ {
+ if (pRefTsc->u32TscSequence < UINT32_C(0xfffffffe))
+ ASMAtomicIncU32(&pRefTsc->u32TscSequence);
+ else
+ ASMAtomicWriteU32(&pRefTsc->u32TscSequence, 1);
+ ASMAtomicWriteS64(&pRefTsc->i64TscOffset, i64TscOffset);
+ }
+ RTSpinlockRelease(pcHv->hSpinlockR0);
+
+ Assert(pRefTsc->u32TscSequence != 0);
+ Assert(pRefTsc->u32TscSequence != UINT32_C(0xffffffff));
+#else
+ NOREF(u64Offset);
+#endif
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does ring-0 per-VM GIM Hyper-V initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) gimR0HvInitVM(PVM pVM)
+{
+ AssertPtr(pVM);
+ Assert(GIMIsEnabled(pVM));
+
+ PGIMHV pHv = &pVM->gim.s.u.Hv;
+ Assert(pHv->hSpinlockR0 == NIL_RTSPINLOCK);
+
+ int rc = RTSpinlockCreate(&pHv->hSpinlockR0, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "Hyper-V");
+ return rc;
+}
+
+
+/**
+ * Does ring-0 per-VM GIM Hyper-V termination.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) gimR0HvTermVM(PVM pVM)
+{
+ AssertPtr(pVM);
+ Assert(GIMIsEnabled(pVM));
+
+ PGIMHV pHv = &pVM->gim.s.u.Hv;
+ RTSpinlockDestroy(pHv->hSpinlockR0);
+ pHv->hSpinlockR0 = NIL_RTSPINLOCK;
+
+ return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/VMM/VMMR0/GIMR0Kvm.cpp b/src/VBox/VMM/VMMR0/GIMR0Kvm.cpp
new file mode 100644
index 00000000..bcc849db
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/GIMR0Kvm.cpp
@@ -0,0 +1,119 @@
+/* $Id: GIMR0Kvm.cpp $ */
+/** @file
+ * Guest Interface Manager (GIM), KVM - Host Context Ring-0.
+ */
+
+/*
+ * Copyright (C) 2015-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_GIM
+#include <VBox/vmm/gim.h>
+#include <VBox/vmm/tm.h>
+#include "GIMInternal.h"
+#include "GIMKvmInternal.h"
+#include <VBox/vmm/vm.h>
+
+#include <VBox/err.h>
+
+#include <iprt/spinlock.h>
+
+
+/**
+ * Updates KVM's system time information globally for all VCPUs.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @thread EMT.
+ * @remarks Can be called with preemption disabled!
+ */
+VMM_INT_DECL(int) gimR0KvmUpdateSystemTime(PVM pVM, PVMCPU pVCpu)
+{
+ /*
+ * Validate.
+ */
+ Assert(GIMIsEnabled(pVM));
+ PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
+ AssertReturn(pKvm->hSpinlockR0 != NIL_RTSPINLOCK, VERR_GIM_IPE_3);
+
+ /*
+ * Record the TSC and virtual NanoTS pairs.
+ */
+ uint64_t uTsc;
+ uint64_t uVirtNanoTS;
+ RTCCUINTREG fEFlags = ASMIntDisableFlags();
+ uTsc = TMCpuTickGetNoCheck(pVCpu) | UINT64_C(1);
+ uVirtNanoTS = TMVirtualGetNoCheck(pVM) | UINT64_C(1);
+ ASMSetFlags(fEFlags);
+
+ /*
+ * Update VCPUs with this information. The first VCPU's values
+ * will be applied to the remaining.
+ */
+ RTSpinlockAcquire(pKvm->hSpinlockR0);
+ for (uint32_t i = 0; i < pVM->cCpus; i++)
+ {
+ PGIMKVMCPU pKvmCpu = &pVM->aCpus[i].gim.s.u.KvmCpu;
+ if ( !pKvmCpu->uTsc
+ && !pKvmCpu->uVirtNanoTS)
+ {
+ pKvmCpu->uTsc = uTsc;
+ pKvmCpu->uVirtNanoTS = uVirtNanoTS;
+ }
+ }
+ RTSpinlockRelease(pKvm->hSpinlockR0);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does ring-0 per-VM GIM KVM initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) gimR0KvmInitVM(PVM pVM)
+{
+ AssertPtr(pVM);
+ Assert(GIMIsEnabled(pVM));
+
+ PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
+ Assert(pKvm->hSpinlockR0 == NIL_RTSPINLOCK);
+
+ int rc = RTSpinlockCreate(&pKvm->hSpinlockR0, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "KVM");
+ return rc;
+}
+
+
+/**
+ * Does ring-0 per-VM GIM KVM termination.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) gimR0KvmTermVM(PVM pVM)
+{
+ AssertPtr(pVM);
+ Assert(GIMIsEnabled(pVM));
+
+ PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
+ RTSpinlockDestroy(pKvm->hSpinlockR0);
+ pKvm->hSpinlockR0 = NIL_RTSPINLOCK;
+
+ return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/VMM/VMMR0/GMMR0.cpp b/src/VBox/VMM/VMMR0/GMMR0.cpp
new file mode 100644
index 00000000..cf90eb66
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/GMMR0.cpp
@@ -0,0 +1,5445 @@
+/* $Id: GMMR0.cpp $ */
+/** @file
+ * GMM - Global Memory Manager.
+ */
+
+/*
+ * Copyright (C) 2007-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @page pg_gmm GMM - The Global Memory Manager
+ *
+ * As the name indicates, this component is responsible for global memory
+ * management. Currently only guest RAM is allocated from the GMM, but this
+ * may change to include shadow page tables and other bits later.
+ *
+ * Guest RAM is managed as individual pages, but allocated from the host OS
+ * in chunks for reasons of portability / efficiency. To minimize the memory
+ * footprint all tracking structures must be as small as possible without
+ * unnecessary performance penalties.
+ *
+ * The allocation chunks have a fixed size, defined at compile time
+ * by the #GMM_CHUNK_SIZE \#define.
+ *
+ * Each chunk is given a unique ID. Each page also has a unique ID. The
+ * relationship between the two IDs is:
+ * @code
+ * GMM_CHUNK_SHIFT = log2(GMM_CHUNK_SIZE / PAGE_SIZE);
+ * idPage = (idChunk << GMM_CHUNK_SHIFT) | iPage;
+ * @endcode
+ * Where iPage is the index of the page within the chunk. This ID scheme
+ * permits efficient chunk and page lookup, but it relies on the chunk size
+ * being set at compile time. The chunks are organized in an AVL tree with their
+ * IDs being the keys.
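+ * The reverse lookup is just as cheap (shown here purely as an illustration;
+ * it follows directly from the formula above):
+ * @code
+ * idChunk = idPage >> GMM_CHUNK_SHIFT;
+ * iPage = idPage & (RT_BIT_32(GMM_CHUNK_SHIFT) - 1);
+ * @endcode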
+ *
+ * The physical address of each page in an allocation chunk is maintained by
+ * the #RTR0MEMOBJ and obtained using #RTR0MemObjGetPagePhysAddr. There is no
+ * need to duplicate this information (it would cost 8 bytes per page if we did).
+ *
+ * So what do we need to track per page? Most importantly we need to know
+ * which state the page is in:
+ * - Private - Allocated for (eventually) backing one particular VM page.
+ * - Shared - Readonly page that is used by one or more VMs and treated
+ * as COW by PGM.
+ * - Free - Not used by anyone.
+ *
+ * For the page replacement operations (sharing, defragmenting and freeing)
+ * to be somewhat efficient, private pages need to be associated with a
+ * particular page in a particular VM.
+ *
+ * Tracking the usage of shared pages is impractical and expensive, so we'll
+ * settle for a reference counting system instead.
+ *
+ * Free pages will be chained on LIFOs.
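+ * Pushing a page onto a chunk's free LIFO looks roughly like this (a sketch of
+ * the pattern used when pages are freed, not a public API):
+ * @code
+ * pChunk->aPages[iPage].u = 0;
+ * pChunk->aPages[iPage].Free.iNext = pChunk->iFreeHead;
+ * pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
+ * pChunk->iFreeHead = iPage;
+ * @endcode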
+ *
+ * On 64-bit systems we will use a 64-bit bitfield per page, while on 32-bit
+ * systems a 32-bit bitfield will have to suffice because of address space
+ * limitations. The #GMMPAGE structure shows the details.
+ *
+ *
+ * @section sec_gmm_alloc_strat Page Allocation Strategy
+ *
+ * The strategy for allocating pages has to take fragmentation and shared
+ * pages into account, or we may end up with 2000 chunks with only
+ * a few pages in each. Shared pages cannot easily be reallocated because
+ * of the inaccurate usage accounting (see above). Private pages can be
+ * reallocated by a defragmentation thread in the same manner that sharing
+ * is done.
+ *
+ * The first approach is to manage the free pages in two sets depending on
+ * whether they are mainly for the allocation of shared or private pages.
+ * In the initial implementation there will be almost no possibility for
+ * mixing shared and private pages in the same chunk (only if we're really
+ * stressed on memory), but when we implement forking of VMs and have to
+ * deal with lots of COW pages it'll start getting kind of interesting.
+ *
+ * The sets are lists of chunks with approximately the same number of
+ * free pages. Say the chunk size is 1MB, meaning 256 pages, and a set
+ * consists of 16 lists. So, the first list will contain the chunks with
+ * 1-7 free pages, the second covers 8-15, and so on. The chunks will be
+ * moved between the lists as pages are freed up or allocated.
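+ *
+ * One possible mapping from a chunk's free page count to a list index that
+ * matches the example above (an illustration only, not necessarily the exact
+ * formula used by the implementation):
+ * @code
+ * iList = RT_MIN(cFree / 8, cLists - 1);
+ * @endcode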
+ *
+ *
+ * @section sec_gmm_costs Costs
+ *
+ * The per-page cost in kernel space is 32 bits plus whatever RTR0MEMOBJ
+ * entails. In addition there is the chunk cost of approximately
+ * (sizeof(RTR0MEMOBJ) + sizeof(CHUNK)) / 2^CHUNK_SHIFT bytes per page.
+ *
+ * On Windows the per-page #RTR0MEMOBJ cost is 32 bits on 32-bit Windows
+ * and 64 bits on 64-bit Windows (a PFN_NUMBER in the MDL). So, 64 bits per page.
+ * The cost on Linux is identical, but there it's because of sizeof(struct page *).
+ *
+ *
+ * @section sec_gmm_legacy Legacy Mode for Non-Tier-1 Platforms
+ *
+ * In legacy mode the page source is locked user pages and not
+ * #RTR0MemObjAllocPhysNC, this means that a page can only be allocated
+ * by the VM that locked it. We will make no attempt at implementing
+ * page sharing on these systems, just do enough to make it all work.
+ *
+ *
+ * @subsection sub_gmm_locking Serializing
+ *
+ * One simple fast mutex will be employed in the initial implementation, not
+ * two as mentioned in @ref sec_pgmPhys_Serializing.
+ *
+ * @see @ref sec_pgmPhys_Serializing
+ *
+ *
+ * @section sec_gmm_overcommit Memory Over-Commitment Management
+ *
+ * The GVM will have to do the system wide memory over-commitment
+ * management. My current ideas are:
+ * - Per-VM over-commitment policy that indicates how much to initially commit
+ * to it and what to do in an out-of-memory situation.
+ * - Prevent overtaxing the host.
+ *
+ * There are some challenges here; the main ones are configurability and
+ * security. Should we for instance permit anyone to request 100% memory
+ * commitment? Who should be allowed to do runtime adjustments of the
+ * config? And how do we prevent these settings from being lost when the last
+ * VM process exits? The solution is probably to have an optional root
+ * daemon that will keep VMMR0.r0 in memory and enable the security measures.
+ *
+ *
+ *
+ * @section sec_gmm_numa NUMA
+ *
+ * NUMA considerations will be designed and implemented a bit later.
+ *
+ * The preliminary guess is that we will have to try to allocate memory as
+ * close as possible to the CPUs the VM is executed on (EMT and additional CPU
+ * threads), which means it's mostly about allocation and sharing policies.
+ * Both the scheduler and the allocator interface will have to supply some NUMA
+ * info, and we'll need to have a way to calculate access costs.
+ *
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_GMM
+#include <VBox/rawpci.h>
+#include <VBox/vmm/vm.h>
+#include <VBox/vmm/gmm.h>
+#include "GMMR0Internal.h"
+#include <VBox/vmm/gvm.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/log.h>
+#include <VBox/param.h>
+#include <VBox/err.h>
+#include <VBox/VMMDev.h>
+#include <iprt/asm.h>
+#include <iprt/avl.h>
+#ifdef VBOX_STRICT
+# include <iprt/crc.h>
+#endif
+#include <iprt/critsect.h>
+#include <iprt/list.h>
+#include <iprt/mem.h>
+#include <iprt/memobj.h>
+#include <iprt/mp.h>
+#include <iprt/semaphore.h>
+#include <iprt/string.h>
+#include <iprt/time.h>
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** @def VBOX_USE_CRIT_SECT_FOR_GIANT
+ * Use a critical section instead of a fast mutex for the giant GMM lock.
+ *
+ * @remarks This is primarily a way of avoiding the deadlock checks in the
+ * Windows driver verifier. */
+#if defined(RT_OS_WINDOWS) || defined(DOXYGEN_RUNNING)
+# define VBOX_USE_CRIT_SECT_FOR_GIANT
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/** Pointer to set of free chunks. */
+typedef struct GMMCHUNKFREESET *PGMMCHUNKFREESET;
+
+/**
+ * The per-page tracking structure employed by the GMM.
+ *
+ * On 32-bit hosts some trickery is necessary to compress all
+ * the information into 32 bits. When the fSharedFree member is set,
+ * the 30th bit decides whether it's a free page or not.
+ *
+ * Because of the different layout on 32-bit and 64-bit hosts, macros
+ * are used to get and set some of the data.
+ */
+typedef union GMMPAGE
+{
+#if HC_ARCH_BITS == 64
+ /** Unsigned integer view. */
+ uint64_t u;
+
+ /** The common view. */
+ struct GMMPAGECOMMON
+ {
+ uint32_t uStuff1 : 32;
+ uint32_t uStuff2 : 30;
+ /** The page state. */
+ uint32_t u2State : 2;
+ } Common;
+
+ /** The view of a private page. */
+ struct GMMPAGEPRIVATE
+ {
+ /** The guest page frame number. (Max addressable: 2 ^ 44 - 16) */
+ uint32_t pfn;
+ /** The GVM handle. (64K VMs) */
+ uint32_t hGVM : 16;
+ /** Reserved. */
+ uint32_t u16Reserved : 14;
+ /** The page state. */
+ uint32_t u2State : 2;
+ } Private;
+
+ /** The view of a shared page. */
+ struct GMMPAGESHARED
+ {
+ /** The host page frame number. (Max addressable: 2 ^ 44 - 16) */
+ uint32_t pfn;
+ /** The reference count (64K VMs). */
+ uint32_t cRefs : 16;
+ /** Used for debug checksumming. */
+ uint32_t u14Checksum : 14;
+ /** The page state. */
+ uint32_t u2State : 2;
+ } Shared;
+
+ /** The view of a free page. */
+ struct GMMPAGEFREE
+ {
+ /** The index of the next page in the free list. UINT16_MAX is NIL. */
+ uint16_t iNext;
+ /** Reserved. Checksum or something? */
+ uint16_t u16Reserved0;
+ /** Reserved. Checksum or something? */
+ uint32_t u30Reserved1 : 30;
+ /** The page state. */
+ uint32_t u2State : 2;
+ } Free;
+
+#else /* 32-bit */
+ /** Unsigned integer view. */
+ uint32_t u;
+
+ /** The common view. */
+ struct GMMPAGECOMMON
+ {
+ uint32_t uStuff : 30;
+ /** The page state. */
+ uint32_t u2State : 2;
+ } Common;
+
+ /** The view of a private page. */
+ struct GMMPAGEPRIVATE
+ {
+ /** The guest page frame number. (Max addressable: 2 ^ 36) */
+ uint32_t pfn : 24;
+ /** The GVM handle. (127 VMs) */
+ uint32_t hGVM : 7;
+ /** The top page state bit, MBZ. */
+ uint32_t fZero : 1;
+ } Private;
+
+ /** The view of a shared page. */
+ struct GMMPAGESHARED
+ {
+ /** The reference count. */
+ uint32_t cRefs : 30;
+ /** The page state. */
+ uint32_t u2State : 2;
+ } Shared;
+
+ /** The view of a free page. */
+ struct GMMPAGEFREE
+ {
+ /** The index of the next page in the free list. UINT16_MAX is NIL. */
+ uint32_t iNext : 16;
+ /** Reserved. Checksum or something? */
+ uint32_t u14Reserved : 14;
+ /** The page state. */
+ uint32_t u2State : 2;
+ } Free;
+#endif
+} GMMPAGE;
+AssertCompileSize(GMMPAGE, sizeof(RTHCUINTPTR));
+/** Pointer to a GMMPAGE. */
+typedef GMMPAGE *PGMMPAGE;
+
+
+/** @name The Page States.
+ * @{ */
+/** A private page. */
+#define GMM_PAGE_STATE_PRIVATE 0
+/** A private page - alternative value used on the 32-bit implementation.
+ * This will never be used on 64-bit hosts. */
+#define GMM_PAGE_STATE_PRIVATE_32 1
+/** A shared page. */
+#define GMM_PAGE_STATE_SHARED 2
+/** A free page. */
+#define GMM_PAGE_STATE_FREE 3
+/** @} */
+
+
+/** @def GMM_PAGE_IS_PRIVATE
+ *
+ * @returns true if private, false if not.
+ * @param pPage The GMM page.
+ */
+#if HC_ARCH_BITS == 64
+# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_PRIVATE )
+#else
+# define GMM_PAGE_IS_PRIVATE(pPage) ( (pPage)->Private.fZero == 0 )
+#endif
+
+/** @def GMM_PAGE_IS_SHARED
+ *
+ * @returns true if shared, false if not.
+ * @param pPage The GMM page.
+ */
+#define GMM_PAGE_IS_SHARED(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_SHARED )
+
+/** @def GMM_PAGE_IS_FREE
+ *
+ * @returns true if free, false if not.
+ * @param pPage The GMM page.
+ */
+#define GMM_PAGE_IS_FREE(pPage) ( (pPage)->Common.u2State == GMM_PAGE_STATE_FREE )
+
+/** @def GMM_PAGE_PFN_LAST
+ * The last valid guest pfn range.
+ * @remark Some of the values outside the range have special meaning,
+ * see GMM_PAGE_PFN_UNSHAREABLE.
+ */
+#if HC_ARCH_BITS == 64
+# define GMM_PAGE_PFN_LAST UINT32_C(0xfffffff0)
+#else
+# define GMM_PAGE_PFN_LAST UINT32_C(0x00fffff0)
+#endif
+AssertCompile(GMM_PAGE_PFN_LAST == (GMM_GCPHYS_LAST >> PAGE_SHIFT));
+
+/** @def GMM_PAGE_PFN_UNSHAREABLE
+ * Indicates that this page isn't used for normal guest memory and thus isn't shareable.
+ */
+#if HC_ARCH_BITS == 64
+# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0xfffffff1)
+#else
+# define GMM_PAGE_PFN_UNSHAREABLE UINT32_C(0x00fffff1)
+#endif
+AssertCompile(GMM_PAGE_PFN_UNSHAREABLE == (GMM_GCPHYS_UNSHAREABLE >> PAGE_SHIFT));
+
+
+/**
+ * A GMM allocation chunk ring-3 mapping record.
+ *
+ * This should really be associated with a session and not a VM, but
+ * it's simpler to associate it with a VM and clean up when the VM object
+ * is destroyed.
+ */
+typedef struct GMMCHUNKMAP
+{
+ /** The mapping object. */
+ RTR0MEMOBJ hMapObj;
+ /** The VM owning the mapping. */
+ PGVM pGVM;
+} GMMCHUNKMAP;
+/** Pointer to a GMM allocation chunk mapping. */
+typedef struct GMMCHUNKMAP *PGMMCHUNKMAP;
+
+
+/**
+ * A GMM allocation chunk.
+ */
+typedef struct GMMCHUNK
+{
+ /** The AVL node core.
+ * The Key is the chunk ID. (Giant mtx.) */
+ AVLU32NODECORE Core;
+ /** The memory object.
+ * Either from RTR0MemObjAllocPhysNC or RTR0MemObjLockUser depending on
+ * what the host can dish up with. (Chunk mtx protects mapping accesses
+ * and related frees.) */
+ RTR0MEMOBJ hMemObj;
+ /** Pointer to the next chunk in the free list. (Giant mtx.) */
+ PGMMCHUNK pFreeNext;
+ /** Pointer to the previous chunk in the free list. (Giant mtx.) */
+ PGMMCHUNK pFreePrev;
+ /** Pointer to the free set this chunk belongs to. NULL for
+ * chunks with no free pages. (Giant mtx.) */
+ PGMMCHUNKFREESET pSet;
+ /** List node in the chunk list (GMM::ChunkList). (Giant mtx.) */
+ RTLISTNODE ListNode;
+ /** Pointer to an array of mappings. (Chunk mtx.) */
+ PGMMCHUNKMAP paMappingsX;
+ /** The number of mappings. (Chunk mtx.) */
+ uint16_t cMappingsX;
+ /** The mapping lock this chunk is using. UINT8_MAX if nobody is
+ * mapping or freeing anything. (Giant mtx.) */
+ uint8_t volatile iChunkMtx;
+ /** Flags field reserved for future use (like eliminating enmType).
+ * (Giant mtx.) */
+ uint8_t fFlags;
+ /** The head of the list of free pages. UINT16_MAX is the NIL value.
+ * (Giant mtx.) */
+ uint16_t iFreeHead;
+ /** The number of free pages. (Giant mtx.) */
+ uint16_t cFree;
+ /** The GVM handle of the VM that first allocated pages from this chunk, this
+ * is used as a preference when there are several chunks to choose from.
+ * When in bound memory mode this isn't a preference any longer. (Giant
+ * mtx.) */
+ uint16_t hGVM;
+ /** The ID of the NUMA node the memory mostly resides on. (Reserved for
+ * future use.) (Giant mtx.) */
+ uint16_t idNumaNode;
+ /** The number of private pages. (Giant mtx.) */
+ uint16_t cPrivate;
+ /** The number of shared pages. (Giant mtx.) */
+ uint16_t cShared;
+ /** The pages. (Giant mtx.) */
+ GMMPAGE aPages[GMM_CHUNK_SIZE >> PAGE_SHIFT];
+} GMMCHUNK;
+
+/** Indicates that the NUMA properties of the memory are unknown. */
+#define GMM_CHUNK_NUMA_ID_UNKNOWN UINT16_C(0xfffe)
+
+/** @name GMM_CHUNK_FLAGS_XXX - chunk flags.
+ * @{ */
+/** Indicates that the chunk is a large page (2MB). */
+#define GMM_CHUNK_FLAGS_LARGE_PAGE UINT16_C(0x0001)
+/** @} */
+
+
+/**
+ * An allocation chunk TLB entry.
+ */
+typedef struct GMMCHUNKTLBE
+{
+ /** The chunk id. */
+ uint32_t idChunk;
+ /** Pointer to the chunk. */
+ PGMMCHUNK pChunk;
+} GMMCHUNKTLBE;
+/** Pointer to an allocation chunk TLB entry. */
+typedef GMMCHUNKTLBE *PGMMCHUNKTLBE;
+
+
+/** The number of entries in the allocation chunk TLB. */
+#define GMM_CHUNKTLB_ENTRIES 32
+/** Gets the TLB entry index for the given Chunk ID. */
+#define GMM_CHUNKTLB_IDX(idChunk) ( (idChunk) & (GMM_CHUNKTLB_ENTRIES - 1) )
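+/* Example of a TLB probe using the macro above (an illustration only; see
+ * gmmR0GetChunk further down for the actual lookup code):
+ *
+ *     PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(idChunk)];
+ *     if (pTlbe->idChunk == idChunk && pTlbe->pChunk)
+ *         return pTlbe->pChunk;
+ */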
+
+/**
+ * An allocation chunk TLB.
+ */
+typedef struct GMMCHUNKTLB
+{
+ /** The TLB entries. */
+ GMMCHUNKTLBE aEntries[GMM_CHUNKTLB_ENTRIES];
+} GMMCHUNKTLB;
+/** Pointer to an allocation chunk TLB. */
+typedef GMMCHUNKTLB *PGMMCHUNKTLB;
+
+
+/**
+ * The GMM instance data.
+ */
+typedef struct GMM
+{
+ /** Magic / eye catcher. GMM_MAGIC */
+ uint32_t u32Magic;
+ /** The number of threads waiting on the mutex. */
+ uint32_t cMtxContenders;
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ /** The critical section protecting the GMM.
+ * More fine grained locking can be implemented later if necessary. */
+ RTCRITSECT GiantCritSect;
+#else
+ /** The fast mutex protecting the GMM.
+ * More fine grained locking can be implemented later if necessary. */
+ RTSEMFASTMUTEX hMtx;
+#endif
+#ifdef VBOX_STRICT
+ /** The current mutex owner. */
+ RTNATIVETHREAD hMtxOwner;
+#endif
+ /** The chunk tree. */
+ PAVLU32NODECORE pChunks;
+ /** The chunk TLB. */
+ GMMCHUNKTLB ChunkTLB;
+ /** The private free set. */
+ GMMCHUNKFREESET PrivateX;
+ /** The shared free set. */
+ GMMCHUNKFREESET Shared;
+
+ /** Shared module tree (global).
+ * @todo separate trees for distinctly different guest OSes. */
+ PAVLLU32NODECORE pGlobalSharedModuleTree;
+ /** Sharable modules (count of nodes in pGlobalSharedModuleTree). */
+ uint32_t cShareableModules;
+
+ /** The chunk list. For simplifying the cleanup process. */
+ RTLISTANCHOR ChunkList;
+
+ /** The maximum number of pages we're allowed to allocate.
+ * @gcfgm{GMM/MaxPages,64-bit, Direct.}
+ * @gcfgm{GMM/PctPages,32-bit, Relative to the number of host pages.} */
+ uint64_t cMaxPages;
+ /** The number of pages that have been reserved.
+ * The deal is that cReservedPages - cOverCommittedPages <= cMaxPages. */
+ uint64_t cReservedPages;
+ /** The number of pages that we have over-committed in reservations. */
+ uint64_t cOverCommittedPages;
+ /** The number of actually allocated (committed if you like) pages. */
+ uint64_t cAllocatedPages;
+ /** The number of pages that are shared. A subset of cAllocatedPages. */
+ uint64_t cSharedPages;
+ /** The number of pages that are actually shared between VMs. */
+ uint64_t cDuplicatePages;
+ /** The number of shared pages that have been left behind by
+ * VMs not doing proper cleanups. */
+ uint64_t cLeftBehindSharedPages;
+ /** The number of allocation chunks.
+ * (The number of pages we've allocated from the host can be derived from this.) */
+ uint32_t cChunks;
+ /** The number of current ballooned pages. */
+ uint64_t cBalloonedPages;
+
+ /** The legacy allocation mode indicator.
+ * This is determined at initialization time. */
+ bool fLegacyAllocationMode;
+ /** The bound memory mode indicator.
+ * When set, the memory will be bound to a specific VM and never
+ * shared. This is always set if fLegacyAllocationMode is set.
+ * (Also determined at initialization time.) */
+ bool fBoundMemoryMode;
+ /** The number of registered VMs. */
+ uint16_t cRegisteredVMs;
+
+ /** The number of freed chunks ever. This is used as a list generation to
+ * avoid restarting the cleanup scanning when the list wasn't modified. */
+ uint32_t cFreedChunks;
+ /** The previously allocated Chunk ID.
+ * Used as a hint to avoid scanning the whole bitmap. */
+ uint32_t idChunkPrev;
+ /** Chunk ID allocation bitmap.
+ * Bits of allocated IDs are set, free ones are clear.
+ * The NIL id (0) is marked allocated. */
+ uint32_t bmChunkId[(GMM_CHUNKID_LAST + 1 + 31) / 32];
+
+ /** The index of the next mutex to use. */
+ uint32_t iNextChunkMtx;
+ /** Chunk locks for reducing lock contention without having to allocate
+ * one lock per chunk. */
+ struct
+ {
+ /** The mutex */
+ RTSEMFASTMUTEX hMtx;
+ /** The number of threads currently using this mutex. */
+ uint32_t volatile cUsers;
+ } aChunkMtx[64];
+} GMM;
+/** Pointer to the GMM instance. */
+typedef GMM *PGMM;
+
+/** The value of GMM::u32Magic (Katsuhiro Otomo). */
+#define GMM_MAGIC UINT32_C(0x19540414)
+
+
+/**
+ * GMM chunk mutex state.
+ *
+ * This is returned by gmmR0ChunkMutexAcquire and is used by the other
+ * gmmR0ChunkMutex* methods.
+ */
+typedef struct GMMR0CHUNKMTXSTATE
+{
+ PGMM pGMM;
+ /** The index of the chunk mutex. */
+ uint8_t iChunkMtx;
+ /** The relevant flags (GMMR0CHUNK_MTX_XXX). */
+ uint8_t fFlags;
+} GMMR0CHUNKMTXSTATE;
+/** Pointer to a chunk mutex state. */
+typedef GMMR0CHUNKMTXSTATE *PGMMR0CHUNKMTXSTATE;
+
+/** @name GMMR0CHUNK_MTX_XXX
+ * @{ */
+#define GMMR0CHUNK_MTX_INVALID UINT32_C(0)
+#define GMMR0CHUNK_MTX_KEEP_GIANT UINT32_C(1)
+#define GMMR0CHUNK_MTX_RETAKE_GIANT UINT32_C(2)
+#define GMMR0CHUNK_MTX_DROP_GIANT UINT32_C(3)
+#define GMMR0CHUNK_MTX_END UINT32_C(4)
+/** @} */
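+
+/* A typical way these flags and GMMR0CHUNKMTXSTATE are used (an illustration;
+ * gmmR0CleanupVMScanChunk below follows this pattern):
+ *
+ *     GMMR0CHUNKMTXSTATE MtxState;
+ *     gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
+ *     ... work on the chunk, optionally gmmR0ChunkMutexDropGiant(&MtxState) ...
+ *     gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ */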
+
+
+/** The maximum number of shared modules per-vm. */
+#define GMM_MAX_SHARED_PER_VM_MODULES 2048
+/** The maximum number of shared modules GMM is allowed to track. */
+#define GMM_MAX_SHARED_GLOBAL_MODULES 16834
+
+
+/**
+ * Argument packet for gmmR0SharedModuleCleanup.
+ */
+typedef struct GMMR0SHMODPERVMDTORARGS
+{
+ PGVM pGVM;
+ PGMM pGMM;
+} GMMR0SHMODPERVMDTORARGS;
+
+/**
+ * Argument packet for gmmR0CheckSharedModule.
+ */
+typedef struct GMMCHECKSHAREDMODULEINFO
+{
+ PGVM pGVM;
+ VMCPUID idCpu;
+} GMMCHECKSHAREDMODULEINFO;
+
+/**
+ * Argument packet for gmmR0FindDupPageInChunk by GMMR0FindDuplicatePage.
+ */
+typedef struct GMMFINDDUPPAGEINFO
+{
+ PGVM pGVM;
+ PGMM pGMM;
+ uint8_t *pSourcePage;
+ bool fFoundDuplicate;
+} GMMFINDDUPPAGEINFO;
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** Pointer to the GMM instance data. */
+static PGMM g_pGMM = NULL;
+
+/** Macro for obtaining and validating the g_pGMM pointer.
+ *
+ * On failure it will return from the invoking function with the specified
+ * return value.
+ *
+ * @param pGMM The name of the pGMM variable.
+ * @param rc The return value on failure. Use VERR_GMM_INSTANCE for VBox
+ * status codes.
+ */
+#define GMM_GET_VALID_INSTANCE(pGMM, rc) \
+ do { \
+ (pGMM) = g_pGMM; \
+ AssertPtrReturn((pGMM), (rc)); \
+ AssertMsgReturn((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic), (rc)); \
+ } while (0)
+
+/** Macro for obtaining and validating the g_pGMM pointer, void function
+ * variant.
+ *
+ * On failure it will return from the invoking function.
+ *
+ * @param pGMM The name of the pGMM variable.
+ */
+#define GMM_GET_VALID_INSTANCE_VOID(pGMM) \
+ do { \
+ (pGMM) = g_pGMM; \
+ AssertPtrReturnVoid((pGMM)); \
+ AssertMsgReturnVoid((pGMM)->u32Magic == GMM_MAGIC, ("%p - %#x\n", (pGMM), (pGMM)->u32Magic)); \
+ } while (0)
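+
+/* Typical use of the instance getter macros (an illustration; this is the
+ * pattern followed by the public GMMR0* entry points below):
+ *
+ *     PGMM pGMM;
+ *     GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);   returns rc on failure
+ */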
+
+
+/** @def GMM_CHECK_SANITY_UPON_ENTERING
+ * Checks the sanity of the GMM instance data before making changes.
+ *
+ * This macro is a stub by default and must be enabled manually in the code.
+ *
+ * @returns true if sane, false if not.
+ * @param pGMM The name of the pGMM variable.
+ */
+#if defined(VBOX_STRICT) && defined(GMMR0_WITH_SANITY_CHECK) && 0
+# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
+#else
+# define GMM_CHECK_SANITY_UPON_ENTERING(pGMM) (true)
+#endif
+
+/** @def GMM_CHECK_SANITY_UPON_LEAVING
+ * Checks the sanity of the GMM instance data after making changes.
+ *
+ * This macro is a stub by default and must be enabled manually in the code.
+ *
+ * @returns true if sane, false if not.
+ * @param pGMM The name of the pGMM variable.
+ */
+#if defined(VBOX_STRICT) && defined(GMMR0_WITH_SANITY_CHECK) && 0
+# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
+#else
+# define GMM_CHECK_SANITY_UPON_LEAVING(pGMM) (true)
+#endif
+
+/** @def GMM_CHECK_SANITY_IN_LOOPS
+ * Checks the sanity of the GMM instance in the allocation loops.
+ *
+ * This macro is a stub by default and must be enabled manually in the code.
+ *
+ * @returns true if sane, false if not.
+ * @param pGMM The name of the pGMM variable.
+ */
+#if defined(VBOX_STRICT) && defined(GMMR0_WITH_SANITY_CHECK) && 0
+# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (gmmR0SanityCheck((pGMM), __PRETTY_FUNCTION__, __LINE__) == 0)
+#else
+# define GMM_CHECK_SANITY_IN_LOOPS(pGMM) (true)
+#endif
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM);
+static bool gmmR0CleanupVMScanChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
+DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk);
+DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet);
+DECLINLINE(void) gmmR0SelectSetAndLinkChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
+#ifdef GMMR0_WITH_SANITY_CHECK
+static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo);
+#endif
+static bool gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem);
+DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage);
+DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage);
+static int gmmR0UnmapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk);
+#ifdef VBOX_WITH_PAGE_SHARING
+static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM);
+# ifdef VBOX_STRICT
+static uint32_t gmmR0StrictPageChecksum(PGMM pGMM, PGVM pGVM, uint32_t idPage);
+# endif
+#endif
+
+
+
+/**
+ * Initializes the GMM component.
+ *
+ * This is called when the VMMR0.r0 module is loaded and protected by the
+ * loader semaphore.
+ *
+ * @returns VBox status code.
+ */
+GMMR0DECL(int) GMMR0Init(void)
+{
+ LogFlow(("GMMInit:\n"));
+
+ /*
+ * Allocate the instance data and the locks.
+ */
+ PGMM pGMM = (PGMM)RTMemAllocZ(sizeof(*pGMM));
+ if (!pGMM)
+ return VERR_NO_MEMORY;
+
+ pGMM->u32Magic = GMM_MAGIC;
+ for (unsigned i = 0; i < RT_ELEMENTS(pGMM->ChunkTLB.aEntries); i++)
+ pGMM->ChunkTLB.aEntries[i].idChunk = NIL_GMM_CHUNKID;
+ RTListInit(&pGMM->ChunkList);
+ ASMBitSet(&pGMM->bmChunkId[0], NIL_GMM_CHUNKID);
+
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ int rc = RTCritSectInit(&pGMM->GiantCritSect);
+#else
+ int rc = RTSemFastMutexCreate(&pGMM->hMtx);
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ unsigned iMtx;
+ for (iMtx = 0; iMtx < RT_ELEMENTS(pGMM->aChunkMtx); iMtx++)
+ {
+ rc = RTSemFastMutexCreate(&pGMM->aChunkMtx[iMtx].hMtx);
+ if (RT_FAILURE(rc))
+ break;
+ }
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Check and see if RTR0MemObjAllocPhysNC works.
+ */
+#if 0 /* later, see @bugref{3170}. */
+ RTR0MEMOBJ MemObj;
+ rc = RTR0MemObjAllocPhysNC(&MemObj, _64K, NIL_RTHCPHYS);
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTR0MemObjFree(MemObj, true);
+ AssertRC(rc);
+ }
+ else if (rc == VERR_NOT_SUPPORTED)
+ pGMM->fLegacyAllocationMode = pGMM->fBoundMemoryMode = true;
+ else
+ SUPR0Printf("GMMR0Init: RTR0MemObjAllocPhysNC(,64K,Any) -> %d!\n", rc);
+#else
+# if defined(RT_OS_WINDOWS) || (defined(RT_OS_SOLARIS) && ARCH_BITS == 64) || defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
+ pGMM->fLegacyAllocationMode = false;
+# if ARCH_BITS == 32
+ /* Don't reuse possibly partial chunks because of the virtual
+ address space limitation. */
+ pGMM->fBoundMemoryMode = true;
+# else
+ pGMM->fBoundMemoryMode = false;
+# endif
+# else
+ pGMM->fLegacyAllocationMode = true;
+ pGMM->fBoundMemoryMode = true;
+# endif
+#endif
+
+ /*
+ * Query system page count and guess a reasonable cMaxPages value.
+ */
+ pGMM->cMaxPages = UINT32_MAX; /** @todo IPRT function for querying RAM size and such. */
+
+ g_pGMM = pGMM;
+ LogFlow(("GMMInit: pGMM=%p fLegacyAllocationMode=%RTbool fBoundMemoryMode=%RTbool\n", pGMM, pGMM->fLegacyAllocationMode, pGMM->fBoundMemoryMode));
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Bail out.
+ */
+ while (iMtx-- > 0)
+ RTSemFastMutexDestroy(pGMM->aChunkMtx[iMtx].hMtx);
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ RTCritSectDelete(&pGMM->GiantCritSect);
+#else
+ RTSemFastMutexDestroy(pGMM->hMtx);
+#endif
+ }
+
+ pGMM->u32Magic = 0;
+ RTMemFree(pGMM);
+ SUPR0Printf("GMMR0Init: failed! rc=%d\n", rc);
+ return rc;
+}
+
+
+/**
+ * Terminates the GMM component.
+ */
+GMMR0DECL(void) GMMR0Term(void)
+{
+ LogFlow(("GMMTerm:\n"));
+
+ /*
+ * Take care / be paranoid...
+ */
+ PGMM pGMM = g_pGMM;
+ if (!VALID_PTR(pGMM))
+ return;
+ if (pGMM->u32Magic != GMM_MAGIC)
+ {
+ SUPR0Printf("GMMR0Term: u32Magic=%#x\n", pGMM->u32Magic);
+ return;
+ }
+
+ /*
+ * Undo what init did and free all the resources we've acquired.
+ */
+ /* Destroy the fundamentals. */
+ g_pGMM = NULL;
+ pGMM->u32Magic = ~GMM_MAGIC;
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ RTCritSectDelete(&pGMM->GiantCritSect);
+#else
+ RTSemFastMutexDestroy(pGMM->hMtx);
+ pGMM->hMtx = NIL_RTSEMFASTMUTEX;
+#endif
+
+ /* Free any chunks still hanging around. */
+ RTAvlU32Destroy(&pGMM->pChunks, gmmR0TermDestroyChunk, pGMM);
+
+ /* Destroy the chunk locks. */
+ for (unsigned iMtx = 0; iMtx < RT_ELEMENTS(pGMM->aChunkMtx); iMtx++)
+ {
+ Assert(pGMM->aChunkMtx[iMtx].cUsers == 0);
+ RTSemFastMutexDestroy(pGMM->aChunkMtx[iMtx].hMtx);
+ pGMM->aChunkMtx[iMtx].hMtx = NIL_RTSEMFASTMUTEX;
+ }
+
+ /* Finally the instance data itself. */
+ RTMemFree(pGMM);
+ LogFlow(("GMMTerm: done\n"));
+}
+
+
+/**
+ * RTAvlU32Destroy callback.
+ *
+ * @returns 0
+ * @param pNode The node to destroy.
+ * @param pvGMM The GMM handle.
+ */
+static DECLCALLBACK(int) gmmR0TermDestroyChunk(PAVLU32NODECORE pNode, void *pvGMM)
+{
+ PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
+
+ if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
+ SUPR0Printf("GMMR0Term: %RKv/%#x: cFree=%d cPrivate=%d cShared=%d cMappings=%d\n", pChunk,
+ pChunk->Core.Key, pChunk->cFree, pChunk->cPrivate, pChunk->cShared, pChunk->cMappingsX);
+
+ int rc = RTR0MemObjFree(pChunk->hMemObj, true /* fFreeMappings */);
+ if (RT_FAILURE(rc))
+ {
+ SUPR0Printf("GMMR0Term: %RKv/%#x: RTRMemObjFree(%RKv,true) -> %d (cMappings=%d)\n", pChunk,
+ pChunk->Core.Key, pChunk->hMemObj, rc, pChunk->cMappingsX);
+ AssertRC(rc);
+ }
+ pChunk->hMemObj = NIL_RTR0MEMOBJ;
+
+ RTMemFree(pChunk->paMappingsX);
+ pChunk->paMappingsX = NULL;
+
+ RTMemFree(pChunk);
+ NOREF(pvGMM);
+ return 0;
+}
+
+
+/**
+ * Initializes the per-VM data for the GMM.
+ *
+ * This is called from within the GVMM lock (from GVMMR0CreateVM)
+ * and should only initialize the data members so GMMR0CleanupVM
+ * can deal with them. We reserve no memory or anything here,
+ * that's done later in GMMR0InitVM.
+ *
+ * @param pGVM Pointer to the Global VM structure.
+ */
+GMMR0DECL(void) GMMR0InitPerVMData(PGVM pGVM)
+{
+ AssertCompile(RT_SIZEOFMEMB(GVM,gmm.s) <= RT_SIZEOFMEMB(GVM,gmm.padding));
+
+ pGVM->gmm.s.Stats.enmPolicy = GMMOCPOLICY_INVALID;
+ pGVM->gmm.s.Stats.enmPriority = GMMPRIORITY_INVALID;
+ pGVM->gmm.s.Stats.fMayAllocate = false;
+}
+
+
+/**
+ * Acquires the GMM giant lock.
+ *
+ * @returns Assert status code from RTSemFastMutexRequest.
+ * @param pGMM Pointer to the GMM instance.
+ */
+static int gmmR0MutexAcquire(PGMM pGMM)
+{
+ ASMAtomicIncU32(&pGMM->cMtxContenders);
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ int rc = RTCritSectEnter(&pGMM->GiantCritSect);
+#else
+ int rc = RTSemFastMutexRequest(pGMM->hMtx);
+#endif
+ ASMAtomicDecU32(&pGMM->cMtxContenders);
+ AssertRC(rc);
+#ifdef VBOX_STRICT
+ pGMM->hMtxOwner = RTThreadNativeSelf();
+#endif
+ return rc;
+}
+
+
+/**
+ * Releases the GMM giant lock.
+ *
+ * @returns Assert status code from RTSemFastMutexRelease.
+ * @param pGMM Pointer to the GMM instance.
+ */
+static int gmmR0MutexRelease(PGMM pGMM)
+{
+#ifdef VBOX_STRICT
+ pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
+#endif
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ int rc = RTCritSectLeave(&pGMM->GiantCritSect);
+#else
+ int rc = RTSemFastMutexRelease(pGMM->hMtx);
+ AssertRC(rc);
+#endif
+ return rc;
+}
+
+
+/**
+ * Yields the GMM giant lock if there is contention and a certain minimum time
+ * has elapsed since we took it.
+ *
+ * @returns @c true if the mutex was yielded, @c false if not.
+ * @param pGMM Pointer to the GMM instance.
+ * @param puLockNanoTS Where the lock acquisition time stamp is kept
+ * (in/out).
+ */
+static bool gmmR0MutexYield(PGMM pGMM, uint64_t *puLockNanoTS)
+{
+ /*
+ * If nobody is contending the mutex, don't bother checking the time.
+ */
+ if (ASMAtomicReadU32(&pGMM->cMtxContenders) == 0)
+ return false;
+
+ /*
+ * Don't yield if we haven't executed for at least 2 milliseconds.
+ */
+ uint64_t uNanoNow = RTTimeSystemNanoTS();
+ if (uNanoNow - *puLockNanoTS < UINT32_C(2000000))
+ return false;
+
+ /*
+ * Yield the mutex.
+ */
+#ifdef VBOX_STRICT
+ pGMM->hMtxOwner = NIL_RTNATIVETHREAD;
+#endif
+ ASMAtomicIncU32(&pGMM->cMtxContenders);
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ int rc1 = RTCritSectLeave(&pGMM->GiantCritSect); AssertRC(rc1);
+#else
+ int rc1 = RTSemFastMutexRelease(pGMM->hMtx); AssertRC(rc1);
+#endif
+
+ RTThreadYield();
+
+#ifdef VBOX_USE_CRIT_SECT_FOR_GIANT
+ int rc2 = RTCritSectEnter(&pGMM->GiantCritSect); AssertRC(rc2);
+#else
+ int rc2 = RTSemFastMutexRequest(pGMM->hMtx); AssertRC(rc2);
+#endif
+ *puLockNanoTS = RTTimeSystemNanoTS();
+ ASMAtomicDecU32(&pGMM->cMtxContenders);
+#ifdef VBOX_STRICT
+ pGMM->hMtxOwner = RTThreadNativeSelf();
+#endif
+
+ return true;
+}
+
+
+/**
+ * Acquires a chunk lock.
+ *
+ * The caller must own the giant lock.
+ *
+ * @returns Assert status code from RTSemFastMutexRequest.
+ * @param pMtxState The chunk mutex state info. (Avoids
+ * passing the same flags and stuff around
+ * for subsequent release and drop-giant
+ * calls.)
+ * @param pGMM Pointer to the GMM instance.
+ * @param pChunk Pointer to the chunk.
+ * @param fFlags Flags regarding the giant lock, GMMR0CHUNK_MTX_XXX.
+ */
+static int gmmR0ChunkMutexAcquire(PGMMR0CHUNKMTXSTATE pMtxState, PGMM pGMM, PGMMCHUNK pChunk, uint32_t fFlags)
+{
+ Assert(fFlags > GMMR0CHUNK_MTX_INVALID && fFlags < GMMR0CHUNK_MTX_END);
+ Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
+
+ pMtxState->pGMM = pGMM;
+ pMtxState->fFlags = (uint8_t)fFlags;
+
+ /*
+ * Get the lock index and reference the lock.
+ */
+ Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
+ uint32_t iChunkMtx = pChunk->iChunkMtx;
+ if (iChunkMtx == UINT8_MAX)
+ {
+ iChunkMtx = pGMM->iNextChunkMtx++;
+ iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
+
+ /* Try get an unused one... */
+ if (pGMM->aChunkMtx[iChunkMtx].cUsers)
+ {
+ iChunkMtx = pGMM->iNextChunkMtx++;
+ iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
+ if (pGMM->aChunkMtx[iChunkMtx].cUsers)
+ {
+ iChunkMtx = pGMM->iNextChunkMtx++;
+ iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
+ if (pGMM->aChunkMtx[iChunkMtx].cUsers)
+ {
+ iChunkMtx = pGMM->iNextChunkMtx++;
+ iChunkMtx %= RT_ELEMENTS(pGMM->aChunkMtx);
+ }
+ }
+ }
+
+ pChunk->iChunkMtx = iChunkMtx;
+ }
+ AssertCompile(RT_ELEMENTS(pGMM->aChunkMtx) < UINT8_MAX);
+ pMtxState->iChunkMtx = (uint8_t)iChunkMtx;
+ ASMAtomicIncU32(&pGMM->aChunkMtx[iChunkMtx].cUsers);
+
+ /*
+ * Drop the giant?
+ */
+ if (fFlags != GMMR0CHUNK_MTX_KEEP_GIANT)
+ {
+ /** @todo GMM life cycle cleanup (we may race someone
+ * destroying and cleaning up GMM)? */
+ gmmR0MutexRelease(pGMM);
+ }
+
+ /*
+ * Take the chunk mutex.
+ */
+ int rc = RTSemFastMutexRequest(pGMM->aChunkMtx[iChunkMtx].hMtx);
+ AssertRC(rc);
+ return rc;
+}
+
+
+/**
+ * Releases a GMM chunk mutex acquired by gmmR0ChunkMutexAcquire.
+ *
+ * @returns Assert status code from RTSemFastMutexRequest.
+ * @param pMtxState Pointer to the chunk mutex state.
+ * @param pChunk Pointer to the chunk if it's still
+ * alive, NULL if it isn't. This is used to deassociate
+ * the chunk from the mutex on the way out so a new one
+ * can be selected next time, thus avoiding contended
+ * mutexes.
+ */
+static int gmmR0ChunkMutexRelease(PGMMR0CHUNKMTXSTATE pMtxState, PGMMCHUNK pChunk)
+{
+ PGMM pGMM = pMtxState->pGMM;
+
+ /*
+ * Release the chunk mutex and reacquire the giant if requested.
+ */
+ int rc = RTSemFastMutexRelease(pGMM->aChunkMtx[pMtxState->iChunkMtx].hMtx);
+ AssertRC(rc);
+ if (pMtxState->fFlags == GMMR0CHUNK_MTX_RETAKE_GIANT)
+ rc = gmmR0MutexAcquire(pGMM);
+ else
+ Assert((pMtxState->fFlags != GMMR0CHUNK_MTX_DROP_GIANT) == (pGMM->hMtxOwner == RTThreadNativeSelf()));
+
+ /*
+ * Drop the chunk mutex user reference and deassociate it from the chunk
+ * when possible.
+ */
+ if ( ASMAtomicDecU32(&pGMM->aChunkMtx[pMtxState->iChunkMtx].cUsers) == 0
+ && pChunk
+ && RT_SUCCESS(rc) )
+ {
+ if (pMtxState->fFlags != GMMR0CHUNK_MTX_DROP_GIANT)
+ pChunk->iChunkMtx = UINT8_MAX;
+ else
+ {
+ rc = gmmR0MutexAcquire(pGMM);
+ if (RT_SUCCESS(rc))
+ {
+ if (pGMM->aChunkMtx[pMtxState->iChunkMtx].cUsers == 0)
+ pChunk->iChunkMtx = UINT8_MAX;
+ rc = gmmR0MutexRelease(pGMM);
+ }
+ }
+ }
+
+ pMtxState->pGMM = NULL;
+ return rc;
+}
+
+
+/**
+ * Drops the giant GMM lock we kept in gmmR0ChunkMutexAcquire while keeping the
+ * chunk locked.
+ *
+ * This only works if gmmR0ChunkMutexAcquire was called with
+ * GMMR0CHUNK_MTX_KEEP_GIANT. gmmR0ChunkMutexRelease will retake the giant
+ * mutex, i.e. behave as if GMMR0CHUNK_MTX_RETAKE_GIANT was used.
+ *
+ * @returns VBox status code (assuming success is ok).
+ * @param pMtxState Pointer to the chunk mutex state.
+ */
+static int gmmR0ChunkMutexDropGiant(PGMMR0CHUNKMTXSTATE pMtxState)
+{
+ AssertReturn(pMtxState->fFlags == GMMR0CHUNK_MTX_KEEP_GIANT, VERR_GMM_MTX_FLAGS);
+ Assert(pMtxState->pGMM->hMtxOwner == RTThreadNativeSelf());
+ pMtxState->fFlags = GMMR0CHUNK_MTX_RETAKE_GIANT;
+ /** @todo GMM life cycle cleanup (we may race someone
+ * destroying and cleaning up GMM)? */
+ return gmmR0MutexRelease(pMtxState->pGMM);
+}
+
+
+/**
+ * For experimenting with NUMA affinity and such.
+ *
+ * @returns The current NUMA Node ID.
+ */
+static uint16_t gmmR0GetCurrentNumaNodeId(void)
+{
+#if 1
+ return GMM_CHUNK_NUMA_ID_UNKNOWN;
+#else
+ return RTMpCpuId() / 16;
+#endif
+}
+
+
+
+/**
+ * Cleans up when a VM is terminating.
+ *
+ * @param pGVM Pointer to the Global VM structure.
+ */
+GMMR0DECL(void) GMMR0CleanupVM(PGVM pGVM)
+{
+ LogFlow(("GMMR0CleanupVM: pGVM=%p:{.pVM=%p, .hSelf=%#x}\n", pGVM, pGVM->pVM, pGVM->hSelf));
+
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE_VOID(pGMM);
+
+#ifdef VBOX_WITH_PAGE_SHARING
+ /*
+ * Clean up all registered shared modules first.
+ */
+ gmmR0SharedModuleCleanup(pGMM, pGVM);
+#endif
+
+ gmmR0MutexAcquire(pGMM);
+ uint64_t uLockNanoTS = RTTimeSystemNanoTS();
+ GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
+
+ /*
+ * The policy is 'INVALID' until the initial reservation
+ * request has been serviced.
+ */
+ if ( pGVM->gmm.s.Stats.enmPolicy > GMMOCPOLICY_INVALID
+ && pGVM->gmm.s.Stats.enmPolicy < GMMOCPOLICY_END)
+ {
+ /*
+ * If it's the last VM around, we can skip walking all the chunks looking
+ * for the pages owned by this VM and instead flush the whole shebang.
+ *
+ * This takes care of the eventuality that a VM has left shared page
+ * references behind (shouldn't happen of course, but you never know).
+ */
+ Assert(pGMM->cRegisteredVMs);
+ pGMM->cRegisteredVMs--;
+
+ /*
+ * Walk the entire pool looking for pages that belong to this VM
+ * and leftover mappings. (This'll only catch private pages,
+ * shared pages will be 'left behind'.)
+ */
+ /** @todo r=bird: This scanning+freeing could be optimized in bound mode! */
+ uint64_t cPrivatePages = pGVM->gmm.s.Stats.cPrivatePages; /* save */
+
+ unsigned iCountDown = 64;
+ bool fRedoFromStart;
+ PGMMCHUNK pChunk;
+ do
+ {
+ fRedoFromStart = false;
+ RTListForEachReverse(&pGMM->ChunkList, pChunk, GMMCHUNK, ListNode)
+ {
+ uint32_t const cFreeChunksOld = pGMM->cFreedChunks;
+ if ( ( !pGMM->fBoundMemoryMode
+ || pChunk->hGVM == pGVM->hSelf)
+ && gmmR0CleanupVMScanChunk(pGMM, pGVM, pChunk))
+ {
+ /* We left the giant mutex, so reset the yield counters. */
+ uLockNanoTS = RTTimeSystemNanoTS();
+ iCountDown = 64;
+ }
+ else
+ {
+ /* Didn't leave it, so do normal yielding. */
+ if (!iCountDown)
+ gmmR0MutexYield(pGMM, &uLockNanoTS);
+ else
+ iCountDown--;
+ }
+ if (pGMM->cFreedChunks != cFreeChunksOld)
+ {
+ fRedoFromStart = true;
+ break;
+ }
+ }
+ } while (fRedoFromStart);
+
+ if (pGVM->gmm.s.Stats.cPrivatePages)
+ SUPR0Printf("GMMR0CleanupVM: hGVM=%#x has %#x private pages that cannot be found!\n", pGVM->hSelf, pGVM->gmm.s.Stats.cPrivatePages);
+
+ pGMM->cAllocatedPages -= cPrivatePages;
+
+ /*
+ * Free empty chunks.
+ */
+ PGMMCHUNKFREESET pPrivateSet = pGMM->fBoundMemoryMode ? &pGVM->gmm.s.Private : &pGMM->PrivateX;
+ do
+ {
+ fRedoFromStart = false;
+ iCountDown = 10240;
+ pChunk = pPrivateSet->apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST];
+ while (pChunk)
+ {
+ PGMMCHUNK pNext = pChunk->pFreeNext;
+ Assert(pChunk->cFree == GMM_CHUNK_NUM_PAGES);
+ if ( !pGMM->fBoundMemoryMode
+ || pChunk->hGVM == pGVM->hSelf)
+ {
+ uint64_t const idGenerationOld = pPrivateSet->idGeneration;
+ if (gmmR0FreeChunk(pGMM, pGVM, pChunk, true /*fRelaxedSem*/))
+ {
+ /* We've left the giant mutex, restart? (+1 for our unlink) */
+ fRedoFromStart = pPrivateSet->idGeneration != idGenerationOld + 1;
+ if (fRedoFromStart)
+ break;
+ uLockNanoTS = RTTimeSystemNanoTS();
+ iCountDown = 10240;
+ }
+ }
+
+ /* Advance and maybe yield the lock. */
+ pChunk = pNext;
+ if (--iCountDown == 0)
+ {
+ uint64_t const idGenerationOld = pPrivateSet->idGeneration;
+ fRedoFromStart = gmmR0MutexYield(pGMM, &uLockNanoTS)
+ && pPrivateSet->idGeneration != idGenerationOld;
+ if (fRedoFromStart)
+ break;
+ iCountDown = 10240;
+ }
+ }
+ } while (fRedoFromStart);
+
+ /*
+ * Account for shared pages that weren't freed.
+ */
+ if (pGVM->gmm.s.Stats.cSharedPages)
+ {
+ Assert(pGMM->cSharedPages >= pGVM->gmm.s.Stats.cSharedPages);
+ SUPR0Printf("GMMR0CleanupVM: hGVM=%#x left %#x shared pages behind!\n", pGVM->hSelf, pGVM->gmm.s.Stats.cSharedPages);
+ pGMM->cLeftBehindSharedPages += pGVM->gmm.s.Stats.cSharedPages;
+ }
+
+ /*
+ * Clean up balloon statistics in case the VM process crashed.
+ */
+ Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.Stats.cBalloonedPages);
+ pGMM->cBalloonedPages -= pGVM->gmm.s.Stats.cBalloonedPages;
+
+ /*
+ * Update the over-commitment management statistics.
+ */
+ pGMM->cReservedPages -= pGVM->gmm.s.Stats.Reserved.cBasePages
+ + pGVM->gmm.s.Stats.Reserved.cFixedPages
+ + pGVM->gmm.s.Stats.Reserved.cShadowPages;
+ switch (pGVM->gmm.s.Stats.enmPolicy)
+ {
+ case GMMOCPOLICY_NO_OC:
+ break;
+ default:
+ /** @todo Update GMM->cOverCommittedPages */
+ break;
+ }
+ }
+
+ /* zap the GVM data. */
+ pGVM->gmm.s.Stats.enmPolicy = GMMOCPOLICY_INVALID;
+ pGVM->gmm.s.Stats.enmPriority = GMMPRIORITY_INVALID;
+ pGVM->gmm.s.Stats.fMayAllocate = false;
+
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ gmmR0MutexRelease(pGMM);
+
+ LogFlow(("GMMR0CleanupVM: returns\n"));
+}
+
+
+/**
+ * Scan one chunk for private pages belonging to the specified VM.
+ *
+ * @note This function may drop the giant mutex!
+ *
+ * @returns @c true if we've temporarily dropped the giant mutex, @c false if
+ * we didn't.
+ * @param pGMM Pointer to the GMM instance.
+ * @param pGVM The global VM handle.
+ * @param pChunk The chunk to scan.
+ */
+static bool gmmR0CleanupVMScanChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
+{
+ Assert(!pGMM->fBoundMemoryMode || pChunk->hGVM == pGVM->hSelf);
+
+ /*
+ * Look for pages belonging to the VM.
+ * (Perform some internal checks while we're scanning.)
+ */
+#ifndef VBOX_STRICT
+ if (pChunk->cFree != (GMM_CHUNK_SIZE >> PAGE_SHIFT))
+#endif
+ {
+ unsigned cPrivate = 0;
+ unsigned cShared = 0;
+ unsigned cFree = 0;
+
+ gmmR0UnlinkChunk(pChunk); /* avoiding cFreePages updates. */
+
+ uint16_t hGVM = pGVM->hSelf;
+ unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
+ while (iPage-- > 0)
+ if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
+ {
+ if (pChunk->aPages[iPage].Private.hGVM == hGVM)
+ {
+ /*
+ * Free the page.
+ *
+ * The reason for not using gmmR0FreePrivatePage here is that we
+ * must *not* cause the chunk to be freed from under us - we're in
+ * an AVL tree walk here.
+ */
+ pChunk->aPages[iPage].u = 0;
+ pChunk->aPages[iPage].Free.iNext = pChunk->iFreeHead;
+ pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
+ pChunk->iFreeHead = iPage;
+ pChunk->cPrivate--;
+ pChunk->cFree++;
+ pGVM->gmm.s.Stats.cPrivatePages--;
+ cFree++;
+ }
+ else
+ cPrivate++;
+ }
+ else if (GMM_PAGE_IS_FREE(&pChunk->aPages[iPage]))
+ cFree++;
+ else
+ cShared++;
+
+ gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
+
+ /*
+ * Did it add up?
+ */
+ if (RT_UNLIKELY( pChunk->cFree != cFree
+ || pChunk->cPrivate != cPrivate
+ || pChunk->cShared != cShared))
+ {
+ SUPR0Printf("gmmR0CleanupVMScanChunk: Chunk %RKv/%#x has bogus stats - free=%d/%d private=%d/%d shared=%d/%d\n",
+ pChunk, pChunk->Core.Key, pChunk->cFree, cFree, pChunk->cPrivate, cPrivate, pChunk->cShared, cShared);
+ pChunk->cFree = cFree;
+ pChunk->cPrivate = cPrivate;
+ pChunk->cShared = cShared;
+ }
+ }
+
+ /*
+ * If not in bound memory mode, we should reset the hGVM field
+ * if it has our handle in it.
+ */
+ if (pChunk->hGVM == pGVM->hSelf)
+ {
+ if (!g_pGMM->fBoundMemoryMode)
+ pChunk->hGVM = NIL_GVM_HANDLE;
+ else if (pChunk->cFree != GMM_CHUNK_NUM_PAGES)
+ {
+ SUPR0Printf("gmmR0CleanupVMScanChunk: %RKv/%#x: cFree=%#x - it should be 0 in bound mode!\n",
+ pChunk, pChunk->Core.Key, pChunk->cFree);
+ AssertMsgFailed(("%p/%#x: cFree=%#x - it should be 0 in bound mode!\n", pChunk, pChunk->Core.Key, pChunk->cFree));
+
+ gmmR0UnlinkChunk(pChunk);
+ pChunk->cFree = GMM_CHUNK_NUM_PAGES;
+ gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
+ }
+ }
+
+ /*
+ * Look for a mapping belonging to the terminating VM.
+ */
+ GMMR0CHUNKMTXSTATE MtxState;
+ gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
+ unsigned cMappings = pChunk->cMappingsX;
+ for (unsigned i = 0; i < cMappings; i++)
+ if (pChunk->paMappingsX[i].pGVM == pGVM)
+ {
+ gmmR0ChunkMutexDropGiant(&MtxState);
+
+ RTR0MEMOBJ hMemObj = pChunk->paMappingsX[i].hMapObj;
+
+ cMappings--;
+ if (i < cMappings)
+ pChunk->paMappingsX[i] = pChunk->paMappingsX[cMappings];
+ pChunk->paMappingsX[cMappings].pGVM = NULL;
+ pChunk->paMappingsX[cMappings].hMapObj = NIL_RTR0MEMOBJ;
+ Assert(pChunk->cMappingsX - 1U == cMappings);
+ pChunk->cMappingsX = cMappings;
+
+ int rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings (NA) */);
+ if (RT_FAILURE(rc))
+ {
+ SUPR0Printf("gmmR0CleanupVMScanChunk: %RKv/%#x: mapping #%x: RTRMemObjFree(%RKv,false) -> %d \n",
+ pChunk, pChunk->Core.Key, i, hMemObj, rc);
+ AssertRC(rc);
+ }
+
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ return true;
+ }
+
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ return false;
+}
+
+
+/**
+ * The initial resource reservations.
+ *
+ * This will make memory reservations according to policy and priority. If there aren't
+ * sufficient resources available to sustain the VM this function will fail and all
+ * future allocation requests will fail as well.
+ *
+ * These are just the initial reservations made very early during the VM creation
+ * process and will be adjusted later in the GMMR0UpdateReservation call after the
+ * ring-3 init has completed.
+ *
+ * @returns VBox status code.
+ * @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
+ * @retval VERR_GMM_
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id - must be zero.
+ * @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
+ * This does not include MMIO2 and similar.
+ * @param cShadowPages The number of pages that may be allocated for shadow paging structures.
+ * @param cFixedPages The number of pages that may be allocated for fixed objects like the
+ * hyper heap, MMIO2 and similar.
+ * @param enmPolicy The OC policy to use on this VM.
+ * @param enmPriority The priority in an out-of-memory situation.
+ *
+ * @thread The creator thread / EMT(0).
+ */
+GMMR0DECL(int) GMMR0InitialReservation(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t cBasePages, uint32_t cShadowPages,
+ uint32_t cFixedPages, GMMOCPOLICY enmPolicy, GMMPRIORITY enmPriority)
+{
+ LogFlow(("GMMR0InitialReservation: pGVM=%p pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x enmPolicy=%d enmPriority=%d\n",
+ pGVM, pVM, cBasePages, cShadowPages, cFixedPages, enmPolicy, enmPriority));
+
+ /*
+ * Validate, get basics and take the semaphore.
+ */
+ AssertReturn(idCpu == 0, VERR_INVALID_CPU_ID);
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
+ AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
+ AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
+ AssertReturn(enmPolicy > GMMOCPOLICY_INVALID && enmPolicy < GMMOCPOLICY_END, VERR_INVALID_PARAMETER);
+ AssertReturn(enmPriority > GMMPRIORITY_INVALID && enmPriority < GMMPRIORITY_END, VERR_INVALID_PARAMETER);
+
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ if ( !pGVM->gmm.s.Stats.Reserved.cBasePages
+ && !pGVM->gmm.s.Stats.Reserved.cFixedPages
+ && !pGVM->gmm.s.Stats.Reserved.cShadowPages)
+ {
+ /*
+ * Check if we can accommodate this.
+ */
+ /* ... later ... */
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Update the records.
+ */
+ pGVM->gmm.s.Stats.Reserved.cBasePages = cBasePages;
+ pGVM->gmm.s.Stats.Reserved.cFixedPages = cFixedPages;
+ pGVM->gmm.s.Stats.Reserved.cShadowPages = cShadowPages;
+ pGVM->gmm.s.Stats.enmPolicy = enmPolicy;
+ pGVM->gmm.s.Stats.enmPriority = enmPriority;
+ pGVM->gmm.s.Stats.fMayAllocate = true;
+
+ pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
+ pGMM->cRegisteredVMs++;
+ }
+ }
+ else
+ rc = VERR_WRONG_ORDER;
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0InitialReservation: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0InitialReservation.
+ *
+ * @returns see GMMR0InitialReservation.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0InitialReservationReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGMMINITIALRESERVATIONREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pVM, VERR_INVALID_POINTER);
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GMMR0InitialReservation(pGVM, pVM, idCpu, pReq->cBasePages, pReq->cShadowPages,
+ pReq->cFixedPages, pReq->enmPolicy, pReq->enmPriority);
+}
+
+
+/**
+ * This updates the memory reservation with the additional MMIO2 and ROM pages.
+ *
+ * @returns VBox status code.
+ * @retval VERR_GMM_MEMORY_RESERVATION_DECLINED
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param cBasePages The number of pages that may be allocated for the base RAM and ROMs.
+ * This does not include MMIO2 and similar.
+ * @param cShadowPages The number of pages that may be allocated for shadow paging structures.
+ * @param cFixedPages The number of pages that may be allocated for fixed objects like the
+ * hyper heap, MMIO2 and similar.
+ *
+ * @thread EMT(idCpu)
+ */
+GMMR0DECL(int) GMMR0UpdateReservation(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t cBasePages,
+ uint32_t cShadowPages, uint32_t cFixedPages)
+{
+ LogFlow(("GMMR0UpdateReservation: pGVM=%p pVM=%p cBasePages=%#llx cShadowPages=%#x cFixedPages=%#x\n",
+ pGVM, pVM, cBasePages, cShadowPages, cFixedPages));
+
+ /*
+ * Validate, get basics and take the semaphore.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertReturn(cBasePages, VERR_INVALID_PARAMETER);
+ AssertReturn(cShadowPages, VERR_INVALID_PARAMETER);
+ AssertReturn(cFixedPages, VERR_INVALID_PARAMETER);
+
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ if ( pGVM->gmm.s.Stats.Reserved.cBasePages
+ && pGVM->gmm.s.Stats.Reserved.cFixedPages
+ && pGVM->gmm.s.Stats.Reserved.cShadowPages)
+ {
+ /*
+ * Check if we can accommodate this.
+ */
+ /* ... later ... */
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Update the records.
+ */
+ pGMM->cReservedPages -= pGVM->gmm.s.Stats.Reserved.cBasePages
+ + pGVM->gmm.s.Stats.Reserved.cFixedPages
+ + pGVM->gmm.s.Stats.Reserved.cShadowPages;
+ pGMM->cReservedPages += cBasePages + cFixedPages + cShadowPages;
+
+ pGVM->gmm.s.Stats.Reserved.cBasePages = cBasePages;
+ pGVM->gmm.s.Stats.Reserved.cFixedPages = cFixedPages;
+ pGVM->gmm.s.Stats.Reserved.cShadowPages = cShadowPages;
+ }
+ }
+ else
+ rc = VERR_WRONG_ORDER;
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0UpdateReservation: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0UpdateReservation.
+ *
+ * @returns see GMMR0UpdateReservation.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0UpdateReservationReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGMMUPDATERESERVATIONREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pVM, VERR_INVALID_POINTER);
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GMMR0UpdateReservation(pGVM, pVM, idCpu, pReq->cBasePages, pReq->cShadowPages, pReq->cFixedPages);
+}
+
+#ifdef GMMR0_WITH_SANITY_CHECK
+
+/**
+ * Performs sanity checks on a free set.
+ *
+ * @returns Error count.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param pSet Pointer to the set.
+ * @param pszSetName The set name.
+ * @param pszFunction The function from which it was called.
+ * @param   uLineNo     The line number.
+ */
+static uint32_t gmmR0SanityCheckSet(PGMM pGMM, PGMMCHUNKFREESET pSet, const char *pszSetName,
+ const char *pszFunction, unsigned uLineNo)
+{
+ uint32_t cErrors = 0;
+
+ /*
+ * Count the free pages in all the chunks and match it against pSet->cFreePages.
+ */
+ uint32_t cPages = 0;
+ for (unsigned i = 0; i < RT_ELEMENTS(pSet->apLists); i++)
+ {
+ for (PGMMCHUNK pCur = pSet->apLists[i]; pCur; pCur = pCur->pFreeNext)
+ {
+            /** @todo check that the chunk is hashed into the right set. */
+ cPages += pCur->cFree;
+ }
+ }
+ if (RT_UNLIKELY(cPages != pSet->cFreePages))
+ {
+ SUPR0Printf("GMM insanity: found %#x pages in the %s set, expected %#x. (%s, line %u)\n",
+ cPages, pszSetName, pSet->cFreePages, pszFunction, uLineNo);
+ cErrors++;
+ }
+
+ return cErrors;
+}
+
+
+/**
+ * Performs some sanity checks on the GMM while owning the lock.
+ *
+ * @returns Error count.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param pszFunction The function from which it is called.
+ * @param uLineNo The line number.
+ */
+static uint32_t gmmR0SanityCheck(PGMM pGMM, const char *pszFunction, unsigned uLineNo)
+{
+ uint32_t cErrors = 0;
+
+ cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->PrivateX, "private", pszFunction, uLineNo);
+ cErrors += gmmR0SanityCheckSet(pGMM, &pGMM->Shared, "shared", pszFunction, uLineNo);
+ /** @todo add more sanity checks. */
+
+ return cErrors;
+}
+
+#endif /* GMMR0_WITH_SANITY_CHECK */
+
+/**
+ * Looks up a chunk in the tree and fills in the TLB entry for it.
+ *
+ * This is not expected to fail and will bitch if it does.
+ *
+ * @returns Pointer to the allocation chunk, NULL if not found.
+ * @param pGMM Pointer to the GMM instance.
+ * @param idChunk The ID of the chunk to find.
+ * @param pTlbe Pointer to the TLB entry.
+ */
+static PGMMCHUNK gmmR0GetChunkSlow(PGMM pGMM, uint32_t idChunk, PGMMCHUNKTLBE pTlbe)
+{
+ PGMMCHUNK pChunk = (PGMMCHUNK)RTAvlU32Get(&pGMM->pChunks, idChunk);
+ AssertMsgReturn(pChunk, ("Chunk %#x not found!\n", idChunk), NULL);
+ pTlbe->idChunk = idChunk;
+ pTlbe->pChunk = pChunk;
+ return pChunk;
+}
+
+
+/**
+ * Finds an allocation chunk.
+ *
+ * This is not expected to fail and will bitch if it does.
+ *
+ * @returns Pointer to the allocation chunk, NULL if not found.
+ * @param pGMM Pointer to the GMM instance.
+ * @param idChunk The ID of the chunk to find.
+ */
+DECLINLINE(PGMMCHUNK) gmmR0GetChunk(PGMM pGMM, uint32_t idChunk)
+{
+ /*
+ * Do a TLB lookup, branch if not in the TLB.
+ */
+ PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(idChunk)];
+ if ( pTlbe->idChunk != idChunk
+ || !pTlbe->pChunk)
+ return gmmR0GetChunkSlow(pGMM, idChunk, pTlbe);
+ return pTlbe->pChunk;
+}
+
+
+/**
+ * Finds a page.
+ *
+ * This is not expected to fail and will bitch if it does.
+ *
+ * @returns Pointer to the page, NULL if not found.
+ * @param pGMM Pointer to the GMM instance.
+ * @param idPage The ID of the page to find.
+ */
+DECLINLINE(PGMMPAGE) gmmR0GetPage(PGMM pGMM, uint32_t idPage)
+{
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+ if (RT_LIKELY(pChunk))
+ return &pChunk->aPages[idPage & GMM_PAGEID_IDX_MASK];
+ return NULL;
+}
+
+
+#if 0 /* unused */
+/**
+ * Gets the host physical address for a page given by its ID.
+ *
+ * @returns The host physical address or NIL_RTHCPHYS.
+ * @param pGMM Pointer to the GMM instance.
+ * @param idPage The ID of the page to find.
+ */
+DECLINLINE(RTHCPHYS) gmmR0GetPageHCPhys(PGMM pGMM, uint32_t idPage)
+{
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+ if (RT_LIKELY(pChunk))
+ return RTR0MemObjGetPagePhysAddr(pChunk->hMemObj, idPage & GMM_PAGEID_IDX_MASK);
+ return NIL_RTHCPHYS;
+}
+#endif /* unused */
+
+
+/**
+ * Selects the appropriate free list given the number of free pages.
+ *
+ * @returns Free list index.
+ * @param cFree The number of free pages in the chunk.
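+ *
+ * @remarks The index is simply the free count shifted right by
+ *          GMM_CHUNK_FREE_SET_SHIFT, so chunks with more free pages end up on
+ *          higher-index lists.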
+ */
+DECLINLINE(unsigned) gmmR0SelectFreeSetList(unsigned cFree)
+{
+ unsigned iList = cFree >> GMM_CHUNK_FREE_SET_SHIFT;
+ AssertMsg(iList < RT_SIZEOFMEMB(GMMCHUNKFREESET, apLists) / RT_SIZEOFMEMB(GMMCHUNKFREESET, apLists[0]),
+ ("%d (%u)\n", iList, cFree));
+ return iList;
+}
+
+
+/**
+ * Unlinks the chunk from the free list it's currently on (if any).
+ *
+ * @param pChunk The allocation chunk.
+ */
+DECLINLINE(void) gmmR0UnlinkChunk(PGMMCHUNK pChunk)
+{
+ PGMMCHUNKFREESET pSet = pChunk->pSet;
+ if (RT_LIKELY(pSet))
+ {
+ pSet->cFreePages -= pChunk->cFree;
+ pSet->idGeneration++;
+
+ PGMMCHUNK pPrev = pChunk->pFreePrev;
+ PGMMCHUNK pNext = pChunk->pFreeNext;
+ if (pPrev)
+ pPrev->pFreeNext = pNext;
+ else
+ pSet->apLists[gmmR0SelectFreeSetList(pChunk->cFree)] = pNext;
+ if (pNext)
+ pNext->pFreePrev = pPrev;
+
+ pChunk->pSet = NULL;
+ pChunk->pFreeNext = NULL;
+ pChunk->pFreePrev = NULL;
+ }
+ else
+ {
+ Assert(!pChunk->pFreeNext);
+ Assert(!pChunk->pFreePrev);
+ Assert(!pChunk->cFree);
+ }
+}
+
+
+/**
+ * Links the chunk onto the appropriate free list in the specified free set.
+ *
+ * If the chunk has no free entries, it's not linked into any list.
+ *
+ * @param pChunk The allocation chunk.
+ * @param pSet The free set.
+ */
+DECLINLINE(void) gmmR0LinkChunk(PGMMCHUNK pChunk, PGMMCHUNKFREESET pSet)
+{
+ Assert(!pChunk->pSet);
+ Assert(!pChunk->pFreeNext);
+ Assert(!pChunk->pFreePrev);
+
+ if (pChunk->cFree > 0)
+ {
+ pChunk->pSet = pSet;
+ pChunk->pFreePrev = NULL;
+ unsigned const iList = gmmR0SelectFreeSetList(pChunk->cFree);
+ pChunk->pFreeNext = pSet->apLists[iList];
+ if (pChunk->pFreeNext)
+ pChunk->pFreeNext->pFreePrev = pChunk;
+ pSet->apLists[iList] = pChunk;
+
+ pSet->cFreePages += pChunk->cFree;
+ pSet->idGeneration++;
+ }
+}
+
+
+/**
+ * Selects the appropriate free set for the chunk and links it onto the
+ * corresponding free list there.
+ *
+ * If the chunk has no free entries, it's not linked into any list.
+ *
+ * @param   pGMM        Pointer to the GMM instance.
+ * @param   pGVM        Pointer to the kernel-only VM instance data.
+ * @param pChunk The allocation chunk.
+ */
+DECLINLINE(void) gmmR0SelectSetAndLinkChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
+{
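+    /* Bound memory mode keeps chunks in the owning VM's private set; otherwise
+       chunks holding shared pages go to the global shared set and the rest to
+       the global private set. */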
+ PGMMCHUNKFREESET pSet;
+ if (pGMM->fBoundMemoryMode)
+ pSet = &pGVM->gmm.s.Private;
+ else if (pChunk->cShared)
+ pSet = &pGMM->Shared;
+ else
+ pSet = &pGMM->PrivateX;
+ gmmR0LinkChunk(pChunk, pSet);
+}
+
+
+/**
+ * Frees a Chunk ID.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param idChunk The Chunk ID to free.
+ */
+static void gmmR0FreeChunkId(PGMM pGMM, uint32_t idChunk)
+{
+ AssertReturnVoid(idChunk != NIL_GMM_CHUNKID);
+ AssertMsg(ASMBitTest(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk));
+ ASMAtomicBitClear(&pGMM->bmChunkId[0], idChunk);
+}
+
+
+/**
+ * Allocates a new Chunk ID.
+ *
+ * @returns The Chunk ID.
+ * @param pGMM Pointer to the GMM instance.
+ */
+static uint32_t gmmR0AllocateChunkId(PGMM pGMM)
+{
+ AssertCompile(!((GMM_CHUNKID_LAST + 1) & 31)); /* must be a multiple of 32 */
+ AssertCompile(NIL_GMM_CHUNKID == 0);
+
+ /*
+ * Try the next sequential one.
+ */
+ int32_t idChunk = ++pGMM->idChunkPrev;
+#if 0 /** @todo enable this code */
+ if ( idChunk <= GMM_CHUNKID_LAST
+ && idChunk > NIL_GMM_CHUNKID
+        && !ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk))
+ return idChunk;
+#endif
+
+ /*
+ * Scan sequentially from the last one.
+ */
+ if ( (uint32_t)idChunk < GMM_CHUNKID_LAST
+ && idChunk > NIL_GMM_CHUNKID)
+ {
+ idChunk = ASMBitNextClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1, idChunk - 1);
+ if (idChunk > NIL_GMM_CHUNKID)
+ {
+ AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
+ return pGMM->idChunkPrev = idChunk;
+ }
+ }
+
+ /*
+ * Ok, scan from the start.
+ * We're not racing anyone, so there is no need to expect failures or have restart loops.
+ */
+ idChunk = ASMBitFirstClear(&pGMM->bmChunkId[0], GMM_CHUNKID_LAST + 1);
+    AssertMsgReturn(idChunk > NIL_GMM_CHUNKID, ("%#x\n", idChunk), NIL_GMM_CHUNKID);
+ AssertMsgReturn(!ASMAtomicBitTestAndSet(&pGMM->bmChunkId[0], idChunk), ("%#x\n", idChunk), NIL_GMM_CHUNKID);
+
+ return pGMM->idChunkPrev = idChunk;
+}
+
+
+/**
+ * Allocates one private page.
+ *
+ * Worker for gmmR0AllocatePages.
+ *
+ * @param pChunk The chunk to allocate it from.
+ * @param hGVM The GVM handle of the VM requesting memory.
+ * @param pPageDesc The page descriptor.
+ */
+static void gmmR0AllocatePage(PGMMCHUNK pChunk, uint32_t hGVM, PGMMPAGEDESC pPageDesc)
+{
+ /* update the chunk stats. */
+ if (pChunk->hGVM == NIL_GVM_HANDLE)
+ pChunk->hGVM = hGVM;
+ Assert(pChunk->cFree);
+ pChunk->cFree--;
+ pChunk->cPrivate++;
+
+ /* unlink the first free page. */
+ const uint32_t iPage = pChunk->iFreeHead;
+ AssertReleaseMsg(iPage < RT_ELEMENTS(pChunk->aPages), ("%d\n", iPage));
+ PGMMPAGE pPage = &pChunk->aPages[iPage];
+ Assert(GMM_PAGE_IS_FREE(pPage));
+ pChunk->iFreeHead = pPage->Free.iNext;
+ Log3(("A pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x iNext=%#x\n",
+ pPage, iPage, (pChunk->Core.Key << GMM_CHUNKID_SHIFT) | iPage,
+ pPage->Common.u2State, pChunk->iFreeHead, pPage->Free.iNext));
+
+ /* make the page private. */
+ pPage->u = 0;
+ AssertCompile(GMM_PAGE_STATE_PRIVATE == 0);
+ pPage->Private.hGVM = hGVM;
+ AssertCompile(NIL_RTHCPHYS >= GMM_GCPHYS_LAST);
+ AssertCompile(GMM_GCPHYS_UNSHAREABLE >= GMM_GCPHYS_LAST);
+ if (pPageDesc->HCPhysGCPhys <= GMM_GCPHYS_LAST)
+ pPage->Private.pfn = pPageDesc->HCPhysGCPhys >> PAGE_SHIFT;
+ else
+ pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE; /* unshareable / unassigned - same thing. */
+
+ /* update the page descriptor. */
+ pPageDesc->HCPhysGCPhys = RTR0MemObjGetPagePhysAddr(pChunk->hMemObj, iPage);
+ Assert(pPageDesc->HCPhysGCPhys != NIL_RTHCPHYS);
+ pPageDesc->idPage = (pChunk->Core.Key << GMM_CHUNKID_SHIFT) | iPage;
+ pPageDesc->idSharedPage = NIL_GMM_PAGEID;
+}
+
+
+/**
+ * Picks the free pages from a chunk.
+ *
+ * @returns The new page descriptor table index.
+ * @param pChunk The chunk.
+ * @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
+ * affinity.
+ * @param iPage The current page descriptor table index.
+ * @param cPages The total number of pages to allocate.
+ * @param   paPages     The page descriptor table (input + output).
+ */
+static uint32_t gmmR0AllocatePagesFromChunk(PGMMCHUNK pChunk, uint16_t const hGVM, uint32_t iPage, uint32_t cPages,
+ PGMMPAGEDESC paPages)
+{
+ PGMMCHUNKFREESET pSet = pChunk->pSet; Assert(pSet);
+ gmmR0UnlinkChunk(pChunk);
+
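+    /* Hand out pages one by one until the chunk runs dry or the request is
+       satisfied; the chunk is relinked below according to its new free count. */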
+ for (; pChunk->cFree && iPage < cPages; iPage++)
+ gmmR0AllocatePage(pChunk, hGVM, &paPages[iPage]);
+
+ gmmR0LinkChunk(pChunk, pSet);
+ return iPage;
+}
+
+
+/**
+ * Registers a new chunk of memory.
+ *
+ * This is called by gmmR0AllocateChunkNew, GMMR0AllocateLargePage and GMMR0SeedChunk.
+ *
+ * @returns VBox status code. On success, the giant GMM lock will be held, the
+ * caller must release it (ugly).
+ * @param pGMM Pointer to the GMM instance.
+ * @param pSet Pointer to the set.
+ * @param MemObj The memory object for the chunk.
+ * @param hGVM The affinity of the chunk. NIL_GVM_HANDLE for no
+ * affinity.
+ * @param fChunkFlags The chunk flags, GMM_CHUNK_FLAGS_XXX.
+ * @param ppChunk Chunk address (out). Optional.
+ *
+ * @remarks The caller must not own the giant GMM mutex.
+ * The giant GMM mutex will be acquired and returned acquired in
+ * the success path. On failure, no locks will be held.
+ */
+static int gmmR0RegisterChunk(PGMM pGMM, PGMMCHUNKFREESET pSet, RTR0MEMOBJ MemObj, uint16_t hGVM, uint16_t fChunkFlags,
+ PGMMCHUNK *ppChunk)
+{
+ Assert(pGMM->hMtxOwner != RTThreadNativeSelf());
+ Assert(hGVM != NIL_GVM_HANDLE || pGMM->fBoundMemoryMode);
+ Assert(fChunkFlags == 0 || fChunkFlags == GMM_CHUNK_FLAGS_LARGE_PAGE);
+
+ int rc;
+ PGMMCHUNK pChunk = (PGMMCHUNK)RTMemAllocZ(sizeof(*pChunk));
+ if (pChunk)
+ {
+ /*
+ * Initialize it.
+ */
+ pChunk->hMemObj = MemObj;
+ pChunk->cFree = GMM_CHUNK_NUM_PAGES;
+ pChunk->hGVM = hGVM;
+ /*pChunk->iFreeHead = 0;*/
+ pChunk->idNumaNode = gmmR0GetCurrentNumaNodeId();
+ pChunk->iChunkMtx = UINT8_MAX;
+ pChunk->fFlags = fChunkFlags;
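+        /* Chain all pages into the free list; the last entry terminates it with UINT16_MAX. */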
+ for (unsigned iPage = 0; iPage < RT_ELEMENTS(pChunk->aPages) - 1; iPage++)
+ {
+ pChunk->aPages[iPage].Free.u2State = GMM_PAGE_STATE_FREE;
+ pChunk->aPages[iPage].Free.iNext = iPage + 1;
+ }
+ pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.u2State = GMM_PAGE_STATE_FREE;
+ pChunk->aPages[RT_ELEMENTS(pChunk->aPages) - 1].Free.iNext = UINT16_MAX;
+
+ /*
+ * Allocate a Chunk ID and insert it into the tree.
+ * This has to be done behind the mutex of course.
+ */
+ rc = gmmR0MutexAcquire(pGMM);
+ if (RT_SUCCESS(rc))
+ {
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ pChunk->Core.Key = gmmR0AllocateChunkId(pGMM);
+ if ( pChunk->Core.Key != NIL_GMM_CHUNKID
+ && pChunk->Core.Key <= GMM_CHUNKID_LAST
+ && RTAvlU32Insert(&pGMM->pChunks, &pChunk->Core))
+ {
+ pGMM->cChunks++;
+ RTListAppend(&pGMM->ChunkList, &pChunk->ListNode);
+ gmmR0LinkChunk(pChunk, pSet);
+ LogFlow(("gmmR0RegisterChunk: pChunk=%p id=%#x cChunks=%d\n", pChunk, pChunk->Core.Key, pGMM->cChunks));
+
+ if (ppChunk)
+ *ppChunk = pChunk;
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ return VINF_SUCCESS;
+ }
+
+ /* bail out */
+ rc = VERR_GMM_CHUNK_INSERT;
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+ gmmR0MutexRelease(pGMM);
+ }
+
+ RTMemFree(pChunk);
+ }
+ else
+ rc = VERR_NO_MEMORY;
+ return rc;
+}
+
+
+/**
+ * Allocates a new chunk, immediately picks the requested pages from it, and
+ * adds what's remaining to the specified free set.
+ *
+ * @note This will leave the giant mutex while allocating the new chunk!
+ *
+ * @returns VBox status code.
+ * @param pGMM Pointer to the GMM instance data.
+ * @param   pGVM        Pointer to the kernel-only VM instance data.
+ * @param pSet Pointer to the free set.
+ * @param cPages The number of pages requested.
+ * @param paPages The page descriptor table (input + output).
+ * @param piPage The pointer to the page descriptor table index variable.
+ * This will be updated.
+ */
+static int gmmR0AllocateChunkNew(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet, uint32_t cPages,
+ PGMMPAGEDESC paPages, uint32_t *piPage)
+{
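+    /* Drop the giant GMM lock while asking the host for another chunk; the
+       physical memory allocation may take a long time. */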
+ gmmR0MutexRelease(pGMM);
+
+ RTR0MEMOBJ hMemObj;
+ int rc = RTR0MemObjAllocPhysNC(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS);
+ if (RT_SUCCESS(rc))
+ {
+/** @todo Duplicate gmmR0RegisterChunk here so we can avoid chaining up the
+ * free pages first and then unchaining them right afterwards. Instead
+ * do as much work as possible without holding the giant lock. */
+ PGMMCHUNK pChunk;
+ rc = gmmR0RegisterChunk(pGMM, pSet, hMemObj, pGVM->hSelf, 0 /*fChunkFlags*/, &pChunk);
+ if (RT_SUCCESS(rc))
+ {
+ *piPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, *piPage, cPages, paPages);
+ return VINF_SUCCESS;
+ }
+
+ /* bail out */
+ RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
+ }
+
+ int rc2 = gmmR0MutexAcquire(pGMM);
+ AssertRCReturn(rc2, RT_FAILURE(rc) ? rc : rc2);
+ return rc;
+
+}
+
+
+/**
+ * As a last resort we'll pick any page we can get.
+ *
+ * @returns The new page descriptor table index.
+ * @param pSet The set to pick from.
+ * @param pGVM Pointer to the global VM structure.
+ * @param iPage The current page descriptor table index.
+ * @param cPages The total number of pages to allocate.
+ * @param   paPages     The page descriptor table (input + output).
+ */
+static uint32_t gmmR0AllocatePagesIndiscriminately(PGMMCHUNKFREESET pSet, PGVM pGVM,
+ uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
+{
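+    /* Sweep the free lists from the ones with the most free pages downwards,
+       taking pages from any chunk regardless of owner or NUMA node. */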
+ unsigned iList = RT_ELEMENTS(pSet->apLists);
+ while (iList-- > 0)
+ {
+ PGMMCHUNK pChunk = pSet->apLists[iList];
+ while (pChunk)
+ {
+ PGMMCHUNK pNext = pChunk->pFreeNext;
+
+ iPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, iPage, cPages, paPages);
+ if (iPage >= cPages)
+ return iPage;
+
+ pChunk = pNext;
+ }
+ }
+ return iPage;
+}
+
+
+/**
+ * Pick pages from empty chunks on the same NUMA node.
+ *
+ * @returns The new page descriptor table index.
+ * @param pSet The set to pick from.
+ * @param pGVM Pointer to the global VM structure.
+ * @param iPage The current page descriptor table index.
+ * @param cPages The total number of pages to allocate.
+ * @param   paPages     The page descriptor table (input + output).
+ */
+static uint32_t gmmR0AllocatePagesFromEmptyChunksOnSameNode(PGMMCHUNKFREESET pSet, PGVM pGVM,
+ uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
+{
+ PGMMCHUNK pChunk = pSet->apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST];
+ if (pChunk)
+ {
+ uint16_t const idNumaNode = gmmR0GetCurrentNumaNodeId();
+ while (pChunk)
+ {
+ PGMMCHUNK pNext = pChunk->pFreeNext;
+
+ if (pChunk->idNumaNode == idNumaNode)
+ {
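+                /* Claim the completely unused chunk for this VM before taking pages from it. */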
+ pChunk->hGVM = pGVM->hSelf;
+ iPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, iPage, cPages, paPages);
+ if (iPage >= cPages)
+ {
+ pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
+ return iPage;
+ }
+ }
+
+ pChunk = pNext;
+ }
+ }
+ return iPage;
+}
+
+
+/**
+ * Pick pages from non-empty chunks on the same NUMA node.
+ *
+ * @returns The new page descriptor table index.
+ * @param pSet The set to pick from.
+ * @param pGVM Pointer to the global VM structure.
+ * @param iPage The current page descriptor table index.
+ * @param cPages The total number of pages to allocate.
+ * @param   paPages     The page descriptor table (input + output).
+ */
+static uint32_t gmmR0AllocatePagesFromSameNode(PGMMCHUNKFREESET pSet, PGVM pGVM,
+ uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
+{
+ /** @todo start by picking from chunks with about the right size first? */
+ uint16_t const idNumaNode = gmmR0GetCurrentNumaNodeId();
+ unsigned iList = GMM_CHUNK_FREE_SET_UNUSED_LIST;
+ while (iList-- > 0)
+ {
+ PGMMCHUNK pChunk = pSet->apLists[iList];
+ while (pChunk)
+ {
+ PGMMCHUNK pNext = pChunk->pFreeNext;
+
+ if (pChunk->idNumaNode == idNumaNode)
+ {
+ iPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, iPage, cPages, paPages);
+ if (iPage >= cPages)
+ {
+ pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
+ return iPage;
+ }
+ }
+
+ pChunk = pNext;
+ }
+ }
+ return iPage;
+}
+
+
+/**
+ * Pick pages that are in chunks already associated with the VM.
+ *
+ * @returns The new page descriptor table index.
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the global VM structure.
+ * @param pSet The set to pick from.
+ * @param iPage The current page descriptor table index.
+ * @param cPages The total number of pages to allocate.
+ * @param   paPages     The page descriptor table (input + output).
+ */
+static uint32_t gmmR0AllocatePagesAssociatedWithVM(PGMM pGMM, PGVM pGVM, PGMMCHUNKFREESET pSet,
+ uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
+{
+ uint16_t const hGVM = pGVM->hSelf;
+
+ /* Hint. */
+ if (pGVM->gmm.s.idLastChunkHint != NIL_GMM_CHUNKID)
+ {
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pGVM->gmm.s.idLastChunkHint);
+ if (pChunk && pChunk->cFree)
+ {
+ iPage = gmmR0AllocatePagesFromChunk(pChunk, hGVM, iPage, cPages, paPages);
+ if (iPage >= cPages)
+ return iPage;
+ }
+ }
+
+ /* Scan. */
+ for (unsigned iList = 0; iList < RT_ELEMENTS(pSet->apLists); iList++)
+ {
+ PGMMCHUNK pChunk = pSet->apLists[iList];
+ while (pChunk)
+ {
+ PGMMCHUNK pNext = pChunk->pFreeNext;
+
+ if (pChunk->hGVM == hGVM)
+ {
+ iPage = gmmR0AllocatePagesFromChunk(pChunk, hGVM, iPage, cPages, paPages);
+ if (iPage >= cPages)
+ {
+ pGVM->gmm.s.idLastChunkHint = pChunk->cFree ? pChunk->Core.Key : NIL_GMM_CHUNKID;
+ return iPage;
+ }
+ }
+
+ pChunk = pNext;
+ }
+ }
+ return iPage;
+}
+
+
+
+/**
+ * Pick pages in bound memory mode.
+ *
+ * @returns The new page descriptor table index.
+ * @param pGVM Pointer to the global VM structure.
+ * @param iPage The current page descriptor table index.
+ * @param cPages The total number of pages to allocate.
+ * @param   paPages     The page descriptor table (input + output).
+ */
+static uint32_t gmmR0AllocatePagesInBoundMode(PGVM pGVM, uint32_t iPage, uint32_t cPages, PGMMPAGEDESC paPages)
+{
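+    /* In bound memory mode the per-VM private set only contains this VM's own
+       chunks, so simply sweep all its lists. */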
+ for (unsigned iList = 0; iList < RT_ELEMENTS(pGVM->gmm.s.Private.apLists); iList++)
+ {
+ PGMMCHUNK pChunk = pGVM->gmm.s.Private.apLists[iList];
+ while (pChunk)
+ {
+ Assert(pChunk->hGVM == pGVM->hSelf);
+ PGMMCHUNK pNext = pChunk->pFreeNext;
+ iPage = gmmR0AllocatePagesFromChunk(pChunk, pGVM->hSelf, iPage, cPages, paPages);
+ if (iPage >= cPages)
+ return iPage;
+ pChunk = pNext;
+ }
+ }
+ return iPage;
+}
+
+
+/**
+ * Checks if we should start picking pages from chunks of other VMs because
+ * we're getting close to the system memory or reserved limit.
+ *
+ * @returns @c true if we should, @c false if we should first try to allocate
+ *          more chunks.
+ */
+static bool gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLimits(PGVM pGVM)
+{
+ /*
+     * Don't allocate a new chunk if we're getting close to the reservation limit.
+ */
+ uint64_t cPgReserved = pGVM->gmm.s.Stats.Reserved.cBasePages
+ + pGVM->gmm.s.Stats.Reserved.cFixedPages
+ - pGVM->gmm.s.Stats.cBalloonedPages
+ /** @todo what about shared pages? */;
+ uint64_t cPgAllocated = pGVM->gmm.s.Stats.Allocated.cBasePages
+ + pGVM->gmm.s.Stats.Allocated.cFixedPages;
+ uint64_t cPgDelta = cPgReserved - cPgAllocated;
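+    /* If less than four chunks worth of the reservation remains unallocated,
+       prefer reusing existing chunks over allocating new ones. */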
+ if (cPgDelta < GMM_CHUNK_NUM_PAGES * 4)
+ return true;
+ /** @todo make the threshold configurable, also test the code to see if
+ * this ever kicks in (we might be reserving too much or smth). */
+
+ /*
+     * Check how close we are to the max memory limit and how many fragments
+     * there are...
+ */
+ /** @todo. */
+
+ return false;
+}
+
+
+/**
+ * Checks if we should start picking pages from chunks of other VMs because
+ * there are a lot of free pages around.
+ *
+ * @returns @c true if we should, @c false if we should first try to allocate
+ *          more chunks.
+ */
+static bool gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLotsFree(PGMM pGMM)
+{
+ /*
+ * Setting the limit at 16 chunks (32 MB) at the moment.
+ */
+ if (pGMM->PrivateX.cFreePages >= GMM_CHUNK_NUM_PAGES * 16)
+ return true;
+ return false;
+}
+
+
+/**
+ * Common worker for GMMR0AllocateHandyPages and GMMR0AllocatePages.
+ *
+ * @returns VBox status code:
+ * @retval VINF_SUCCESS on success.
+ * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk or
+ * gmmR0AllocateMoreChunks is necessary.
+ * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
+ * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
+ * that is we're trying to allocate more than we've reserved.
+ *
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the VM.
+ * @param cPages The number of pages to allocate.
+ * @param paPages Pointer to the page descriptors. See GMMPAGEDESC for
+ * details on what is expected on input.
+ * @param enmAccount The account to charge.
+ *
+ * @remarks The caller must have taken the giant GMM lock.
+ */
+static int gmmR0AllocatePagesNew(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
+{
+ Assert(pGMM->hMtxOwner == RTThreadNativeSelf());
+
+ /*
+ * Check allocation limits.
+ */
+ if (RT_UNLIKELY(pGMM->cAllocatedPages + cPages > pGMM->cMaxPages))
+ return VERR_GMM_HIT_GLOBAL_LIMIT;
+
+ switch (enmAccount)
+ {
+ case GMMACCOUNT_BASE:
+ if (RT_UNLIKELY( pGVM->gmm.s.Stats.Allocated.cBasePages + pGVM->gmm.s.Stats.cBalloonedPages + cPages
+ > pGVM->gmm.s.Stats.Reserved.cBasePages))
+ {
+ Log(("gmmR0AllocatePages:Base: Reserved=%#llx Allocated+Ballooned+Requested=%#llx+%#llx+%#x!\n",
+ pGVM->gmm.s.Stats.Reserved.cBasePages, pGVM->gmm.s.Stats.Allocated.cBasePages,
+ pGVM->gmm.s.Stats.cBalloonedPages, cPages));
+ return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
+ }
+ break;
+ case GMMACCOUNT_SHADOW:
+ if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cShadowPages + cPages > pGVM->gmm.s.Stats.Reserved.cShadowPages))
+ {
+ Log(("gmmR0AllocatePages:Shadow: Reserved=%#x Allocated+Requested=%#x+%#x!\n",
+ pGVM->gmm.s.Stats.Reserved.cShadowPages, pGVM->gmm.s.Stats.Allocated.cShadowPages, cPages));
+ return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
+ }
+ break;
+ case GMMACCOUNT_FIXED:
+ if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cFixedPages + cPages > pGVM->gmm.s.Stats.Reserved.cFixedPages))
+ {
+ Log(("gmmR0AllocatePages:Fixed: Reserved=%#x Allocated+Requested=%#x+%#x!\n",
+ pGVM->gmm.s.Stats.Reserved.cFixedPages, pGVM->gmm.s.Stats.Allocated.cFixedPages, cPages));
+ return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
+ }
+ break;
+ default:
+ AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
+ }
+
+ /*
+     * If we're in legacy memory mode, it's easy to figure out up front
+     * whether we have a sufficient number of pages.
+ */
+ if ( pGMM->fLegacyAllocationMode
+ && pGVM->gmm.s.Private.cFreePages < cPages)
+ {
+ Assert(pGMM->fBoundMemoryMode);
+ return VERR_GMM_SEED_ME;
+ }
+
+ /*
+ * Update the accounts before we proceed because we might be leaving the
+ * protection of the global mutex and thus run the risk of permitting
+ * too much memory to be allocated.
+ */
+ switch (enmAccount)
+ {
+ case GMMACCOUNT_BASE: pGVM->gmm.s.Stats.Allocated.cBasePages += cPages; break;
+ case GMMACCOUNT_SHADOW: pGVM->gmm.s.Stats.Allocated.cShadowPages += cPages; break;
+ case GMMACCOUNT_FIXED: pGVM->gmm.s.Stats.Allocated.cFixedPages += cPages; break;
+ default: AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
+ }
+ pGVM->gmm.s.Stats.cPrivatePages += cPages;
+ pGMM->cAllocatedPages += cPages;
+
+ /*
+ * Part two of it's-easy-in-legacy-memory-mode.
+ */
+ uint32_t iPage = 0;
+ if (pGMM->fLegacyAllocationMode)
+ {
+ iPage = gmmR0AllocatePagesInBoundMode(pGVM, iPage, cPages, paPages);
+ AssertReleaseReturn(iPage == cPages, VERR_GMM_ALLOC_PAGES_IPE);
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Bound mode is also relatively straightforward.
+ */
+ int rc = VINF_SUCCESS;
+ if (pGMM->fBoundMemoryMode)
+ {
+ iPage = gmmR0AllocatePagesInBoundMode(pGVM, iPage, cPages, paPages);
+ if (iPage < cPages)
+ do
+ rc = gmmR0AllocateChunkNew(pGMM, pGVM, &pGVM->gmm.s.Private, cPages, paPages, &iPage);
+ while (iPage < cPages && RT_SUCCESS(rc));
+ }
+ /*
+     * Shared mode is trickier as we should try to achieve the same locality as
+ * in bound mode, but smartly make use of non-full chunks allocated by
+ * other VMs if we're low on memory.
+ */
+ else
+ {
+ /* Pick the most optimal pages first. */
+ iPage = gmmR0AllocatePagesAssociatedWithVM(pGMM, pGVM, &pGMM->PrivateX, iPage, cPages, paPages);
+ if (iPage < cPages)
+ {
+ /* Maybe we should try getting pages from chunks "belonging" to
+ other VMs before allocating more chunks? */
+ bool fTriedOnSameAlready = false;
+ if (gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLimits(pGVM))
+ {
+ iPage = gmmR0AllocatePagesFromSameNode(&pGMM->PrivateX, pGVM, iPage, cPages, paPages);
+ fTriedOnSameAlready = true;
+ }
+
+ /* Allocate memory from empty chunks. */
+ if (iPage < cPages)
+ iPage = gmmR0AllocatePagesFromEmptyChunksOnSameNode(&pGMM->PrivateX, pGVM, iPage, cPages, paPages);
+
+ /* Grab empty shared chunks. */
+ if (iPage < cPages)
+ iPage = gmmR0AllocatePagesFromEmptyChunksOnSameNode(&pGMM->Shared, pGVM, iPage, cPages, paPages);
+
+            /* If there are a lot of free pages spread around, try not to waste
+ system memory on more chunks. (Should trigger defragmentation.) */
+ if ( !fTriedOnSameAlready
+ && gmmR0ShouldAllocatePagesInOtherChunksBecauseOfLotsFree(pGMM))
+ {
+ iPage = gmmR0AllocatePagesFromSameNode(&pGMM->PrivateX, pGVM, iPage, cPages, paPages);
+ if (iPage < cPages)
+ iPage = gmmR0AllocatePagesIndiscriminately(&pGMM->PrivateX, pGVM, iPage, cPages, paPages);
+ }
+
+ /*
+ * Ok, try allocate new chunks.
+ */
+ if (iPage < cPages)
+ {
+ do
+ rc = gmmR0AllocateChunkNew(pGMM, pGVM, &pGMM->PrivateX, cPages, paPages, &iPage);
+ while (iPage < cPages && RT_SUCCESS(rc));
+
+ /* If the host is out of memory, take whatever we can get. */
+ if ( (rc == VERR_NO_MEMORY || rc == VERR_NO_PHYS_MEMORY)
+ && pGMM->PrivateX.cFreePages + pGMM->Shared.cFreePages >= cPages - iPage)
+ {
+ iPage = gmmR0AllocatePagesIndiscriminately(&pGMM->PrivateX, pGVM, iPage, cPages, paPages);
+ if (iPage < cPages)
+ iPage = gmmR0AllocatePagesIndiscriminately(&pGMM->Shared, pGVM, iPage, cPages, paPages);
+ AssertRelease(iPage == cPages);
+ rc = VINF_SUCCESS;
+ }
+ }
+ }
+ }
+
+ /*
+ * Clean up on failure. Since this is bound to be a low-memory condition
+ * we will give back any empty chunks that might be hanging around.
+ */
+ if (RT_FAILURE(rc))
+ {
+ /* Update the statistics. */
+ pGVM->gmm.s.Stats.cPrivatePages -= cPages;
+ pGMM->cAllocatedPages -= cPages - iPage;
+ switch (enmAccount)
+ {
+ case GMMACCOUNT_BASE: pGVM->gmm.s.Stats.Allocated.cBasePages -= cPages; break;
+ case GMMACCOUNT_SHADOW: pGVM->gmm.s.Stats.Allocated.cShadowPages -= cPages; break;
+ case GMMACCOUNT_FIXED: pGVM->gmm.s.Stats.Allocated.cFixedPages -= cPages; break;
+ default: AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
+ }
+
+ /* Release the pages. */
+ while (iPage-- > 0)
+ {
+ uint32_t idPage = paPages[iPage].idPage;
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
+ if (RT_LIKELY(pPage))
+ {
+ Assert(GMM_PAGE_IS_PRIVATE(pPage));
+ Assert(pPage->Private.hGVM == pGVM->hSelf);
+ gmmR0FreePrivatePage(pGMM, pGVM, idPage, pPage);
+ }
+ else
+ AssertMsgFailed(("idPage=%#x\n", idPage));
+
+ paPages[iPage].idPage = NIL_GMM_PAGEID;
+ paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
+ paPages[iPage].HCPhysGCPhys = NIL_RTHCPHYS;
+ }
+
+ /* Free empty chunks. */
+ /** @todo */
+
+ /* return the fail status on failure */
+ return rc;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Updates the previous allocations and allocates more pages.
+ *
+ * The handy pages are always taken from the 'base' memory account.
+ * The allocated pages are not cleared and will contain random garbage.
+ *
+ * @returns VBox status code:
+ * @retval VINF_SUCCESS on success.
+ * @retval VERR_NOT_OWNER if the caller is not an EMT.
+ * @retval VERR_GMM_PAGE_NOT_FOUND if one of the pages to update wasn't found.
+ * @retval VERR_GMM_PAGE_NOT_PRIVATE if one of the pages to update wasn't a
+ * private page.
+ * @retval VERR_GMM_PAGE_NOT_SHARED if one of the pages to update wasn't a
+ * shared page.
+ * @retval VERR_GMM_NOT_PAGE_OWNER if one of the pages to be updated wasn't
+ * owned by the VM.
+ * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
+ * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
+ * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
+ * that is we're trying to allocate more than we've reserved.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param cPagesToUpdate The number of pages to update (starting from the head).
+ * @param cPagesToAlloc The number of pages to allocate (starting from the head).
+ * @param paPages The array of page descriptors.
+ * See GMMPAGEDESC for details on what is expected on input.
+ * @thread EMT(idCpu)
+ */
+GMMR0DECL(int) GMMR0AllocateHandyPages(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint32_t cPagesToUpdate,
+ uint32_t cPagesToAlloc, PGMMPAGEDESC paPages)
+{
+ LogFlow(("GMMR0AllocateHandyPages: pGVM=%p pVM=%p cPagesToUpdate=%#x cPagesToAlloc=%#x paPages=%p\n",
+ pGVM, pVM, cPagesToUpdate, cPagesToAlloc, paPages));
+
+ /*
+ * Validate, get basics and take the semaphore.
+ * (This is a relatively busy path, so make predictions where possible.)
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
+ AssertMsgReturn( (cPagesToUpdate && cPagesToUpdate < 1024)
+ || (cPagesToAlloc && cPagesToAlloc < 1024),
+ ("cPagesToUpdate=%#x cPagesToAlloc=%#x\n", cPagesToUpdate, cPagesToAlloc),
+ VERR_INVALID_PARAMETER);
+
+ unsigned iPage = 0;
+ for (; iPage < cPagesToUpdate; iPage++)
+ {
+ AssertMsgReturn( ( paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
+ && !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK))
+ || paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
+ || paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE,
+ ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys),
+ VERR_INVALID_PARAMETER);
+ AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
+ /*|| paPages[iPage].idPage == NIL_GMM_PAGEID*/,
+ ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
+        AssertMsgReturn(    paPages[iPage].idSharedPage <= GMM_PAGEID_LAST
+ /*|| paPages[iPage].idSharedPage == NIL_GMM_PAGEID*/,
+ ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
+ }
+
+ for (; iPage < cPagesToAlloc; iPage++)
+ {
+ AssertMsgReturn(paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS, ("#%#x: %RHp\n", iPage, paPages[iPage].HCPhysGCPhys), VERR_INVALID_PARAMETER);
+ AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
+ AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
+ }
+
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ /* No allocations before the initial reservation has been made! */
+ if (RT_LIKELY( pGVM->gmm.s.Stats.Reserved.cBasePages
+ && pGVM->gmm.s.Stats.Reserved.cFixedPages
+ && pGVM->gmm.s.Stats.Reserved.cShadowPages))
+ {
+ /*
+ * Perform the updates.
+ * Stop on the first error.
+ */
+ for (iPage = 0; iPage < cPagesToUpdate; iPage++)
+ {
+ if (paPages[iPage].idPage != NIL_GMM_PAGEID)
+ {
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idPage);
+ if (RT_LIKELY(pPage))
+ {
+ if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
+ {
+ if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
+ {
+ AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
+ if (RT_LIKELY(paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST))
+ pPage->Private.pfn = paPages[iPage].HCPhysGCPhys >> PAGE_SHIFT;
+ else if (paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE)
+ pPage->Private.pfn = GMM_PAGE_PFN_UNSHAREABLE;
+ /* else: NIL_RTHCPHYS nothing */
+
+ paPages[iPage].idPage = NIL_GMM_PAGEID;
+ paPages[iPage].HCPhysGCPhys = NIL_RTHCPHYS;
+ }
+ else
+ {
+ Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not owner! hGVM=%#x hSelf=%#x\n",
+ iPage, paPages[iPage].idPage, pPage->Private.hGVM, pGVM->hSelf));
+ rc = VERR_GMM_NOT_PAGE_OWNER;
+ break;
+ }
+ }
+ else
+ {
+ Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not private! %.*Rhxs (type %d)\n", iPage, paPages[iPage].idPage, sizeof(*pPage), pPage, pPage->Common.u2State));
+ rc = VERR_GMM_PAGE_NOT_PRIVATE;
+ break;
+ }
+ }
+ else
+ {
+ Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (private)\n", iPage, paPages[iPage].idPage));
+ rc = VERR_GMM_PAGE_NOT_FOUND;
+ break;
+ }
+ }
+
+ if (paPages[iPage].idSharedPage != NIL_GMM_PAGEID)
+ {
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, paPages[iPage].idSharedPage);
+ if (RT_LIKELY(pPage))
+ {
+ if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
+ {
+ AssertCompile(NIL_RTHCPHYS > GMM_GCPHYS_LAST && GMM_GCPHYS_UNSHAREABLE > GMM_GCPHYS_LAST);
+ Assert(pPage->Shared.cRefs);
+ Assert(pGVM->gmm.s.Stats.cSharedPages);
+ Assert(pGVM->gmm.s.Stats.Allocated.cBasePages);
+
+ Log(("GMMR0AllocateHandyPages: free shared page %x cRefs=%d\n", paPages[iPage].idSharedPage, pPage->Shared.cRefs));
+ pGVM->gmm.s.Stats.cSharedPages--;
+ pGVM->gmm.s.Stats.Allocated.cBasePages--;
+ if (!--pPage->Shared.cRefs)
+ gmmR0FreeSharedPage(pGMM, pGVM, paPages[iPage].idSharedPage, pPage);
+ else
+ {
+ Assert(pGMM->cDuplicatePages);
+ pGMM->cDuplicatePages--;
+ }
+
+ paPages[iPage].idSharedPage = NIL_GMM_PAGEID;
+ }
+ else
+ {
+ Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not shared!\n", iPage, paPages[iPage].idSharedPage));
+ rc = VERR_GMM_PAGE_NOT_SHARED;
+ break;
+ }
+ }
+ else
+ {
+ Log(("GMMR0AllocateHandyPages: #%#x/%#x: Not found! (shared)\n", iPage, paPages[iPage].idSharedPage));
+ rc = VERR_GMM_PAGE_NOT_FOUND;
+ break;
+ }
+ }
+ } /* for each page to update */
+
+ if (RT_SUCCESS(rc) && cPagesToAlloc > 0)
+ {
+#if defined(VBOX_STRICT) && 0 /** @todo re-test this later. Appeared to be a PGM init bug. */
+ for (iPage = 0; iPage < cPagesToAlloc; iPage++)
+ {
+ Assert(paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS);
+ Assert(paPages[iPage].idPage == NIL_GMM_PAGEID);
+ Assert(paPages[iPage].idSharedPage == NIL_GMM_PAGEID);
+ }
+#endif
+
+ /*
+ * Join paths with GMMR0AllocatePages for the allocation.
+             * Note! gmmR0AllocatePagesNew may leave the protection of the mutex!
+ */
+ rc = gmmR0AllocatePagesNew(pGMM, pGVM, cPagesToAlloc, paPages, GMMACCOUNT_BASE);
+ }
+ }
+ else
+ rc = VERR_WRONG_ORDER;
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0AllocateHandyPages: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Allocate one or more pages.
+ *
+ * This is typically used for ROMs and MMIO2 (VRAM) during VM creation.
+ * The allocated pages are not cleared and will contain random garbage.
+ *
+ * @returns VBox status code:
+ * @retval VINF_SUCCESS on success.
+ * @retval VERR_NOT_OWNER if the caller is not an EMT.
+ * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
+ * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
+ * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
+ * that is we're trying to allocate more than we've reserved.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param cPages The number of pages to allocate.
+ * @param paPages Pointer to the page descriptors.
+ * See GMMPAGEDESC for details on what is expected on
+ * input.
+ * @param enmAccount The account to charge.
+ *
+ * @thread EMT.
+ */
+GMMR0DECL(int) GMMR0AllocatePages(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMPAGEDESC paPages, GMMACCOUNT enmAccount)
+{
+ LogFlow(("GMMR0AllocatePages: pGVM=%p pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pGVM, pVM, cPages, paPages, enmAccount));
+
+ /*
+ * Validate, get basics and take the semaphore.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
+ AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
+ AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
+
+ for (unsigned iPage = 0; iPage < cPages; iPage++)
+ {
+ AssertMsgReturn( paPages[iPage].HCPhysGCPhys == NIL_RTHCPHYS
+ || paPages[iPage].HCPhysGCPhys == GMM_GCPHYS_UNSHAREABLE
+ || ( enmAccount == GMMACCOUNT_BASE
+ && paPages[iPage].HCPhysGCPhys <= GMM_GCPHYS_LAST
+ && !(paPages[iPage].HCPhysGCPhys & PAGE_OFFSET_MASK)),
+ ("#%#x: %RHp enmAccount=%d\n", iPage, paPages[iPage].HCPhysGCPhys, enmAccount),
+ VERR_INVALID_PARAMETER);
+ AssertMsgReturn(paPages[iPage].idPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
+ AssertMsgReturn(paPages[iPage].idSharedPage == NIL_GMM_PAGEID, ("#%#x: %#x\n", iPage, paPages[iPage].idSharedPage), VERR_INVALID_PARAMETER);
+ }
+
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+
+ /* No allocations before the initial reservation has been made! */
+ if (RT_LIKELY( pGVM->gmm.s.Stats.Reserved.cBasePages
+ && pGVM->gmm.s.Stats.Reserved.cFixedPages
+ && pGVM->gmm.s.Stats.Reserved.cShadowPages))
+ rc = gmmR0AllocatePagesNew(pGMM, pGVM, cPages, paPages, enmAccount);
+ else
+ rc = VERR_WRONG_ORDER;
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0AllocatePages: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0AllocatePages.
+ *
+ * @returns see GMMR0AllocatePages.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0AllocatePagesReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGMMALLOCATEPAGESREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0]),
+ ("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMALLOCATEPAGESREQ, aPages[0])),
+ VERR_INVALID_PARAMETER);
+ AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF_DYN(GMMALLOCATEPAGESREQ, aPages[pReq->cPages]),
+ ("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF_DYN(GMMALLOCATEPAGESREQ, aPages[pReq->cPages])),
+ VERR_INVALID_PARAMETER);
+
+ return GMMR0AllocatePages(pGVM, pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
+}
+
+
+/**
+ * Allocate a large page to represent guest RAM.
+ *
+ * The allocated pages are not cleared and will contain random garbage.
+ *
+ * @returns VBox status code:
+ * @retval VINF_SUCCESS on success.
+ * @retval VERR_NOT_OWNER if the caller is not an EMT.
+ * @retval VERR_GMM_SEED_ME if seeding via GMMR0SeedChunk is necessary.
+ * @retval VERR_GMM_HIT_GLOBAL_LIMIT if we've exhausted the available pages.
+ * @retval VERR_GMM_HIT_VM_ACCOUNT_LIMIT if we've hit the VM account limit,
+ * that is we're trying to allocate more than we've reserved.
+ * @returns see GMMR0AllocatePages.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param cbPage Large page size.
+ * @param pIdPage Where to return the GMM page ID of the page.
+ * @param pHCPhys Where to return the host physical address of the page.
+ */
+GMMR0DECL(int) GMMR0AllocateLargePage(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint32_t cbPage, uint32_t *pIdPage, RTHCPHYS *pHCPhys)
+{
+ LogFlow(("GMMR0AllocateLargePage: pGVM=%p pVM=%p cbPage=%x\n", pGVM, pVM, cbPage));
+
+ AssertReturn(cbPage == GMM_CHUNK_SIZE, VERR_INVALID_PARAMETER);
+ AssertPtrReturn(pIdPage, VERR_INVALID_PARAMETER);
+ AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
+
+ /*
+ * Validate, get basics and take the semaphore.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
+ if (pGMM->fLegacyAllocationMode)
+ return VERR_NOT_SUPPORTED;
+
+ *pHCPhys = NIL_RTHCPHYS;
+ *pIdPage = NIL_GMM_PAGEID;
+
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
+ if (RT_UNLIKELY( pGVM->gmm.s.Stats.Allocated.cBasePages + pGVM->gmm.s.Stats.cBalloonedPages + cPages
+ > pGVM->gmm.s.Stats.Reserved.cBasePages))
+ {
+ Log(("GMMR0AllocateLargePage: Reserved=%#llx Allocated+Requested=%#llx+%#x!\n",
+ pGVM->gmm.s.Stats.Reserved.cBasePages, pGVM->gmm.s.Stats.Allocated.cBasePages, cPages));
+ gmmR0MutexRelease(pGMM);
+ return VERR_GMM_HIT_VM_ACCOUNT_LIMIT;
+ }
+
+ /*
+ * Allocate a new large page chunk.
+ *
+ * Note! We leave the giant GMM lock temporarily as the allocation might
+ * take a long time. gmmR0RegisterChunk will retake it (ugly).
+ */
+ AssertCompile(GMM_CHUNK_SIZE == _2M);
+ gmmR0MutexRelease(pGMM);
+
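+        /* The backing chunk must be physically contiguous and 2MB aligned so it
+           can be used as a single large page. */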
+ RTR0MEMOBJ hMemObj;
+ rc = RTR0MemObjAllocPhysEx(&hMemObj, GMM_CHUNK_SIZE, NIL_RTHCPHYS, GMM_CHUNK_SIZE);
+ if (RT_SUCCESS(rc))
+ {
+ PGMMCHUNKFREESET pSet = pGMM->fBoundMemoryMode ? &pGVM->gmm.s.Private : &pGMM->PrivateX;
+ PGMMCHUNK pChunk;
+ rc = gmmR0RegisterChunk(pGMM, pSet, hMemObj, pGVM->hSelf, GMM_CHUNK_FLAGS_LARGE_PAGE, &pChunk);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Allocate all the pages in the chunk.
+ */
+ /* Unlink the new chunk from the free list. */
+ gmmR0UnlinkChunk(pChunk);
+
+ /** @todo rewrite this to skip the looping. */
+ /* Allocate all pages. */
+ GMMPAGEDESC PageDesc;
+ gmmR0AllocatePage(pChunk, pGVM->hSelf, &PageDesc);
+
+ /* Return the first page as we'll use the whole chunk as one big page. */
+ *pIdPage = PageDesc.idPage;
+ *pHCPhys = PageDesc.HCPhysGCPhys;
+
+ for (unsigned i = 1; i < cPages; i++)
+ gmmR0AllocatePage(pChunk, pGVM->hSelf, &PageDesc);
+
+ /* Update accounting. */
+ pGVM->gmm.s.Stats.Allocated.cBasePages += cPages;
+ pGVM->gmm.s.Stats.cPrivatePages += cPages;
+ pGMM->cAllocatedPages += cPages;
+
+ gmmR0LinkChunk(pChunk, pSet);
+ gmmR0MutexRelease(pGMM);
+ }
+ else
+ RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
+ }
+ }
+ else
+ {
+ gmmR0MutexRelease(pGMM);
+ rc = VERR_GMM_IS_NOT_SANE;
+ }
+
+ LogFlow(("GMMR0AllocateLargePage: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Free a large page.
+ *
+ * @returns VBox status code:
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param idPage The large page id.
+ */
+GMMR0DECL(int) GMMR0FreeLargePage(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint32_t idPage)
+{
+ LogFlow(("GMMR0FreeLargePage: pGVM=%p pVM=%p idPage=%x\n", pGVM, pVM, idPage));
+
+ /*
+ * Validate, get basics and take the semaphore.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /* Not supported in legacy mode where we allocate the memory in ring 3 and lock it in ring 0. */
+ if (pGMM->fLegacyAllocationMode)
+ return VERR_NOT_SUPPORTED;
+
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ const unsigned cPages = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
+
+ if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cBasePages < cPages))
+ {
+ Log(("GMMR0FreeLargePage: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Stats.Allocated.cBasePages, cPages));
+ gmmR0MutexRelease(pGMM);
+ return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
+ }
+
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
+ if (RT_LIKELY( pPage
+ && GMM_PAGE_IS_PRIVATE(pPage)))
+ {
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+ Assert(pChunk);
+ Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
+ Assert(pChunk->cPrivate > 0);
+
+ /* Release the memory immediately. */
+ gmmR0FreeChunk(pGMM, NULL, pChunk, false /*fRelaxedSem*/); /** @todo this can be relaxed too! */
+
+ /* Update accounting. */
+ pGVM->gmm.s.Stats.Allocated.cBasePages -= cPages;
+ pGVM->gmm.s.Stats.cPrivatePages -= cPages;
+ pGMM->cAllocatedPages -= cPages;
+ }
+ else
+ rc = VERR_GMM_PAGE_NOT_FOUND;
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0FreeLargePage: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0FreeLargePage.
+ *
+ * @returns see GMMR0FreeLargePage.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0FreeLargePageReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGMMFREELARGEPAGEREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMFREEPAGESREQ),
+ ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(GMMFREEPAGESREQ)),
+ VERR_INVALID_PARAMETER);
+
+ return GMMR0FreeLargePage(pGVM, pVM, idCpu, pReq->idPage);
+}
+
+
+/**
+ * Frees a chunk, giving it back to the host OS.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param pGVM This is set when called from GMMR0CleanupVM so we can
+ * unmap and free the chunk in one go.
+ * @param pChunk The chunk to free.
+ * @param fRelaxedSem Whether we can release the semaphore while doing the
+ * freeing (@c true) or not.
+ */
+static bool gmmR0FreeChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem)
+{
+ Assert(pChunk->Core.Key != NIL_GMM_CHUNKID);
+
+ GMMR0CHUNKMTXSTATE MtxState;
+ gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
+
+ /*
+     * Cleanup hack! Unmap the chunk from the caller's address space.
+ * This shouldn't happen, so screw lock contention...
+ */
+ if ( pChunk->cMappingsX
+ && !pGMM->fLegacyAllocationMode
+ && pGVM)
+ gmmR0UnmapChunkLocked(pGMM, pGVM, pChunk);
+
+ /*
+ * If there are current mappings of the chunk, then request the
+ * VMs to unmap them. Reposition the chunk in the free list so
+ * it won't be a likely candidate for allocations.
+ */
+ if (pChunk->cMappingsX)
+ {
+ /** @todo R0 -> VM request */
+ /* The chunk can be mapped by more than one VM if fBoundMemoryMode is false! */
+ Log(("gmmR0FreeChunk: chunk still has %d mappings; don't free!\n", pChunk->cMappingsX));
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ return false;
+ }
+
+
+ /*
+ * Save and trash the handle.
+ */
+ RTR0MEMOBJ const hMemObj = pChunk->hMemObj;
+ pChunk->hMemObj = NIL_RTR0MEMOBJ;
+
+ /*
+ * Unlink it from everywhere.
+ */
+ gmmR0UnlinkChunk(pChunk);
+
+ RTListNodeRemove(&pChunk->ListNode);
+
+ PAVLU32NODECORE pCore = RTAvlU32Remove(&pGMM->pChunks, pChunk->Core.Key);
+ Assert(pCore == &pChunk->Core); NOREF(pCore);
+
+ PGMMCHUNKTLBE pTlbe = &pGMM->ChunkTLB.aEntries[GMM_CHUNKTLB_IDX(pChunk->Core.Key)];
+ if (pTlbe->pChunk == pChunk)
+ {
+ pTlbe->idChunk = NIL_GMM_CHUNKID;
+ pTlbe->pChunk = NULL;
+ }
+
+ Assert(pGMM->cChunks > 0);
+ pGMM->cChunks--;
+
+ /*
+ * Free the Chunk ID before dropping the locks and freeing the rest.
+ */
+ gmmR0FreeChunkId(pGMM, pChunk->Core.Key);
+ pChunk->Core.Key = NIL_GMM_CHUNKID;
+
+ pGMM->cFreedChunks++;
+
+ gmmR0ChunkMutexRelease(&MtxState, NULL);
+ if (fRelaxedSem)
+ gmmR0MutexRelease(pGMM);
+
+ RTMemFree(pChunk->paMappingsX);
+ pChunk->paMappingsX = NULL;
+
+ RTMemFree(pChunk);
+
+ int rc = RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
+ AssertLogRelRC(rc);
+
+ if (fRelaxedSem)
+ gmmR0MutexAcquire(pGMM);
+ return fRelaxedSem;
+}
+
+
+/**
+ * Free page worker.
+ *
+ * The caller does all the statistic decrementing, we do all the incrementing.
+ *
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the GVM instance.
+ * @param pChunk Pointer to the chunk this page belongs to.
+ * @param idPage The Page ID.
+ * @param pPage Pointer to the page.
+ */
+static void gmmR0FreePageWorker(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, uint32_t idPage, PGMMPAGE pPage)
+{
+ Log3(("F pPage=%p iPage=%#x/%#x u2State=%d iFreeHead=%#x\n",
+ pPage, pPage - &pChunk->aPages[0], idPage, pPage->Common.u2State, pChunk->iFreeHead)); NOREF(idPage);
+
+ /*
+ * Put the page on the free list.
+ */
+ pPage->u = 0;
+ pPage->Free.u2State = GMM_PAGE_STATE_FREE;
+ Assert(pChunk->iFreeHead < RT_ELEMENTS(pChunk->aPages) || pChunk->iFreeHead == UINT16_MAX);
+ pPage->Free.iNext = pChunk->iFreeHead;
+ pChunk->iFreeHead = pPage - &pChunk->aPages[0];
+
+ /*
+ * Update statistics (the cShared/cPrivate stats are up to date already),
+ * and relink the chunk if necessary.
+ */
+ unsigned const cFree = pChunk->cFree;
+ if ( !cFree
+ || gmmR0SelectFreeSetList(cFree) != gmmR0SelectFreeSetList(cFree + 1))
+ {
+ gmmR0UnlinkChunk(pChunk);
+ pChunk->cFree++;
+ gmmR0SelectSetAndLinkChunk(pGMM, pGVM, pChunk);
+ }
+ else
+ {
+ pChunk->cFree = cFree + 1;
+ pChunk->pSet->cFreePages++;
+ }
+
+ /*
+ * If the chunk becomes empty, consider giving memory back to the host OS.
+ *
+     * The current strategy is to try to give it back if there are other chunks
+ * in this free list, meaning if there are at least 240 free pages in this
+ * category. Note that since there are probably mappings of the chunk,
+ * it won't be freed up instantly, which probably screws up this logic
+ * a bit...
+ */
+ /** @todo Do this on the way out. */
+ if (RT_UNLIKELY( pChunk->cFree == GMM_CHUNK_NUM_PAGES
+ && pChunk->pFreeNext
+ && pChunk->pFreePrev /** @todo this is probably misfiring, see reset... */
+ && !pGMM->fLegacyAllocationMode))
+ gmmR0FreeChunk(pGMM, NULL, pChunk, false);
+
+}
+
+
+/**
+ * Frees a shared page, the page is known to exist and be valid and such.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param pGVM Pointer to the GVM instance.
+ * @param idPage The page id.
+ * @param pPage The page structure.
+ */
+DECLINLINE(void) gmmR0FreeSharedPage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage)
+{
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+ Assert(pChunk);
+ Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
+ Assert(pChunk->cShared > 0);
+ Assert(pGMM->cSharedPages > 0);
+ Assert(pGMM->cAllocatedPages > 0);
+ Assert(!pPage->Shared.cRefs);
+
+ pChunk->cShared--;
+ pGMM->cAllocatedPages--;
+ pGMM->cSharedPages--;
+ gmmR0FreePageWorker(pGMM, pGVM, pChunk, idPage, pPage);
+}
+
+
+/**
+ * Frees a private page, the page is known to exist and be valid and such.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param pGVM Pointer to the GVM instance.
+ * @param idPage The page id.
+ * @param pPage The page structure.
+ */
+DECLINLINE(void) gmmR0FreePrivatePage(PGMM pGMM, PGVM pGVM, uint32_t idPage, PGMMPAGE pPage)
+{
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+ Assert(pChunk);
+ Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
+ Assert(pChunk->cPrivate > 0);
+ Assert(pGMM->cAllocatedPages > 0);
+
+ pChunk->cPrivate--;
+ pGMM->cAllocatedPages--;
+ gmmR0FreePageWorker(pGMM, pGVM, pChunk, idPage, pPage);
+}
+
+
+/**
+ * Common worker for GMMR0FreePages and GMMR0BalloonedPages.
+ *
+ * @returns VBox status code:
+ * @retval xxx
+ *
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the VM.
+ * @param cPages The number of pages to free.
+ * @param paPages Pointer to the page descriptors.
+ * @param enmAccount The account this relates to.
+ */
+static int gmmR0FreePages(PGMM pGMM, PGVM pGVM, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
+{
+ /*
+ * Check that the request isn't impossible wrt to the account status.
+ */
+ switch (enmAccount)
+ {
+ case GMMACCOUNT_BASE:
+ if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cBasePages < cPages))
+ {
+ Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Stats.Allocated.cBasePages, cPages));
+ return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
+ }
+ break;
+ case GMMACCOUNT_SHADOW:
+ if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cShadowPages < cPages))
+ {
+ Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Stats.Allocated.cShadowPages, cPages));
+ return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
+ }
+ break;
+ case GMMACCOUNT_FIXED:
+ if (RT_UNLIKELY(pGVM->gmm.s.Stats.Allocated.cFixedPages < cPages))
+ {
+ Log(("gmmR0FreePages: allocated=%#llx cPages=%#x!\n", pGVM->gmm.s.Stats.Allocated.cFixedPages, cPages));
+ return VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
+ }
+ break;
+ default:
+ AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
+ }
+
+ /*
+ * Walk the descriptors and free the pages.
+ *
+ * Statistics (except the account) are being updated as we go along,
+ * unlike the alloc code. Also, stop on the first error.
+ */
+ int rc = VINF_SUCCESS;
+ uint32_t iPage;
+ for (iPage = 0; iPage < cPages; iPage++)
+ {
+ uint32_t idPage = paPages[iPage].idPage;
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, idPage);
+ if (RT_LIKELY(pPage))
+ {
+ if (RT_LIKELY(GMM_PAGE_IS_PRIVATE(pPage)))
+ {
+ if (RT_LIKELY(pPage->Private.hGVM == pGVM->hSelf))
+ {
+ Assert(pGVM->gmm.s.Stats.cPrivatePages);
+ pGVM->gmm.s.Stats.cPrivatePages--;
+ gmmR0FreePrivatePage(pGMM, pGVM, idPage, pPage);
+ }
+ else
+ {
+ Log(("gmmR0AllocatePages: #%#x/%#x: not owner! hGVM=%#x hSelf=%#x\n", iPage, idPage,
+ pPage->Private.hGVM, pGVM->hSelf));
+ rc = VERR_GMM_NOT_PAGE_OWNER;
+ break;
+ }
+ }
+ else if (RT_LIKELY(GMM_PAGE_IS_SHARED(pPage)))
+ {
+ Assert(pGVM->gmm.s.Stats.cSharedPages);
+ Assert(pPage->Shared.cRefs);
+#if defined(VBOX_WITH_PAGE_SHARING) && defined(VBOX_STRICT) && HC_ARCH_BITS == 64
+ if (pPage->Shared.u14Checksum)
+ {
+ uint32_t uChecksum = gmmR0StrictPageChecksum(pGMM, pGVM, idPage);
+ uChecksum &= UINT32_C(0x00003fff);
+ AssertMsg(!uChecksum || uChecksum == pPage->Shared.u14Checksum,
+ ("%#x vs %#x - idPage=%#x\n", uChecksum, pPage->Shared.u14Checksum, idPage));
+ }
+#endif
+ pGVM->gmm.s.Stats.cSharedPages--;
+ if (!--pPage->Shared.cRefs)
+ gmmR0FreeSharedPage(pGMM, pGVM, idPage, pPage);
+ else
+ {
+ Assert(pGMM->cDuplicatePages);
+ pGMM->cDuplicatePages--;
+ }
+ }
+ else
+ {
+ Log(("gmmR0AllocatePages: #%#x/%#x: already free!\n", iPage, idPage));
+ rc = VERR_GMM_PAGE_ALREADY_FREE;
+ break;
+ }
+ }
+ else
+ {
+ Log(("gmmR0AllocatePages: #%#x/%#x: not found!\n", iPage, idPage));
+ rc = VERR_GMM_PAGE_NOT_FOUND;
+ break;
+ }
+ paPages[iPage].idPage = NIL_GMM_PAGEID;
+ }
+
+ /*
+ * Update the account.
+ */
+ switch (enmAccount)
+ {
+ case GMMACCOUNT_BASE: pGVM->gmm.s.Stats.Allocated.cBasePages -= iPage; break;
+ case GMMACCOUNT_SHADOW: pGVM->gmm.s.Stats.Allocated.cShadowPages -= iPage; break;
+ case GMMACCOUNT_FIXED: pGVM->gmm.s.Stats.Allocated.cFixedPages -= iPage; break;
+ default:
+ AssertMsgFailedReturn(("enmAccount=%d\n", enmAccount), VERR_IPE_NOT_REACHED_DEFAULT_CASE);
+ }
+
+ /*
+ * Any threshold stuff to be done here?
+ */
+
+ return rc;
+}
+
+
+/**
+ * Free one or more pages.
+ *
+ * This is typically used at reset time or power off.
+ *
+ * @returns VBox status code:
+ * @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH if more pages are freed than
+ * the account has allocated.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param cPages The number of pages to free.
+ * @param paPages Pointer to the page descriptors containing the page IDs
+ * for each page.
+ * @param enmAccount The account this relates to.
+ * @thread EMT.
+ */
+GMMR0DECL(int) GMMR0FreePages(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint32_t cPages, PGMMFREEPAGEDESC paPages, GMMACCOUNT enmAccount)
+{
+ LogFlow(("GMMR0FreePages: pGVM=%p pVM=%p cPages=%#x paPages=%p enmAccount=%d\n", pGVM, pVM, cPages, paPages, enmAccount));
+
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertPtrReturn(paPages, VERR_INVALID_PARAMETER);
+ AssertMsgReturn(enmAccount > GMMACCOUNT_INVALID && enmAccount < GMMACCOUNT_END, ("%d\n", enmAccount), VERR_INVALID_PARAMETER);
+ AssertMsgReturn(cPages > 0 && cPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cPages), VERR_INVALID_PARAMETER);
+
+ for (unsigned iPage = 0; iPage < cPages; iPage++)
+ AssertMsgReturn( paPages[iPage].idPage <= GMM_PAGEID_LAST
+ /*|| paPages[iPage].idPage == NIL_GMM_PAGEID*/,
+ ("#%#x: %#x\n", iPage, paPages[iPage].idPage), VERR_INVALID_PARAMETER);
+
+ /*
+ * Take the semaphore and call the worker function.
+ */
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ rc = gmmR0FreePages(pGMM, pGVM, cPages, paPages, enmAccount);
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0FreePages: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0FreePages.
+ *
+ * @returns see GMMR0FreePages.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0FreePagesReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGMMFREEPAGESREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq >= RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0]),
+ ("%#x < %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF(GMMFREEPAGESREQ, aPages[0])),
+ VERR_INVALID_PARAMETER);
+ AssertMsgReturn(pReq->Hdr.cbReq == RT_UOFFSETOF_DYN(GMMFREEPAGESREQ, aPages[pReq->cPages]),
+ ("%#x != %#x\n", pReq->Hdr.cbReq, RT_UOFFSETOF_DYN(GMMFREEPAGESREQ, aPages[pReq->cPages])),
+ VERR_INVALID_PARAMETER);
+
+ return GMMR0FreePages(pGVM, pVM, idCpu, pReq->cPages, &pReq->aPages[0], pReq->enmAccount);
+}
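+
+/*
+ * Illustrative sketch only (not taken from this file): how a ring-3 caller
+ * might size and fill a GMMFREEPAGESREQ for the wrapper above. The request
+ * is variable sized, which is why the cbReq checks use RT_UOFFSETOF_DYN.
+ * The page IDs and the VMMR3CallR0 / VMMR0_DO_GMM_FREE_PAGES plumbing are
+ * assumptions for illustration.
+ *
+ * @code
+ *    uint32_t const cPages = 2;
+ *    uint32_t const cbReq = RT_UOFFSETOF_DYN(GMMFREEPAGESREQ, aPages[cPages]);
+ *    PGMMFREEPAGESREQ pReq = (PGMMFREEPAGESREQ)RTMemAllocZ(cbReq);
+ *    pReq->Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+ *    pReq->Hdr.cbReq = cbReq;
+ *    pReq->enmAccount = GMMACCOUNT_BASE;
+ *    pReq->cPages = cPages;
+ *    pReq->aPages[0].idPage = idPage1; // hypothetical page IDs
+ *    pReq->aPages[1].idPage = idPage2;
+ *    int rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_FREE_PAGES, 0, &pReq->Hdr);
+ *    RTMemFree(pReq);
+ * @endcode
+ */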
+
+
+/**
+ * Report back on a memory ballooning request.
+ *
+ * The request may or may not have been initiated by the GMM. If it was initiated
+ * by the GMM it is important that this function is called even if no pages were
+ * ballooned.
+ *
+ * @returns VBox status code:
+ * @retval VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH
+ * @retval VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH
+ * @retval VERR_GMM_OVERCOMMITTED_TRY_AGAIN_IN_A_BIT - reset condition
+ * indicating that we won't necessarily have sufficient RAM to boot
+ * the VM again and that it should pause until this changes (we'll try
+ * to balloon some other VM). (For standard deflate we have little choice
+ * but to hope the VM won't use the memory that was returned to it.)
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param enmAction Inflate/deflate/reset.
+ * @param cBalloonedPages The number of pages that were ballooned.
+ *
+ * @thread EMT(idCpu)
+ */
+GMMR0DECL(int) GMMR0BalloonedPages(PGVM pGVM, PVM pVM, VMCPUID idCpu, GMMBALLOONACTION enmAction, uint32_t cBalloonedPages)
+{
+ LogFlow(("GMMR0BalloonedPages: pGVM=%p pVM=%p enmAction=%d cBalloonedPages=%#x\n",
+ pGVM, pVM, enmAction, cBalloonedPages));
+
+ AssertMsgReturn(cBalloonedPages < RT_BIT(32 - PAGE_SHIFT), ("%#x\n", cBalloonedPages), VERR_INVALID_PARAMETER);
+
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Take the semaphore and do some more validations.
+ */
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ switch (enmAction)
+ {
+ case GMMBALLOONACTION_INFLATE:
+ {
+ if (RT_LIKELY(pGVM->gmm.s.Stats.Allocated.cBasePages + pGVM->gmm.s.Stats.cBalloonedPages + cBalloonedPages
+ <= pGVM->gmm.s.Stats.Reserved.cBasePages))
+ {
+ /*
+ * Record the ballooned memory.
+ */
+ pGMM->cBalloonedPages += cBalloonedPages;
+ if (pGVM->gmm.s.Stats.cReqBalloonedPages)
+ {
+ /* Code path never taken. Might be interesting in the future to request ballooned memory from guests in low-memory conditions. */
+ AssertFailed();
+
+ pGVM->gmm.s.Stats.cBalloonedPages += cBalloonedPages;
+ pGVM->gmm.s.Stats.cReqActuallyBalloonedPages += cBalloonedPages;
+ Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx Req=%#llx Actual=%#llx (pending)\n",
+ cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.Stats.cBalloonedPages,
+ pGVM->gmm.s.Stats.cReqBalloonedPages, pGVM->gmm.s.Stats.cReqActuallyBalloonedPages));
+ }
+ else
+ {
+ pGVM->gmm.s.Stats.cBalloonedPages += cBalloonedPages;
+ Log(("GMMR0BalloonedPages: +%#x - Global=%#llx / VM: Total=%#llx (user)\n",
+ cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.Stats.cBalloonedPages));
+ }
+ }
+ else
+ {
+ Log(("GMMR0BalloonedPages: cBasePages=%#llx Total=%#llx cBalloonedPages=%#llx Reserved=%#llx\n",
+ pGVM->gmm.s.Stats.Allocated.cBasePages, pGVM->gmm.s.Stats.cBalloonedPages, cBalloonedPages,
+ pGVM->gmm.s.Stats.Reserved.cBasePages));
+ rc = VERR_GMM_ATTEMPT_TO_FREE_TOO_MUCH;
+ }
+ break;
+ }
+
+ case GMMBALLOONACTION_DEFLATE:
+ {
+ /* Deflate. */
+ if (pGVM->gmm.s.Stats.cBalloonedPages >= cBalloonedPages)
+ {
+ /*
+ * Record the ballooned memory.
+ */
+ Assert(pGMM->cBalloonedPages >= cBalloonedPages);
+ pGMM->cBalloonedPages -= cBalloonedPages;
+ pGVM->gmm.s.Stats.cBalloonedPages -= cBalloonedPages;
+ if (pGVM->gmm.s.Stats.cReqDeflatePages)
+ {
+ AssertFailed(); /* This path is for later. */
+ Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx Req=%#llx\n",
+ cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.Stats.cBalloonedPages, pGVM->gmm.s.Stats.cReqDeflatePages));
+
+ /*
+ * Anything we need to do here now when the request has been completed?
+ */
+ pGVM->gmm.s.Stats.cReqDeflatePages = 0;
+ }
+ else
+ Log(("GMMR0BalloonedPages: -%#x - Global=%#llx / VM: Total=%#llx (user)\n",
+ cBalloonedPages, pGMM->cBalloonedPages, pGVM->gmm.s.Stats.cBalloonedPages));
+ }
+ else
+ {
+ Log(("GMMR0BalloonedPages: Total=%#llx cBalloonedPages=%#llx\n", pGVM->gmm.s.Stats.cBalloonedPages, cBalloonedPages));
+ rc = VERR_GMM_ATTEMPT_TO_DEFLATE_TOO_MUCH;
+ }
+ break;
+ }
+
+ case GMMBALLOONACTION_RESET:
+ {
+ /* Reset to an empty balloon. */
+ Assert(pGMM->cBalloonedPages >= pGVM->gmm.s.Stats.cBalloonedPages);
+
+ pGMM->cBalloonedPages -= pGVM->gmm.s.Stats.cBalloonedPages;
+ pGVM->gmm.s.Stats.cBalloonedPages = 0;
+ break;
+ }
+
+ default:
+ rc = VERR_INVALID_PARAMETER;
+ break;
+ }
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR0BalloonedPages: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0BalloonedPages.
+ *
+ * @returns see GMMR0BalloonedPages.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0BalloonedPagesReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGMMBALLOONEDPAGESREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMBALLOONEDPAGESREQ),
+ ("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMBALLOONEDPAGESREQ)),
+ VERR_INVALID_PARAMETER);
+
+ return GMMR0BalloonedPages(pGVM, pVM, idCpu, pReq->enmAction, pReq->cBalloonedPages);
+}
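+
+/*
+ * Illustrative sketch only (not taken from this file): filling the fixed
+ * size GMMBALLOONEDPAGESREQ for an inflate report. The VMMR3CallR0 /
+ * VMMR0_DO_GMM_BALLOONED_PAGES plumbing is an assumption for illustration.
+ *
+ * @code
+ *    GMMBALLOONEDPAGESREQ Req;
+ *    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+ *    Req.Hdr.cbReq = sizeof(Req);
+ *    Req.enmAction = GMMBALLOONACTION_INFLATE;
+ *    Req.cBalloonedPages = 256; // pages the guest balloon driver just gave up
+ *    int rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_BALLOONED_PAGES, 0, &Req.Hdr);
+ * @endcode
+ */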
+
+
+/**
+ * Return memory statistics for the hypervisor
+ *
+ * @returns VBox status code.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0QueryHypervisorMemoryStatsReq(PGMMMEMSTATSREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
+ ("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
+ VERR_INVALID_PARAMETER);
+
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ pReq->cAllocPages = pGMM->cAllocatedPages;
+ pReq->cFreePages = (pGMM->cChunks << (GMM_CHUNK_SHIFT - PAGE_SHIFT)) - pGMM->cAllocatedPages;
+ pReq->cBalloonedPages = pGMM->cBalloonedPages;
+ pReq->cMaxPages = pGMM->cMaxPages;
+ pReq->cSharedPages = pGMM->cDuplicatePages;
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+
+ return VINF_SUCCESS;
+}
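+
+/*
+ * Illustrative note (assuming the usual 2 MB chunk, GMM_CHUNK_SHIFT=21, and
+ * 4 KB pages, PAGE_SHIFT=12): cFreePages above is derived rather than
+ * tracked. Each chunk then holds 1 << (21 - 12) = 512 pages, so e.g. 100
+ * chunks with 40000 allocated pages yields 100 * 512 - 40000 = 11200 free
+ * pages.
+ */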
+
+
+/**
+ * Return memory statistics for the VM
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu Cpu id.
+ * @param pReq Pointer to the request packet.
+ *
+ * @thread EMT(idCpu)
+ */
+GMMR0DECL(int) GMMR0QueryMemoryStatsReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGMMMEMSTATSREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(GMMMEMSTATSREQ),
+ ("%#x < %#x\n", pReq->Hdr.cbReq, sizeof(GMMMEMSTATSREQ)),
+ VERR_INVALID_PARAMETER);
+
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Take the semaphore and do some more validations.
+ */
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ pReq->cAllocPages = pGVM->gmm.s.Stats.Allocated.cBasePages;
+ pReq->cBalloonedPages = pGVM->gmm.s.Stats.cBalloonedPages;
+ pReq->cMaxPages = pGVM->gmm.s.Stats.Reserved.cBasePages;
+ pReq->cFreePages = pReq->cMaxPages - pReq->cAllocPages;
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+ gmmR0MutexRelease(pGMM);
+ LogFlow(("GMMR3QueryVMMemoryStats: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Worker for gmmR0UnmapChunk and gmmR0FreeChunk.
+ *
+ * Don't call this in legacy allocation mode!
+ *
+ * @returns VBox status code.
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the Global VM structure.
+ * @param pChunk Pointer to the chunk to be unmapped.
+ */
+static int gmmR0UnmapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk)
+{
+ Assert(!pGMM->fLegacyAllocationMode); NOREF(pGMM);
+
+ /*
+ * Find the mapping and try unmapping it.
+ */
+ uint32_t cMappings = pChunk->cMappingsX;
+ for (uint32_t i = 0; i < cMappings; i++)
+ {
+ Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
+ if (pChunk->paMappingsX[i].pGVM == pGVM)
+ {
+ /* unmap */
+ int rc = RTR0MemObjFree(pChunk->paMappingsX[i].hMapObj, false /* fFreeMappings (NA) */);
+ if (RT_SUCCESS(rc))
+ {
+ /* update the record. */
+ cMappings--;
+ if (i < cMappings)
+ pChunk->paMappingsX[i] = pChunk->paMappingsX[cMappings];
+ pChunk->paMappingsX[cMappings].hMapObj = NIL_RTR0MEMOBJ;
+ pChunk->paMappingsX[cMappings].pGVM = NULL;
+ Assert(pChunk->cMappingsX - 1U == cMappings);
+ pChunk->cMappingsX = cMappings;
+ }
+
+ return rc;
+ }
+ }
+
+ Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
+ return VERR_GMM_CHUNK_NOT_MAPPED;
+}
+
+
+/**
+ * Unmaps a chunk previously mapped into the address space of the current process.
+ *
+ * @returns VBox status code.
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the Global VM structure.
+ * @param pChunk Pointer to the chunk to be unmapped.
+ * @param fRelaxedSem Whether we can release the semaphore while doing the
+ * mapping (@c true) or not.
+ */
+static int gmmR0UnmapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem)
+{
+ if (!pGMM->fLegacyAllocationMode)
+ {
+ /*
+ * Lock the chunk and if possible leave the giant GMM lock.
+ */
+ GMMR0CHUNKMTXSTATE MtxState;
+ int rc = gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk,
+ fRelaxedSem ? GMMR0CHUNK_MTX_RETAKE_GIANT : GMMR0CHUNK_MTX_KEEP_GIANT);
+ if (RT_SUCCESS(rc))
+ {
+ rc = gmmR0UnmapChunkLocked(pGMM, pGVM, pChunk);
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ }
+ return rc;
+ }
+
+ if (pChunk->hGVM == pGVM->hSelf)
+ return VINF_SUCCESS;
+
+ Log(("gmmR0UnmapChunk: Chunk %#x is not mapped into pGVM=%p/%#x (legacy)\n", pChunk->Core.Key, pGVM, pGVM->hSelf));
+ return VERR_GMM_CHUNK_NOT_MAPPED;
+}
+
+
+/**
+ * Worker for gmmR0MapChunk.
+ *
+ * @returns VBox status code.
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the Global VM structure.
+ * @param pChunk Pointer to the chunk to be mapped.
+ * @param ppvR3 Where to store the ring-3 address of the mapping.
+ * In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will
+ * contain the address of the existing mapping.
+ */
+static int gmmR0MapChunkLocked(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
+{
+ /*
+ * If we're in legacy mode this is simple.
+ */
+ if (pGMM->fLegacyAllocationMode)
+ {
+ if (pChunk->hGVM != pGVM->hSelf)
+ {
+ Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
+ return VERR_GMM_CHUNK_NOT_FOUND;
+ }
+
+ *ppvR3 = RTR0MemObjAddressR3(pChunk->hMemObj);
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Check to see if the chunk is already mapped.
+ */
+ for (uint32_t i = 0; i < pChunk->cMappingsX; i++)
+ {
+ Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
+ if (pChunk->paMappingsX[i].pGVM == pGVM)
+ {
+ *ppvR3 = RTR0MemObjAddressR3(pChunk->paMappingsX[i].hMapObj);
+ Log(("gmmR0MapChunk: chunk %#x is already mapped at %p!\n", pChunk->Core.Key, *ppvR3));
+#ifdef VBOX_WITH_PAGE_SHARING
+ /* The ring-3 chunk cache can be out of sync; don't fail. */
+ return VINF_SUCCESS;
+#else
+ return VERR_GMM_CHUNK_ALREADY_MAPPED;
+#endif
+ }
+ }
+
+ /*
+ * Do the mapping.
+ */
+ RTR0MEMOBJ hMapObj;
+ int rc = RTR0MemObjMapUser(&hMapObj, pChunk->hMemObj, (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
+ if (RT_SUCCESS(rc))
+ {
+ /* reallocate the array? assumes few users per chunk (usually one). */
+ unsigned iMapping = pChunk->cMappingsX;
+ if ( iMapping <= 3
+ || (iMapping & 3) == 0)
+ {
+ unsigned cNewSize = iMapping <= 3
+ ? iMapping + 1
+ : iMapping + 4;
+ Assert(cNewSize < 4 || RT_ALIGN_32(cNewSize, 4) == cNewSize);
+ if (RT_UNLIKELY(cNewSize > UINT16_MAX))
+ {
+ rc = RTR0MemObjFree(hMapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
+ return VERR_GMM_TOO_MANY_CHUNK_MAPPINGS;
+ }
+
+ void *pvMappings = RTMemRealloc(pChunk->paMappingsX, cNewSize * sizeof(pChunk->paMappingsX[0]));
+ if (RT_UNLIKELY(!pvMappings))
+ {
+ rc = RTR0MemObjFree(hMapObj, false /* fFreeMappings (NA) */); AssertRC(rc);
+ return VERR_NO_MEMORY;
+ }
+ pChunk->paMappingsX = (PGMMCHUNKMAP)pvMappings;
+ }
+
+ /* insert new entry */
+ pChunk->paMappingsX[iMapping].hMapObj = hMapObj;
+ pChunk->paMappingsX[iMapping].pGVM = pGVM;
+ Assert(pChunk->cMappingsX == iMapping);
+ pChunk->cMappingsX = iMapping + 1;
+
+ *ppvR3 = RTR0MemObjAddressR3(hMapObj);
+ }
+
+ return rc;
+}
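+
+/*
+ * Illustrative note on the realloc policy above: paMappingsX grows one entry
+ * at a time up to four entries and in groups of four after that:
+ *
+ *    cMappingsX before insert:  0  1  2  3  4  5..7  8 ...
+ *    array size after insert :  1  2  3  4  8   8   12 ...
+ *
+ * i.e. a reallocation only happens when the current count is <= 3 or a
+ * multiple of 4, which keeps the allocation minimal for the common
+ * one-mapping-per-chunk case.
+ */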
+
+
+/**
+ * Maps a chunk into the user address space of the current process.
+ *
+ * @returns VBox status code.
+ * @param pGMM Pointer to the GMM instance data.
+ * @param pGVM Pointer to the Global VM structure.
+ * @param pChunk Pointer to the chunk to be mapped.
+ * @param fRelaxedSem Whether we can release the semaphore while doing the
+ * mapping (@c true) or not.
+ * @param ppvR3 Where to store the ring-3 address of the mapping.
+ * In the VERR_GMM_CHUNK_ALREADY_MAPPED case, this will
+ * contain the address of the existing mapping.
+ */
+static int gmmR0MapChunk(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, bool fRelaxedSem, PRTR3PTR ppvR3)
+{
+ /*
+ * Take the chunk lock and leave the giant GMM lock when possible, then
+ * call the worker function.
+ */
+ GMMR0CHUNKMTXSTATE MtxState;
+ int rc = gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk,
+ fRelaxedSem ? GMMR0CHUNK_MTX_RETAKE_GIANT : GMMR0CHUNK_MTX_KEEP_GIANT);
+ if (RT_SUCCESS(rc))
+ {
+ rc = gmmR0MapChunkLocked(pGMM, pGVM, pChunk, ppvR3);
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ }
+
+ return rc;
+}
+
+
+
+#if defined(VBOX_WITH_PAGE_SHARING) || (defined(VBOX_STRICT) && HC_ARCH_BITS == 64)
+/**
+ * Check if a chunk is mapped into the specified VM
+ *
+ * @returns mapped yes/no
+ * @param pGMM Pointer to the GMM instance.
+ * @param pGVM Pointer to the Global VM structure.
+ * @param pChunk Pointer to the chunk to be mapped.
+ * @param ppvR3 Where to store the ring-3 address of the mapping.
+ */
+static bool gmmR0IsChunkMapped(PGMM pGMM, PGVM pGVM, PGMMCHUNK pChunk, PRTR3PTR ppvR3)
+{
+ GMMR0CHUNKMTXSTATE MtxState;
+ gmmR0ChunkMutexAcquire(&MtxState, pGMM, pChunk, GMMR0CHUNK_MTX_KEEP_GIANT);
+ for (uint32_t i = 0; i < pChunk->cMappingsX; i++)
+ {
+ Assert(pChunk->paMappingsX[i].pGVM && pChunk->paMappingsX[i].hMapObj != NIL_RTR0MEMOBJ);
+ if (pChunk->paMappingsX[i].pGVM == pGVM)
+ {
+ *ppvR3 = RTR0MemObjAddressR3(pChunk->paMappingsX[i].hMapObj);
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ return true;
+ }
+ }
+ *ppvR3 = NULL;
+ gmmR0ChunkMutexRelease(&MtxState, pChunk);
+ return false;
+}
+#endif /* VBOX_WITH_PAGE_SHARING || (VBOX_STRICT && 64-BIT) */
+
+
+/**
+ * Map a chunk and/or unmap another chunk.
+ *
+ * The mapping and unmapping applies to the current process.
+ *
+ * This API does two things because it saves a kernel call per mapping
+ * when the ring-3 mapping cache is full.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idChunkMap The chunk to map. NIL_GMM_CHUNKID if nothing to map.
+ * @param idChunkUnmap The chunk to unmap. NIL_GMM_CHUNKID if nothing to unmap.
+ * @param ppvR3 Where to store the address of the mapped chunk. NULL is ok if nothing to map.
+ * @thread EMT ???
+ */
+GMMR0DECL(int) GMMR0MapUnmapChunk(PGVM pGVM, PVM pVM, uint32_t idChunkMap, uint32_t idChunkUnmap, PRTR3PTR ppvR3)
+{
+ LogFlow(("GMMR0MapUnmapChunk: pGVM=%p pVM=%p idChunkMap=%#x idChunkUnmap=%#x ppvR3=%p\n",
+ pGVM, pVM, idChunkMap, idChunkUnmap, ppvR3));
+
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVM(pGVM, pVM);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertCompile(NIL_GMM_CHUNKID == 0);
+ AssertMsgReturn(idChunkMap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkMap), VERR_INVALID_PARAMETER);
+ AssertMsgReturn(idChunkUnmap <= GMM_CHUNKID_LAST, ("%#x\n", idChunkUnmap), VERR_INVALID_PARAMETER);
+
+ if ( idChunkMap == NIL_GMM_CHUNKID
+ && idChunkUnmap == NIL_GMM_CHUNKID)
+ return VERR_INVALID_PARAMETER;
+
+ if (idChunkMap != NIL_GMM_CHUNKID)
+ {
+ AssertPtrReturn(ppvR3, VERR_INVALID_POINTER);
+ *ppvR3 = NIL_RTR3PTR;
+ }
+
+ /*
+ * Take the semaphore and do the work.
+ *
+ * The unmapping is done last since it's easier to undo a mapping than
+ * undoing an unmapping. The ring-3 mapping cache cannot be so big
+ * that it pushes the user virtual address space to within a chunk of
+ * its limits, so no problem here.
+ */
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ PGMMCHUNK pMap = NULL;
+ if (idChunkMap != NIL_GMM_CHUNKID)
+ {
+ pMap = gmmR0GetChunk(pGMM, idChunkMap);
+ if (RT_LIKELY(pMap))
+ rc = gmmR0MapChunk(pGMM, pGVM, pMap, true /*fRelaxedSem*/, ppvR3);
+ else
+ {
+ Log(("GMMR0MapUnmapChunk: idChunkMap=%#x\n", idChunkMap));
+ rc = VERR_GMM_CHUNK_NOT_FOUND;
+ }
+ }
+/** @todo split this operation, the bail out might (theoretically) not be
+ * entirely safe. */
+
+ if ( idChunkUnmap != NIL_GMM_CHUNKID
+ && RT_SUCCESS(rc))
+ {
+ PGMMCHUNK pUnmap = gmmR0GetChunk(pGMM, idChunkUnmap);
+ if (RT_LIKELY(pUnmap))
+ rc = gmmR0UnmapChunk(pGMM, pGVM, pUnmap, true /*fRelaxedSem*/);
+ else
+ {
+ Log(("GMMR0MapUnmapChunk: idChunkUnmap=%#x\n", idChunkUnmap));
+ rc = VERR_GMM_CHUNK_NOT_FOUND;
+ }
+
+ if (RT_FAILURE(rc) && pMap)
+ gmmR0UnmapChunk(pGMM, pGVM, pMap, false /*fRelaxedSem*/);
+ }
+
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+ gmmR0MutexRelease(pGMM);
+
+ LogFlow(("GMMR0MapUnmapChunk: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0MapUnmapChunk.
+ *
+ * @returns see GMMR0MapUnmapChunk.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0MapUnmapChunkReq(PGVM pGVM, PVM pVM, PGMMMAPUNMAPCHUNKREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GMMR0MapUnmapChunk(pGVM, pVM, pReq->idChunkMap, pReq->idChunkUnmap, &pReq->pvR3);
+}
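+
+/*
+ * Illustrative sketch only (not taken from this file): a ring-3 caller
+ * evicting one chunk from its mapping cache while mapping another in a
+ * single ring-0 call. The chunk IDs and the VMMR3CallR0 /
+ * VMMR0_DO_GMM_MAP_UNMAP_CHUNK plumbing are assumptions for illustration.
+ *
+ * @code
+ *    GMMMAPUNMAPCHUNKREQ Req;
+ *    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+ *    Req.Hdr.cbReq = sizeof(Req);
+ *    Req.idChunkMap = idChunkNew; // chunk to map into this process
+ *    Req.idChunkUnmap = idChunkEvicted; // chunk to drop from the cache
+ *    Req.pvR3 = NIL_RTR3PTR;
+ *    int rc = VMMR3CallR0(pVM, VMMR0_DO_GMM_MAP_UNMAP_CHUNK, 0, &Req.Hdr);
+ *    if (RT_SUCCESS(rc))
+ *        pvChunkNew = Req.pvR3; // ring-3 address of the new mapping
+ * @endcode
+ */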
+
+
+/**
+ * Legacy mode API for supplying pages.
+ *
+ * The specified user address points to an allocation-chunk-sized block that
+ * will be locked down and used by the GMM when the VM asks for pages.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param pvR3 Pointer to the chunk size memory block to lock down.
+ */
+GMMR0DECL(int) GMMR0SeedChunk(PGVM pGVM, PVM pVM, VMCPUID idCpu, RTR3PTR pvR3)
+{
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertPtrReturn(pvR3, VERR_INVALID_POINTER);
+ AssertReturn(!(PAGE_OFFSET_MASK & pvR3), VERR_INVALID_POINTER);
+
+ if (!pGMM->fLegacyAllocationMode)
+ {
+ Log(("GMMR0SeedChunk: not in legacy allocation mode!\n"));
+ return VERR_NOT_SUPPORTED;
+ }
+
+ /*
+ * Lock the memory and add it as new chunk with our hGVM.
+ * (The GMM locking is done inside gmmR0RegisterChunk.)
+ */
+ RTR0MEMOBJ MemObj;
+ rc = RTR0MemObjLockUser(&MemObj, pvR3, GMM_CHUNK_SIZE, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
+ if (RT_SUCCESS(rc))
+ {
+ rc = gmmR0RegisterChunk(pGMM, &pGVM->gmm.s.Private, MemObj, pGVM->hSelf, 0 /*fChunkFlags*/, NULL);
+ if (RT_SUCCESS(rc))
+ gmmR0MutexRelease(pGMM);
+ else
+ RTR0MemObjFree(MemObj, false /* fFreeMappings */);
+ }
+
+ LogFlow(("GMMR0SeedChunk: rc=%d (pvR3=%p)\n", rc, pvR3));
+ return rc;
+}
+
+#ifdef VBOX_WITH_PAGE_SHARING
+
+# ifdef VBOX_STRICT
+/**
+ * For checksumming shared pages in strict builds.
+ *
+ * The purpose is making sure that a page doesn't change.
+ *
+ * @returns Checksum, 0 on failure.
+ * @param pGMM The GMM instance data.
+ * @param pGVM Pointer to the kernel-only VM instance data.
+ * @param idPage The page ID.
+ */
+static uint32_t gmmR0StrictPageChecksum(PGMM pGMM, PGVM pGVM, uint32_t idPage)
+{
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+ AssertMsgReturn(pChunk, ("idPage=%#x\n", idPage), 0);
+
+ uint8_t *pbChunk;
+ if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
+ return 0;
+ uint8_t const *pbPage = pbChunk + ((idPage & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
+
+ return RTCrc32(pbPage, PAGE_SIZE);
+}
+# endif /* VBOX_STRICT */
+
+
+/**
+ * Calculates the module hash value.
+ *
+ * @returns Hash value.
+ * @param pszModuleName The module name.
+ * @param pszVersion The module version string.
+ */
+static uint32_t gmmR0ShModCalcHash(const char *pszModuleName, const char *pszVersion)
+{
+ return RTStrHash1ExN(3, pszModuleName, RTSTR_MAX, "::", (size_t)2, pszVersion, RTSTR_MAX);
+}
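+
+/*
+ * Illustrative note: the hash above is effectively taken over the
+ * concatenation "<name>::<version>", e.g. "kernel32.dll" + "6.1.7601"
+ * hashes the string "kernel32.dll::6.1.7601" (module name chosen for
+ * illustration). Identical name/version pairs therefore land on the same
+ * AVL key, and the remaining fields are compared by gmmR0ShModFindGlobal.
+ */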
+
+
+/**
+ * Finds a global module.
+ *
+ * @returns Pointer to the global module on success, NULL if not found.
+ * @param pGMM The GMM instance data.
+ * @param uHash The hash as calculated by gmmR0ShModCalcHash.
+ * @param cbModule The module size.
+ * @param enmGuestOS The guest OS type.
+ * @param cRegions The number of regions.
+ * @param pszModuleName The module name.
+ * @param pszVersion The module version.
+ * @param paRegions The region descriptions.
+ */
+static PGMMSHAREDMODULE gmmR0ShModFindGlobal(PGMM pGMM, uint32_t uHash, uint32_t cbModule, VBOXOSFAMILY enmGuestOS,
+ uint32_t cRegions, const char *pszModuleName, const char *pszVersion,
+ struct VMMDEVSHAREDREGIONDESC const *paRegions)
+{
+ for (PGMMSHAREDMODULE pGblMod = (PGMMSHAREDMODULE)RTAvllU32Get(&pGMM->pGlobalSharedModuleTree, uHash);
+ pGblMod;
+ pGblMod = (PGMMSHAREDMODULE)pGblMod->Core.pList)
+ {
+ if (pGblMod->cbModule != cbModule)
+ continue;
+ if (pGblMod->enmGuestOS != enmGuestOS)
+ continue;
+ if (pGblMod->cRegions != cRegions)
+ continue;
+ if (strcmp(pGblMod->szName, pszModuleName))
+ continue;
+ if (strcmp(pGblMod->szVersion, pszVersion))
+ continue;
+
+ uint32_t i;
+ for (i = 0; i < cRegions; i++)
+ {
+ uint32_t off = paRegions[i].GCRegionAddr & PAGE_OFFSET_MASK;
+ if (pGblMod->aRegions[i].off != off)
+ break;
+
+ uint32_t cb = RT_ALIGN_32(paRegions[i].cbRegion + off, PAGE_SIZE);
+ if (pGblMod->aRegions[i].cb != cb)
+ break;
+ }
+
+ if (i == cRegions)
+ return pGblMod;
+ }
+
+ return NULL;
+}
+
+
+/**
+ * Creates a new global module.
+ *
+ * @returns VBox status code.
+ * @param pGMM The GMM instance data.
+ * @param uHash The hash as calculated by gmmR0ShModCalcHash.
+ * @param cbModule The module size.
+ * @param enmGuestOS The guest OS type.
+ * @param cRegions The number of regions.
+ * @param pszModuleName The module name.
+ * @param pszVersion The module version.
+ * @param paRegions The region descriptions.
+ * @param ppGblMod Where to return the new module on success.
+ */
+static int gmmR0ShModNewGlobal(PGMM pGMM, uint32_t uHash, uint32_t cbModule, VBOXOSFAMILY enmGuestOS,
+ uint32_t cRegions, const char *pszModuleName, const char *pszVersion,
+ struct VMMDEVSHAREDREGIONDESC const *paRegions, PGMMSHAREDMODULE *ppGblMod)
+{
+ Log(("gmmR0ShModNewGlobal: %s %s size %#x os %u rgn %u\n", pszModuleName, pszVersion, cbModule, enmGuestOS, cRegions));
+ if (pGMM->cShareableModules >= GMM_MAX_SHARED_GLOBAL_MODULES)
+ {
+ Log(("gmmR0ShModNewGlobal: Too many modules\n"));
+ return VERR_GMM_TOO_MANY_GLOBAL_MODULES;
+ }
+
+ PGMMSHAREDMODULE pGblMod = (PGMMSHAREDMODULE)RTMemAllocZ(RT_UOFFSETOF_DYN(GMMSHAREDMODULE, aRegions[cRegions]));
+ if (!pGblMod)
+ {
+ Log(("gmmR0ShModNewGlobal: No memory\n"));
+ return VERR_NO_MEMORY;
+ }
+
+ pGblMod->Core.Key = uHash;
+ pGblMod->cbModule = cbModule;
+ pGblMod->cRegions = cRegions;
+ pGblMod->cUsers = 1;
+ pGblMod->enmGuestOS = enmGuestOS;
+ strcpy(pGblMod->szName, pszModuleName);
+ strcpy(pGblMod->szVersion, pszVersion);
+
+ for (uint32_t i = 0; i < cRegions; i++)
+ {
+ Log(("gmmR0ShModNewGlobal: rgn[%u]=%RGvLB%#x\n", i, paRegions[i].GCRegionAddr, paRegions[i].cbRegion));
+ pGblMod->aRegions[i].off = paRegions[i].GCRegionAddr & PAGE_OFFSET_MASK;
+ pGblMod->aRegions[i].cb = paRegions[i].cbRegion + pGblMod->aRegions[i].off;
+ pGblMod->aRegions[i].cb = RT_ALIGN_32(pGblMod->aRegions[i].cb, PAGE_SIZE);
+ pGblMod->aRegions[i].paidPages = NULL; /* allocated when needed. */
+ }
+
+ bool fInsert = RTAvllU32Insert(&pGMM->pGlobalSharedModuleTree, &pGblMod->Core);
+ Assert(fInsert); NOREF(fInsert);
+ pGMM->cShareableModules++;
+
+ *ppGblMod = pGblMod;
+ return VINF_SUCCESS;
+}
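+
+/*
+ * Illustrative note on the region rounding above: the sub-page offset is
+ * kept and the byte count is rounded up to whole pages. With hypothetical
+ * values GCRegionAddr = 0x00401c10 and cbRegion = 0x1810 this gives
+ * off = 0xc10 and cb = RT_ALIGN_32(0x1810 + 0xc10, PAGE_SIZE) = 0x3000,
+ * i.e. the region covers three pages even though its byte size would fit
+ * in two.
+ */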
+
+
+/**
+ * Deletes a global module which is no longer referenced by anyone.
+ *
+ * @param pGMM The GMM instance data.
+ * @param pGblMod The module to delete.
+ */
+static void gmmR0ShModDeleteGlobal(PGMM pGMM, PGMMSHAREDMODULE pGblMod)
+{
+ Assert(pGblMod->cUsers == 0);
+ Assert(pGMM->cShareableModules > 0 && pGMM->cShareableModules <= GMM_MAX_SHARED_GLOBAL_MODULES);
+
+ void *pvTest = RTAvllU32RemoveNode(&pGMM->pGlobalSharedModuleTree, &pGblMod->Core);
+ Assert(pvTest == pGblMod); NOREF(pvTest);
+ pGMM->cShareableModules--;
+
+ uint32_t i = pGblMod->cRegions;
+ while (i-- > 0)
+ {
+ if (pGblMod->aRegions[i].paidPages)
+ {
+ /* We don't do anything to the pages as they are handled by the
+ copy-on-write mechanism in PGM. */
+ RTMemFree(pGblMod->aRegions[i].paidPages);
+ pGblMod->aRegions[i].paidPages = NULL;
+ }
+ }
+ RTMemFree(pGblMod);
+}
+
+
+static int gmmR0ShModNewPerVM(PGVM pGVM, RTGCPTR GCBaseAddr, uint32_t cRegions, const VMMDEVSHAREDREGIONDESC *paRegions,
+ PGMMSHAREDMODULEPERVM *ppRecVM)
+{
+ if (pGVM->gmm.s.Stats.cShareableModules >= GMM_MAX_SHARED_PER_VM_MODULES)
+ return VERR_GMM_TOO_MANY_PER_VM_MODULES;
+
+ PGMMSHAREDMODULEPERVM pRecVM;
+ pRecVM = (PGMMSHAREDMODULEPERVM)RTMemAllocZ(RT_UOFFSETOF_DYN(GMMSHAREDMODULEPERVM, aRegionsGCPtrs[cRegions]));
+ if (!pRecVM)
+ return VERR_NO_MEMORY;
+
+ pRecVM->Core.Key = GCBaseAddr;
+ for (uint32_t i = 0; i < cRegions; i++)
+ pRecVM->aRegionsGCPtrs[i] = paRegions[i].GCRegionAddr;
+
+ bool fInsert = RTAvlGCPtrInsert(&pGVM->gmm.s.pSharedModuleTree, &pRecVM->Core);
+ Assert(fInsert); NOREF(fInsert);
+ pGVM->gmm.s.Stats.cShareableModules++;
+
+ *ppRecVM = pRecVM;
+ return VINF_SUCCESS;
+}
+
+
+static void gmmR0ShModDeletePerVM(PGMM pGMM, PGVM pGVM, PGMMSHAREDMODULEPERVM pRecVM, bool fRemove)
+{
+ /*
+ * Free the per-VM module.
+ */
+ PGMMSHAREDMODULE pGblMod = pRecVM->pGlobalModule;
+ pRecVM->pGlobalModule = NULL;
+
+ if (fRemove)
+ {
+ void *pvTest = RTAvlGCPtrRemove(&pGVM->gmm.s.pSharedModuleTree, pRecVM->Core.Key);
+ Assert(pvTest == &pRecVM->Core); NOREF(pvTest);
+ }
+
+ RTMemFree(pRecVM);
+
+ /*
+ * Release the global module.
+ * (In the registration bailout case, there might not be one.)
+ */
+ if (pGblMod)
+ {
+ Assert(pGblMod->cUsers > 0);
+ pGblMod->cUsers--;
+ if (pGblMod->cUsers == 0)
+ gmmR0ShModDeleteGlobal(pGMM, pGblMod);
+ }
+}
+
+#endif /* VBOX_WITH_PAGE_SHARING */
+
+/**
+ * Registers a new shared module for the VM.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param enmGuestOS The guest OS type.
+ * @param pszModuleName The module name.
+ * @param pszVersion The module version.
+ * @param GCPtrModBase The module base address.
+ * @param cbModule The module size.
+ * @param cRegions The number of shared region descriptors.
+ * @param paRegions Pointer to an array of shared region(s).
+ * @thread EMT(idCpu)
+ */
+GMMR0DECL(int) GMMR0RegisterSharedModule(PGVM pGVM, PVM pVM, VMCPUID idCpu, VBOXOSFAMILY enmGuestOS, char *pszModuleName,
+ char *pszVersion, RTGCPTR GCPtrModBase, uint32_t cbModule,
+ uint32_t cRegions, struct VMMDEVSHAREDREGIONDESC const *paRegions)
+{
+#ifdef VBOX_WITH_PAGE_SHARING
+ /*
+ * Validate input and get the basics.
+ *
+ * Note! Turns out the module size does not necessarily match the size of the
+ * regions. (iTunes on XP)
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ if (RT_UNLIKELY(cRegions > VMMDEVSHAREDREGIONDESC_MAX))
+ return VERR_GMM_TOO_MANY_REGIONS;
+
+ if (RT_UNLIKELY(cbModule == 0 || cbModule > _1G))
+ return VERR_GMM_BAD_SHARED_MODULE_SIZE;
+
+ uint32_t cbTotal = 0;
+ for (uint32_t i = 0; i < cRegions; i++)
+ {
+ if (RT_UNLIKELY(paRegions[i].cbRegion == 0 || paRegions[i].cbRegion > _1G))
+ return VERR_GMM_SHARED_MODULE_BAD_REGIONS_SIZE;
+
+ cbTotal += paRegions[i].cbRegion;
+ if (RT_UNLIKELY(cbTotal > _1G))
+ return VERR_GMM_SHARED_MODULE_BAD_REGIONS_SIZE;
+ }
+
+ AssertPtrReturn(pszModuleName, VERR_INVALID_POINTER);
+ if (RT_UNLIKELY(!memchr(pszModuleName, '\0', GMM_SHARED_MODULE_MAX_NAME_STRING)))
+ return VERR_GMM_MODULE_NAME_TOO_LONG;
+
+ AssertPtrReturn(pszVersion, VERR_INVALID_POINTER);
+ if (RT_UNLIKELY(!memchr(pszVersion, '\0', GMM_SHARED_MODULE_MAX_VERSION_STRING)))
+ return VERR_GMM_MODULE_NAME_TOO_LONG;
+
+ uint32_t const uHash = gmmR0ShModCalcHash(pszModuleName, pszVersion);
+ Log(("GMMR0RegisterSharedModule %s %s base %RGv size %x hash %x\n", pszModuleName, pszVersion, GCPtrModBase, cbModule, uHash));
+
+ /*
+ * Take the semaphore and do some more validations.
+ */
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ /*
+ * Check if this module is already locally registered and register
+ * it if it isn't. The base address is a unique module identifier
+ * locally.
+ */
+ PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCPtrModBase);
+ bool fNewModule = pRecVM == NULL;
+ if (fNewModule)
+ {
+ rc = gmmR0ShModNewPerVM(pGVM, GCPtrModBase, cRegions, paRegions, &pRecVM);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Find a matching global module, register a new one if needed.
+ */
+ PGMMSHAREDMODULE pGblMod = gmmR0ShModFindGlobal(pGMM, uHash, cbModule, enmGuestOS, cRegions,
+ pszModuleName, pszVersion, paRegions);
+ if (!pGblMod)
+ {
+ Assert(fNewModule);
+ rc = gmmR0ShModNewGlobal(pGMM, uHash, cbModule, enmGuestOS, cRegions,
+ pszModuleName, pszVersion, paRegions, &pGblMod);
+ if (RT_SUCCESS(rc))
+ {
+ pRecVM->pGlobalModule = pGblMod; /* (One reference returned by gmmR0ShModNewGlobal.) */
+ Log(("GMMR0RegisterSharedModule: new module %s %s\n", pszModuleName, pszVersion));
+ }
+ else
+ gmmR0ShModDeletePerVM(pGMM, pGVM, pRecVM, true /*fRemove*/);
+ }
+ else
+ {
+ Assert(pGblMod->cUsers > 0 && pGblMod->cUsers < UINT32_MAX / 2);
+ pGblMod->cUsers++;
+ pRecVM->pGlobalModule = pGblMod;
+
+ Log(("GMMR0RegisterSharedModule: new per vm module %s %s, gbl users %d\n", pszModuleName, pszVersion, pGblMod->cUsers));
+ }
+ }
+ }
+ else
+ {
+ /*
+ * Attempt to re-register an existing module.
+ */
+ PGMMSHAREDMODULE pGblMod = gmmR0ShModFindGlobal(pGMM, uHash, cbModule, enmGuestOS, cRegions,
+ pszModuleName, pszVersion, paRegions);
+ if (pRecVM->pGlobalModule == pGblMod)
+ {
+ Log(("GMMR0RegisterSharedModule: already registered %s %s, gbl users %d\n", pszModuleName, pszVersion, pGblMod->cUsers));
+ rc = VINF_GMM_SHARED_MODULE_ALREADY_REGISTERED;
+ }
+ else
+ {
+ /** @todo may have to unregister+register when this happens in case it's caused
+ * by VBoxService crashing and being restarted... */
+ Log(("GMMR0RegisterSharedModule: Address clash!\n"
+ " incoming at %RGvLB%#x %s %s rgns %u\n"
+ " existing at %RGvLB%#x %s %s rgns %u\n",
+ GCPtrModBase, cbModule, pszModuleName, pszVersion, cRegions,
+ pRecVM->Core.Key, pRecVM->pGlobalModule->cbModule, pRecVM->pGlobalModule->szName,
+ pRecVM->pGlobalModule->szVersion, pRecVM->pGlobalModule->cRegions));
+ rc = VERR_GMM_SHARED_MODULE_ADDRESS_CLASH;
+ }
+ }
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+ gmmR0MutexRelease(pGMM);
+ return rc;
+#else
+
+ NOREF(pGVM); NOREF(pVM); NOREF(idCpu); NOREF(enmGuestOS); NOREF(pszModuleName); NOREF(pszVersion);
+ NOREF(GCPtrModBase); NOREF(cbModule); NOREF(cRegions); NOREF(paRegions);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0RegisterSharedModule.
+ *
+ * @returns see GMMR0RegisterSharedModule.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0RegisterSharedModuleReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGMMREGISTERSHAREDMODULEREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn( pReq->Hdr.cbReq >= sizeof(*pReq)
+ && pReq->Hdr.cbReq == RT_UOFFSETOF_DYN(GMMREGISTERSHAREDMODULEREQ, aRegions[pReq->cRegions]),
+ ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ /* Pass back return code in the request packet to preserve informational codes. (VMMR3CallR0 chokes on them) */
+ pReq->rc = GMMR0RegisterSharedModule(pGVM, pVM, idCpu, pReq->enmGuestOS, pReq->szName, pReq->szVersion,
+ pReq->GCBaseAddr, pReq->cbModule, pReq->cRegions, pReq->aRegions);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Unregisters a shared module for the VM
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param pszModuleName The module name.
+ * @param pszVersion The module version.
+ * @param GCPtrModBase The module base address.
+ * @param cbModule The module size.
+ */
+GMMR0DECL(int) GMMR0UnregisterSharedModule(PGVM pGVM, PVM pVM, VMCPUID idCpu, char *pszModuleName, char *pszVersion,
+ RTGCPTR GCPtrModBase, uint32_t cbModule)
+{
+#ifdef VBOX_WITH_PAGE_SHARING
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ AssertPtrReturn(pszModuleName, VERR_INVALID_POINTER);
+ AssertPtrReturn(pszVersion, VERR_INVALID_POINTER);
+ if (RT_UNLIKELY(!memchr(pszModuleName, '\0', GMM_SHARED_MODULE_MAX_NAME_STRING)))
+ return VERR_GMM_MODULE_NAME_TOO_LONG;
+ if (RT_UNLIKELY(!memchr(pszVersion, '\0', GMM_SHARED_MODULE_MAX_VERSION_STRING)))
+ return VERR_GMM_MODULE_NAME_TOO_LONG;
+
+ Log(("GMMR0UnregisterSharedModule %s %s base=%RGv size %x\n", pszModuleName, pszVersion, GCPtrModBase, cbModule));
+
+ /*
+ * Take the semaphore and do some more validations.
+ */
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ /*
+ * Locate and remove the specified module.
+ */
+ PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)RTAvlGCPtrGet(&pGVM->gmm.s.pSharedModuleTree, GCPtrModBase);
+ if (pRecVM)
+ {
+ /** @todo Do we need to do more validations here, like that the
+ * name + version + cbModule matches? */
+ NOREF(cbModule);
+ Assert(pRecVM->pGlobalModule);
+ gmmR0ShModDeletePerVM(pGMM, pGVM, pRecVM, true /*fRemove*/);
+ }
+ else
+ rc = VERR_GMM_SHARED_MODULE_NOT_FOUND;
+
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+ gmmR0MutexRelease(pGMM);
+ return rc;
+#else
+
+ NOREF(pGVM); NOREF(pVM); NOREF(idCpu); NOREF(pszModuleName); NOREF(pszVersion); NOREF(GCPtrModBase); NOREF(cbModule);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0UnregisterSharedModule.
+ *
+ * @returns see GMMR0UnregisterSharedModule.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0UnregisterSharedModuleReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGMMUNREGISTERSHAREDMODULEREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GMMR0UnregisterSharedModule(pGVM, pVM, idCpu, pReq->szName, pReq->szVersion, pReq->GCBaseAddr, pReq->cbModule);
+}
+
+#ifdef VBOX_WITH_PAGE_SHARING
+
+/**
+ * Increases the use count of a shared page; the page is known to exist and be valid.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param pGVM Pointer to the GVM instance.
+ * @param pPage The page structure.
+ */
+DECLINLINE(void) gmmR0UseSharedPage(PGMM pGMM, PGVM pGVM, PGMMPAGE pPage)
+{
+ Assert(pGMM->cSharedPages > 0);
+ Assert(pGMM->cAllocatedPages > 0);
+
+ pGMM->cDuplicatePages++;
+
+ pPage->Shared.cRefs++;
+ pGVM->gmm.s.Stats.cSharedPages++;
+ pGVM->gmm.s.Stats.Allocated.cBasePages++;
+}
+
+
+/**
+ * Converts a private page to a shared page; the page is known to exist and be valid.
+ *
+ * @param pGMM Pointer to the GMM instance.
+ * @param pGVM Pointer to the GVM instance.
+ * @param HCPhys Host physical address
+ * @param idPage The Page ID
+ * @param pPage The page structure.
+ * @param pPageDesc Shared page descriptor
+ */
+DECLINLINE(void) gmmR0ConvertToSharedPage(PGMM pGMM, PGVM pGVM, RTHCPHYS HCPhys, uint32_t idPage, PGMMPAGE pPage,
+ PGMMSHAREDPAGEDESC pPageDesc)
+{
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, idPage >> GMM_CHUNKID_SHIFT);
+ Assert(pChunk);
+ Assert(pChunk->cFree < GMM_CHUNK_NUM_PAGES);
+ Assert(GMM_PAGE_IS_PRIVATE(pPage));
+
+ pChunk->cPrivate--;
+ pChunk->cShared++;
+
+ pGMM->cSharedPages++;
+
+ pGVM->gmm.s.Stats.cSharedPages++;
+ pGVM->gmm.s.Stats.cPrivatePages--;
+
+ /* Modify the page structure. */
+ pPage->Shared.pfn = (uint32_t)(uint64_t)(HCPhys >> PAGE_SHIFT);
+ pPage->Shared.cRefs = 1;
+#ifdef VBOX_STRICT
+ pPageDesc->u32StrictChecksum = gmmR0StrictPageChecksum(pGMM, pGVM, idPage);
+ pPage->Shared.u14Checksum = pPageDesc->u32StrictChecksum;
+#else
+ NOREF(pPageDesc);
+ pPage->Shared.u14Checksum = 0;
+#endif
+ pPage->Shared.u2State = GMM_PAGE_STATE_SHARED;
+}
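+
+/*
+ * Illustrative note: u14Checksum only has room for the low 14 bits of the
+ * CRC-32, so the strict checks elsewhere mask with 0x3fff and treat zero as
+ * "no checksum recorded". E.g. a page CRC-32 of 0x89abcdef would be stored
+ * as 0x89abcdef & 0x3fff = 0x0def.
+ */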
+
+
+static int gmmR0SharedModuleCheckPageFirstTime(PGMM pGMM, PGVM pGVM, PGMMSHAREDMODULE pModule,
+ unsigned idxRegion, unsigned idxPage,
+ PGMMSHAREDPAGEDESC pPageDesc, PGMMSHAREDREGIONDESC pGlobalRegion)
+{
+ NOREF(pModule);
+
+ /* Easy case: just change the internal page type. */
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, pPageDesc->idPage);
+ AssertMsgReturn(pPage, ("idPage=%#x (GCPhys=%RGp HCPhys=%RHp idxRegion=%#x idxPage=%#x) #1\n",
+ pPageDesc->idPage, pPageDesc->GCPhys, pPageDesc->HCPhys, idxRegion, idxPage),
+ VERR_PGM_PHYS_INVALID_PAGE_ID);
+ NOREF(idxRegion);
+
+ AssertMsg(pPageDesc->GCPhys == (pPage->Private.pfn << 12), ("desc %RGp gmm %RGp\n", pPageDesc->GCPhys, (pPage->Private.pfn << 12)));
+
+ gmmR0ConvertToSharedPage(pGMM, pGVM, pPageDesc->HCPhys, pPageDesc->idPage, pPage, pPageDesc);
+
+ /* Keep track of these references. */
+ pGlobalRegion->paidPages[idxPage] = pPageDesc->idPage;
+
+ return VINF_SUCCESS;
+}
+
+/**
+ * Checks the specified shared module range for changes.
+ *
+ * Performs the following tasks:
+ * - If a shared page is new, then it changes the GMM page type to shared and
+ * returns it in the pPageDesc descriptor.
+ * - If a shared page already exists, then it checks if the VM page is
+ * identical and if so frees the VM page and returns the shared page in
+ * pPageDesc descriptor.
+ *
+ * @remarks ASSUMES the caller has acquired the GMM semaphore!!
+ *
+ * @returns VBox status code.
+ * @param pGVM Pointer to the GVM instance data.
+ * @param pModule Module description
+ * @param idxRegion Region index
+ * @param idxPage Page index
+ * @param pPageDesc Page descriptor
+ */
+GMMR0DECL(int) GMMR0SharedModuleCheckPage(PGVM pGVM, PGMMSHAREDMODULE pModule, uint32_t idxRegion, uint32_t idxPage,
+ PGMMSHAREDPAGEDESC pPageDesc)
+{
+ int rc;
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ pPageDesc->u32StrictChecksum = 0;
+
+ AssertMsgReturn(idxRegion < pModule->cRegions,
+ ("idxRegion=%#x cRegions=%#x %s %s\n", idxRegion, pModule->cRegions, pModule->szName, pModule->szVersion),
+ VERR_INVALID_PARAMETER);
+
+ uint32_t const cPages = pModule->aRegions[idxRegion].cb >> PAGE_SHIFT;
+ AssertMsgReturn(idxPage < cPages,
+ ("idxRegion=%#x cRegions=%#x %s %s\n", idxRegion, pModule->cRegions, pModule->szName, pModule->szVersion),
+ VERR_INVALID_PARAMETER);
+
+ LogFlow(("GMMR0SharedModuleCheckRange %s base %RGv region %d idxPage %d\n", pModule->szName, pModule->Core.Key, idxRegion, idxPage));
+
+ /*
+ * First time; create a page descriptor array.
+ */
+ PGMMSHAREDREGIONDESC pGlobalRegion = &pModule->aRegions[idxRegion];
+ if (!pGlobalRegion->paidPages)
+ {
+ Log(("Allocate page descriptor array for %d pages\n", cPages));
+ pGlobalRegion->paidPages = (uint32_t *)RTMemAlloc(cPages * sizeof(pGlobalRegion->paidPages[0]));
+ AssertReturn(pGlobalRegion->paidPages, VERR_NO_MEMORY);
+
+ /* Invalidate all descriptors. */
+ uint32_t i = cPages;
+ while (i-- > 0)
+ pGlobalRegion->paidPages[i] = NIL_GMM_PAGEID;
+ }
+
+ /*
+ * We've seen this shared page for the first time?
+ */
+ if (pGlobalRegion->paidPages[idxPage] == NIL_GMM_PAGEID)
+ {
+ Log(("New shared page guest %RGp host %RHp\n", pPageDesc->GCPhys, pPageDesc->HCPhys));
+ return gmmR0SharedModuleCheckPageFirstTime(pGMM, pGVM, pModule, idxRegion, idxPage, pPageDesc, pGlobalRegion);
+ }
+
+ /*
+ * We've seen it before...
+ */
+ Log(("Replace existing page guest %RGp host %RHp id %#x -> id %#x\n",
+ pPageDesc->GCPhys, pPageDesc->HCPhys, pPageDesc->idPage, pGlobalRegion->paidPages[idxPage]));
+ Assert(pPageDesc->idPage != pGlobalRegion->paidPages[idxPage]);
+
+ /*
+ * Get the shared page source.
+ */
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, pGlobalRegion->paidPages[idxPage]);
+ AssertMsgReturn(pPage, ("idPage=%#x (idxRegion=%#x idxPage=%#x) #2\n", pPageDesc->idPage, idxRegion, idxPage),
+ VERR_PGM_PHYS_INVALID_PAGE_ID);
+
+ if (pPage->Common.u2State != GMM_PAGE_STATE_SHARED)
+ {
+ /*
+ * Page was freed at some point; invalidate this entry.
+ */
+ /** @todo this isn't really bullet proof. */
+ Log(("Old shared page was freed -> create a new one\n"));
+ pGlobalRegion->paidPages[idxPage] = NIL_GMM_PAGEID;
+ return gmmR0SharedModuleCheckPageFirstTime(pGMM, pGVM, pModule, idxRegion, idxPage, pPageDesc, pGlobalRegion);
+ }
+
+ Log(("Replace existing page guest host %RHp -> %RHp\n", pPageDesc->HCPhys, ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT));
+
+ /*
+ * Calculate the virtual address of the local page.
+ */
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pPageDesc->idPage >> GMM_CHUNKID_SHIFT);
+ AssertMsgReturn(pChunk, ("idPage=%#x (idxRegion=%#x idxPage=%#x) #4\n", pPageDesc->idPage, idxRegion, idxPage),
+ VERR_PGM_PHYS_INVALID_PAGE_ID);
+
+ uint8_t *pbChunk;
+ AssertMsgReturn(gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk),
+ ("idPage=%#x (idxRegion=%#x idxPage=%#x) #3\n", pPageDesc->idPage, idxRegion, idxPage),
+ VERR_PGM_PHYS_INVALID_PAGE_ID);
+ uint8_t *pbLocalPage = pbChunk + ((pPageDesc->idPage & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
+
+ /*
+ * Calculate the virtual address of the shared page.
+ */
+ pChunk = gmmR0GetChunk(pGMM, pGlobalRegion->paidPages[idxPage] >> GMM_CHUNKID_SHIFT);
+ Assert(pChunk); /* can't fail as gmmR0GetPage succeeded. */
+
+ /*
+ * Get the virtual address of the physical page; map the chunk into the VM
+ * process if not already done.
+ */
+ if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
+ {
+ Log(("Map chunk into process!\n"));
+ rc = gmmR0MapChunk(pGMM, pGVM, pChunk, false /*fRelaxedSem*/, (PRTR3PTR)&pbChunk);
+ AssertRCReturn(rc, rc);
+ }
+ uint8_t *pbSharedPage = pbChunk + ((pGlobalRegion->paidPages[idxPage] & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
+
+#ifdef VBOX_STRICT
+ pPageDesc->u32StrictChecksum = RTCrc32(pbSharedPage, PAGE_SIZE);
+ uint32_t uChecksum = pPageDesc->u32StrictChecksum & UINT32_C(0x00003fff);
+ AssertMsg(!uChecksum || uChecksum == pPage->Shared.u14Checksum || !pPage->Shared.u14Checksum,
+ ("%#x vs %#x - idPage=%#x - %s %s\n", uChecksum, pPage->Shared.u14Checksum,
+ pGlobalRegion->paidPages[idxPage], pModule->szName, pModule->szVersion));
+#endif
+
+ /** @todo write ASMMemComparePage. */
+ if (memcmp(pbSharedPage, pbLocalPage, PAGE_SIZE))
+ {
+ Log(("Unexpected differences found between local and shared page; skip\n"));
+ /* Signal to the caller that this one hasn't changed. */
+ pPageDesc->idPage = NIL_GMM_PAGEID;
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Free the old local page.
+ */
+ GMMFREEPAGEDESC PageDesc;
+ PageDesc.idPage = pPageDesc->idPage;
+ rc = gmmR0FreePages(pGMM, pGVM, 1, &PageDesc, GMMACCOUNT_BASE);
+ AssertRCReturn(rc, rc);
+
+ gmmR0UseSharedPage(pGMM, pGVM, pPage);
+
+ /*
+ * Pass along the new physical address & page id.
+ */
+ pPageDesc->HCPhys = ((uint64_t)pPage->Shared.pfn) << PAGE_SHIFT;
+ pPageDesc->idPage = pGlobalRegion->paidPages[idxPage];
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * RTAvlGCPtrDestroy callback.
+ *
+ * @returns 0 or VERR_GMM_INSTANCE.
+ * @param pNode The node to destroy.
+ * @param pvArgs Pointer to an argument packet.
+ */
+static DECLCALLBACK(int) gmmR0CleanupSharedModule(PAVLGCPTRNODECORE pNode, void *pvArgs)
+{
+ gmmR0ShModDeletePerVM(((GMMR0SHMODPERVMDTORARGS *)pvArgs)->pGMM,
+ ((GMMR0SHMODPERVMDTORARGS *)pvArgs)->pGVM,
+ (PGMMSHAREDMODULEPERVM)pNode,
+ false /*fRemove*/);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Used by GMMR0CleanupVM to clean up shared modules.
+ *
+ * This is called without taking the GMM lock so that it can be yielded as
+ * needed here.
+ *
+ * @param pGMM The GMM handle.
+ * @param pGVM The global VM handle.
+ */
+static void gmmR0SharedModuleCleanup(PGMM pGMM, PGVM pGVM)
+{
+ gmmR0MutexAcquire(pGMM);
+ GMM_CHECK_SANITY_UPON_ENTERING(pGMM);
+
+ GMMR0SHMODPERVMDTORARGS Args;
+ Args.pGVM = pGVM;
+ Args.pGMM = pGMM;
+ RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, &Args);
+
+ AssertMsg(pGVM->gmm.s.Stats.cShareableModules == 0, ("%d\n", pGVM->gmm.s.Stats.cShareableModules));
+ pGVM->gmm.s.Stats.cShareableModules = 0;
+
+ gmmR0MutexRelease(pGMM);
+}
+
+#endif /* VBOX_WITH_PAGE_SHARING */
+
+/**
+ * Removes all shared modules for the specified VM
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The VCPU id.
+ */
+GMMR0DECL(int) GMMR0ResetSharedModules(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+#ifdef VBOX_WITH_PAGE_SHARING
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Take the semaphore and do some more validations.
+ */
+ gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ Log(("GMMR0ResetSharedModules\n"));
+ GMMR0SHMODPERVMDTORARGS Args;
+ Args.pGVM = pGVM;
+ Args.pGMM = pGMM;
+ RTAvlGCPtrDestroy(&pGVM->gmm.s.pSharedModuleTree, gmmR0CleanupSharedModule, &Args);
+ pGVM->gmm.s.Stats.cShareableModules = 0;
+
+ rc = VINF_SUCCESS;
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+ gmmR0MutexRelease(pGMM);
+ return rc;
+#else
+ RT_NOREF(pGVM, pVM, idCpu);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+#ifdef VBOX_WITH_PAGE_SHARING
+
+/**
+ * Tree enumeration callback for checking a shared module.
+ */
+static DECLCALLBACK(int) gmmR0CheckSharedModule(PAVLGCPTRNODECORE pNode, void *pvUser)
+{
+ GMMCHECKSHAREDMODULEINFO *pArgs = (GMMCHECKSHAREDMODULEINFO*)pvUser;
+ PGMMSHAREDMODULEPERVM pRecVM = (PGMMSHAREDMODULEPERVM)pNode;
+ PGMMSHAREDMODULE pGblMod = pRecVM->pGlobalModule;
+
+ Log(("gmmR0CheckSharedModule: check %s %s base=%RGv size=%x\n",
+ pGblMod->szName, pGblMod->szVersion, pGblMod->Core.Key, pGblMod->cbModule));
+
+ int rc = PGMR0SharedModuleCheck(pArgs->pGVM->pVM, pArgs->pGVM, pArgs->idCpu, pGblMod, pRecVM->aRegionsGCPtrs);
+ if (RT_FAILURE(rc))
+ return rc;
+ return VINF_SUCCESS;
+}
+
+#endif /* VBOX_WITH_PAGE_SHARING */
+
+/**
+ * Check all shared modules for the specified VM.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The calling EMT number.
+ * @thread EMT(idCpu)
+ */
+GMMR0DECL(int) GMMR0CheckSharedModules(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+#ifdef VBOX_WITH_PAGE_SHARING
+ /*
+ * Validate input and get the basics.
+ */
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+# ifndef DEBUG_sandervl
+ /*
+ * Take the semaphore and do some more validations.
+ */
+ gmmR0MutexAcquire(pGMM);
+# endif
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ /*
+ * Walk the tree, checking each module.
+ */
+ Log(("GMMR0CheckSharedModules\n"));
+
+ GMMCHECKSHAREDMODULEINFO Args;
+ Args.pGVM = pGVM;
+ Args.idCpu = idCpu;
+ rc = RTAvlGCPtrDoWithAll(&pGVM->gmm.s.pSharedModuleTree, true /* fFromLeft */, gmmR0CheckSharedModule, &Args);
+
+ Log(("GMMR0CheckSharedModules done (rc=%Rrc)!\n", rc));
+ GMM_CHECK_SANITY_UPON_LEAVING(pGMM);
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+# ifndef DEBUG_sandervl
+ gmmR0MutexRelease(pGMM);
+# endif
+ return rc;
+#else
+ RT_NOREF(pGVM, pVM, idCpu);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+#if defined(VBOX_STRICT) && HC_ARCH_BITS == 64
+
+/**
+ * RTAvlU32DoWithAll callback.
+ *
+ * @returns 0
+ * @param pNode The node to search.
+ * @param pvUser Pointer to the input argument packet.
+ */
+static DECLCALLBACK(int) gmmR0FindDupPageInChunk(PAVLU32NODECORE pNode, void *pvUser)
+{
+ PGMMCHUNK pChunk = (PGMMCHUNK)pNode;
+ GMMFINDDUPPAGEINFO *pArgs = (GMMFINDDUPPAGEINFO *)pvUser;
+ PGVM pGVM = pArgs->pGVM;
+ PGMM pGMM = pArgs->pGMM;
+ uint8_t *pbChunk;
+
+ /* Only take chunks not mapped into this VM process; not entirely correct. */
+ if (!gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
+ {
+ int rc = gmmR0MapChunk(pGMM, pGVM, pChunk, false /*fRelaxedSem*/, (PRTR3PTR)&pbChunk);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Look for duplicate pages
+ */
+ unsigned iPage = (GMM_CHUNK_SIZE >> PAGE_SHIFT);
+ while (iPage-- > 0)
+ {
+ if (GMM_PAGE_IS_PRIVATE(&pChunk->aPages[iPage]))
+ {
+ uint8_t *pbDestPage = pbChunk + (iPage << PAGE_SHIFT);
+
+ if (!memcmp(pArgs->pSourcePage, pbDestPage, PAGE_SIZE))
+ {
+ pArgs->fFoundDuplicate = true;
+ break;
+ }
+ }
+ }
+ gmmR0UnmapChunk(pGMM, pGVM, pChunk, false /*fRelaxedSem*/);
+ }
+ }
+ return pArgs->fFoundDuplicate; /* (stops search if true) */
+}
+
+
+/**
+ * Find a duplicate of the specified page in other active VMs
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0FindDuplicatePageReq(PGVM pGVM, PVM pVM, PGMMFINDDUPLICATEPAGEREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+
+ int rc = GVMMR0ValidateGVMandVM(pGVM, pVM);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Take the semaphore and do some more validations.
+ */
+ rc = gmmR0MutexAcquire(pGMM);
+ if (GMM_CHECK_SANITY_UPON_ENTERING(pGMM))
+ {
+ uint8_t *pbChunk;
+ PGMMCHUNK pChunk = gmmR0GetChunk(pGMM, pReq->idPage >> GMM_CHUNKID_SHIFT);
+ if (pChunk)
+ {
+ if (gmmR0IsChunkMapped(pGMM, pGVM, pChunk, (PRTR3PTR)&pbChunk))
+ {
+ uint8_t *pbSourcePage = pbChunk + ((pReq->idPage & GMM_PAGEID_IDX_MASK) << PAGE_SHIFT);
+ PGMMPAGE pPage = gmmR0GetPage(pGMM, pReq->idPage);
+ if (pPage)
+ {
+ GMMFINDDUPPAGEINFO Args;
+ Args.pGVM = pGVM;
+ Args.pGMM = pGMM;
+ Args.pSourcePage = pbSourcePage;
+ Args.fFoundDuplicate = false;
+ RTAvlU32DoWithAll(&pGMM->pChunks, true /* fFromLeft */, gmmR0FindDupPageInChunk, &Args);
+
+ pReq->fDuplicate = Args.fFoundDuplicate;
+ }
+ else
+ {
+ AssertFailed();
+ rc = VERR_PGM_PHYS_INVALID_PAGE_ID;
+ }
+ }
+ else
+ AssertFailed();
+ }
+ else
+ AssertFailed();
+ }
+ else
+ rc = VERR_GMM_IS_NOT_SANE;
+
+ gmmR0MutexRelease(pGMM);
+ return rc;
+}
+
+#endif /* VBOX_STRICT && HC_ARCH_BITS == 64 */
+
+
+/**
+ * Retrieves the GMM statistics visible to the caller.
+ *
+ * @returns VBox status code.
+ *
+ * @param pStats Where to put the statistics.
+ * @param pSession The current session.
+ * @param pGVM The GVM to obtain statistics for. Optional.
+ * @param pVM The VM structure corresponding to @a pGVM.
+ */
+GMMR0DECL(int) GMMR0QueryStatistics(PGMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
+{
+ LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
+
+ /*
+ * Validate input.
+ */
+ AssertPtrReturn(pSession, VERR_INVALID_POINTER);
+ AssertPtrReturn(pStats, VERR_INVALID_POINTER);
+ pStats->cMaxPages = 0; /* (crash before taking the mutex...) */
+
+ PGMM pGMM;
+ GMM_GET_VALID_INSTANCE(pGMM, VERR_GMM_INSTANCE);
+
+ /*
+ * Validate the VM handle, if not NULL, and lock the GMM.
+ */
+ int rc;
+ if (pGVM)
+ {
+ rc = GVMMR0ValidateGVMandVM(pGVM, pVM);
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+
+ rc = gmmR0MutexAcquire(pGMM);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Copy out the GMM statistics.
+ */
+ pStats->cMaxPages = pGMM->cMaxPages;
+ pStats->cReservedPages = pGMM->cReservedPages;
+ pStats->cOverCommittedPages = pGMM->cOverCommittedPages;
+ pStats->cAllocatedPages = pGMM->cAllocatedPages;
+ pStats->cSharedPages = pGMM->cSharedPages;
+ pStats->cDuplicatePages = pGMM->cDuplicatePages;
+ pStats->cLeftBehindSharedPages = pGMM->cLeftBehindSharedPages;
+ pStats->cBalloonedPages = pGMM->cBalloonedPages;
+ pStats->cChunks = pGMM->cChunks;
+ pStats->cFreedChunks = pGMM->cFreedChunks;
+ pStats->cShareableModules = pGMM->cShareableModules;
+ RT_ZERO(pStats->au64Reserved);
+
+ /*
+ * Copy out the VM statistics.
+ */
+ if (pGVM)
+ pStats->VMStats = pGVM->gmm.s.Stats;
+ else
+ RT_ZERO(pStats->VMStats);
+
+ gmmR0MutexRelease(pGMM);
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0QueryStatistics.
+ *
+ * @returns see GMMR0QueryStatistics.
+ * @param pGVM The global (ring-0) VM structure. Optional.
+ * @param pVM The cross context VM structure. Optional.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0QueryStatisticsReq(PGVM pGVM, PVM pVM, PGMMQUERYSTATISTICSSREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GMMR0QueryStatistics(&pReq->Stats, pReq->pSession, pGVM, pVM);
+}
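+
+/*
+ * Editor's note: an illustrative, non-authoritative sketch of how a ring-3
+ * caller might drive the request wrapper above. The dispatch call and the
+ * VMMR0_DO_GMM_QUERY_STATISTICS operation name are assumptions about the
+ * usual VMMR0 request path and are not defined in this file:
+ *
+ *     GMMQUERYSTATISTICSSREQ Req;
+ *     RT_ZERO(Req);
+ *     Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+ *     Req.Hdr.cbReq    = sizeof(Req);
+ *     Req.pSession     = pVM->pSession;
+ *     int rc = SUPR3CallVMMR0Ex(pVM->pVMR0, NIL_VMCPUID,
+ *                               VMMR0_DO_GMM_QUERY_STATISTICS, 0, &Req.Hdr);
+ *     if (RT_SUCCESS(rc))
+ *         LogRel(("GMM: cAllocatedPages=%#RX64\n", Req.Stats.cAllocatedPages));
+ */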
+
+
+/**
+ * Resets the specified GMM statistics.
+ *
+ * @returns VBox status code.
+ *
+ * @param pStats Which statistics to reset, that is, non-zero fields
+ * indicate which to reset.
+ * @param pSession The current session.
+ * @param pGVM The GVM to reset statistics for. Optional.
+ * @param pVM The VM structure corresponding to @a pGVM.
+ */
+GMMR0DECL(int) GMMR0ResetStatistics(PCGMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
+{
+ NOREF(pStats); NOREF(pSession); NOREF(pVM); NOREF(pGVM);
+ /* Nothing to reset at the moment. */
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VMMR0 request wrapper for GMMR0ResetStatistics.
+ *
+ * @returns see GMMR0ResetStatistics.
+ * @param pGVM The global (ring-0) VM structure. Optional.
+ * @param pVM The cross context VM structure. Optional.
+ * @param pReq Pointer to the request packet.
+ */
+GMMR0DECL(int) GMMR0ResetStatisticsReq(PGVM pGVM, PVM pVM, PGMMRESETSTATISTICSSREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GMMR0ResetStatistics(&pReq->Stats, pReq->pSession, pGVM, pVM);
+}
+
diff --git a/src/VBox/VMM/VMMR0/GMMR0Internal.h b/src/VBox/VMM/VMMR0/GMMR0Internal.h
new file mode 100644
index 00000000..51de8549
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/GMMR0Internal.h
@@ -0,0 +1,92 @@
+/* $Id: GMMR0Internal.h $ */
+/** @file
+ * GMM - The Global Memory Manager, Internal Header.
+ */
+
+/*
+ * Copyright (C) 2007-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VMM_INCLUDED_SRC_VMMR0_GMMR0Internal_h
+#define VMM_INCLUDED_SRC_VMMR0_GMMR0Internal_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <VBox/vmm/gmm.h>
+#include <iprt/avl.h>
+
+
+/**
+ * Shared module registration info (per VM)
+ */
+typedef struct GMMSHAREDMODULEPERVM
+{
+ /** Tree node. */
+ AVLGCPTRNODECORE Core;
+ /** Pointer to global shared module info. */
+ PGMMSHAREDMODULE pGlobalModule;
+ /** Pointer to the region addresses.
+ *
+ * They can differ between VMs because of address space scrambling or
+ * simply different loading order. */
+ RTGCPTR64 aRegionsGCPtrs[1];
+} GMMSHAREDMODULEPERVM;
+/** Pointer to a GMMSHAREDMODULEPERVM. */
+typedef GMMSHAREDMODULEPERVM *PGMMSHAREDMODULEPERVM;
+
+
+/** Pointer to a GMM allocation chunk. */
+typedef struct GMMCHUNK *PGMMCHUNK;
+
+
+/** The GMMCHUNK::cFree shift count employed by gmmR0SelectFreeSetList. */
+#define GMM_CHUNK_FREE_SET_SHIFT 4
+/** Index of the list containing completely unused chunks.
+ * The code ASSUMES this is the last list. */
+#define GMM_CHUNK_FREE_SET_UNUSED_LIST (GMM_CHUNK_NUM_PAGES >> GMM_CHUNK_FREE_SET_SHIFT)
+
+/**
+ * A set of free chunks.
+ */
+typedef struct GMMCHUNKFREESET
+{
+ /** The number of free pages in the set. */
+ uint64_t cFreePages;
+ /** The generation ID for the set. This is incremented whenever
+ * something is linked or unlinked from this set. */
+ uint64_t idGeneration;
+ /** Chunks ordered by increasing number of free pages.
+ * In the final list the chunks are completely unused. */
+ PGMMCHUNK apLists[GMM_CHUNK_FREE_SET_UNUSED_LIST + 1];
+} GMMCHUNKFREESET;
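+
+/*
+ * Editor's note: a non-authoritative sketch of the list selection implied by
+ * GMM_CHUNK_FREE_SET_SHIFT and GMM_CHUNK_FREE_SET_UNUSED_LIST above. The
+ * real logic lives in gmmR0SelectFreeSetList in GMMR0.cpp and may differ in
+ * details. With the usual 2 MB chunks and 4 KB pages (512 pages per chunk),
+ * only a completely unused chunk ends up in the last list:
+ *
+ *     unsigned  idxList = pChunk->cFree >> GMM_CHUNK_FREE_SET_SHIFT;
+ *     Assert(idxList <= GMM_CHUNK_FREE_SET_UNUSED_LIST);
+ *     PGMMCHUNK pHead   = pSet->apLists[idxList];
+ */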
+
+
+
+/**
+ * The per-VM GMM data.
+ */
+typedef struct GMMPERVM
+{
+ /** Free set for use in bound mode. */
+ GMMCHUNKFREESET Private;
+ /** The VM statistics. */
+ GMMVMSTATS Stats;
+ /** Shared module tree (per-vm). */
+ PAVLGCPTRNODECORE pSharedModuleTree;
+ /** Hints at the last chunk we allocated some memory from. */
+ uint32_t idLastChunkHint;
+} GMMPERVM;
+/** Pointer to the per-VM GMM data. */
+typedef GMMPERVM *PGMMPERVM;
+
+#endif /* !VMM_INCLUDED_SRC_VMMR0_GMMR0Internal_h */
+
diff --git a/src/VBox/VMM/VMMR0/GVMMR0.cpp b/src/VBox/VMM/VMMR0/GVMMR0.cpp
new file mode 100644
index 00000000..13aef810
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/GVMMR0.cpp
@@ -0,0 +1,3106 @@
+/* $Id: GVMMR0.cpp $ */
+/** @file
+ * GVMM - Global VM Manager.
+ */
+
+/*
+ * Copyright (C) 2007-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @page pg_gvmm GVMM - The Global VM Manager
+ *
+ * The Global VM Manager lives in ring-0. Its main function at the moment is
+ * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
+ * each of them, and assign them unique identifiers (so GMM can track page
+ * owners). The GVMM also manages some of the host CPU resources, like the
+ * periodic preemption timer.
+ *
+ * The GVMM will create a ring-0 object for each VM when it is registered;
+ * this is both for session cleanup purposes and for having a point where it
+ * is possible to implement usage policies later (in SUPR0ObjRegister).
+ *
+ *
+ * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
+ *
+ * On systems that sport a high resolution kernel timer API, we use per-cpu
+ * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
+ * execution. The timer frequency is calculated by taking the max
+ * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
+ * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
+ * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
+ *
+ * TMCalcHostTimerFrequency() itself takes the max TMTimerSetFrequencyHint()
+ * value and adjusts it by the current catch-up percent,
+ * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
+ * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
+ * AMD-V and raw-mode execution environments.
+ */
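+
+/*
+ * Editor's note: a rough, non-authoritative sketch of the aggregation
+ * described above, in terms of the GVMMHOSTCPU::Ppt fields defined further
+ * down in this file. The real update and start/stop logic lives in
+ * GVMMR0SchedUpdatePeriodicPreemptionTimer and the timer callback:
+ *
+ *     uint32_t uHz = pCpu->Ppt.uDesiredHz;
+ *     for (unsigned i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
+ *         if (pCpu->Ppt.aHzHistory[i] > uHz)
+ *             uHz = pCpu->Ppt.aHzHistory[i];
+ *     if (uHz > pCpu->Ppt.uMinHz)
+ *         pCpu->Ppt.cNsInterval = RT_NS_1SEC / uHz;   // timer runs at uHz
+ *     else
+ *         pCpu->Ppt.cNsInterval = 0;                  // timer not needed
+ */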
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_GVMM
+#include <VBox/vmm/gvmm.h>
+#include <VBox/vmm/gmm.h>
+#include "GVMMR0Internal.h"
+#include <VBox/vmm/gvm.h>
+#include <VBox/vmm/vm.h>
+#include <VBox/vmm/vmcpuset.h>
+#include <VBox/vmm/vmm.h>
+#ifdef VBOX_WITH_NEM_R0
+# include <VBox/vmm/nem.h>
+#endif
+#include <VBox/param.h>
+#include <VBox/err.h>
+
+#include <iprt/asm.h>
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/critsect.h>
+#include <iprt/mem.h>
+#include <iprt/semaphore.h>
+#include <iprt/time.h>
+#include <VBox/log.h>
+#include <iprt/thread.h>
+#include <iprt/process.h>
+#include <iprt/param.h>
+#include <iprt/string.h>
+#include <iprt/assert.h>
+#include <iprt/mem.h>
+#include <iprt/memobj.h>
+#include <iprt/mp.h>
+#include <iprt/cpuset.h>
+#include <iprt/spinlock.h>
+#include <iprt/timer.h>
+
+#include "dtrace/VBoxVMM.h"
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
+/** Define this to enable the periodic preemption timer. */
+# define GVMM_SCHED_WITH_PPT
+#endif
+
+
+/** @def GVMM_CHECK_SMAP_SETUP
+ * SMAP check setup. */
+/** @def GVMM_CHECK_SMAP_CHECK
+ * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
+ * it will be logged and @a a_BadExpr is executed. */
+/** @def GVMM_CHECK_SMAP_CHECK2
+ * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
+ * be logged, written to the VMs assertion text buffer, and @a a_BadExpr is
+ * executed. */
+#if defined(VBOX_STRICT) || 1
+# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
+# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
+ do { \
+ if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
+ { \
+ RTCCUINTREG fEflCheck = ASMGetFlags(); \
+ if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
+ { /* likely */ } \
+ else \
+ { \
+ SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
+ a_BadExpr; \
+ } \
+ } \
+ } while (0)
+# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) \
+ do { \
+ if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
+ { \
+ RTCCUINTREG fEflCheck = ASMGetFlags(); \
+ if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
+ { /* likely */ } \
+ else \
+ { \
+ SUPR0BadContext((a_pVM) ? (a_pVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
+ a_BadExpr; \
+ } \
+ } \
+ } while (0)
+#else
+# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
+# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
+# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) NOREF(fKernelFeatures)
+#endif
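+
+/*
+ * Editor's note: an illustrative, non-authoritative usage sketch for the
+ * SMAP check macros above; gvmmR0SomeEntryPoint and doTheActualWork are
+ * made-up names. A ring-0 entry point declares the kernel feature mask once
+ * and then asserts that EFLAGS.AC is still set at interesting points; the
+ * second argument is whatever recovery expression the caller wants executed
+ * on failure:
+ *
+ *     GVMMR0DECL(int) gvmmR0SomeEntryPoint(PGVM pGVM, PVM pVM)
+ *     {
+ *         GVMM_CHECK_SMAP_SETUP();
+ *         GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ *         int rc = doTheActualWork(pGVM, pVM);        // placeholder
+ *         GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ *         return rc;
+ *     }
+ */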
+
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+
+/**
+ * Global VM handle.
+ */
+typedef struct GVMHANDLE
+{
+ /** The index of the next handle in the list (free or used). (0 is nil.) */
+ uint16_t volatile iNext;
+ /** Our own index / handle value. */
+ uint16_t iSelf;
+ /** The process ID of the handle owner.
+ * This is used for access checks. */
+ RTPROCESS ProcId;
+ /** The pointer to the ring-0 only (aka global) VM structure. */
+ PGVM pGVM;
+ /** The ring-0 mapping of the shared VM instance data. */
+ PVM pVM;
+ /** The virtual machine object. */
+ void *pvObj;
+ /** The session this VM is associated with. */
+ PSUPDRVSESSION pSession;
+ /** The ring-0 handle of the EMT0 thread.
+ * This is used for ownership checks as well as looking up a VM handle by thread
+ * at times like assertions. */
+ RTNATIVETHREAD hEMT0;
+} GVMHANDLE;
+/** Pointer to a global VM handle. */
+typedef GVMHANDLE *PGVMHANDLE;
+
+/** Number of GVM handles (including the NIL handle). */
+#if HC_ARCH_BITS == 64
+# define GVMM_MAX_HANDLES 8192
+#else
+# define GVMM_MAX_HANDLES 128
+#endif
+
+/**
+ * Per host CPU GVMM data.
+ */
+typedef struct GVMMHOSTCPU
+{
+ /** Magic number (GVMMHOSTCPU_MAGIC). */
+ uint32_t volatile u32Magic;
+ /** The CPU ID. */
+ RTCPUID idCpu;
+ /** The CPU set index. */
+ uint32_t idxCpuSet;
+
+#ifdef GVMM_SCHED_WITH_PPT
+ /** Periodic preemption timer data. */
+ struct
+ {
+ /** The handle to the periodic preemption timer. */
+ PRTTIMER pTimer;
+ /** Spinlock protecting the data below. */
+ RTSPINLOCK hSpinlock;
+ /** The smallest Hz that we need to care about. (static) */
+ uint32_t uMinHz;
+ /** The number of ticks between each historization. */
+ uint32_t cTicksHistoriziationInterval;
+ /** The current historization tick (counting up to
+ * cTicksHistoriziationInterval and then resetting). */
+ uint32_t iTickHistorization;
+ /** The current timer interval. This is set to 0 when inactive. */
+ uint32_t cNsInterval;
+ /** The current timer frequency. This is set to 0 when inactive. */
+ uint32_t uTimerHz;
+ /** The current max frequency reported by the EMTs.
+ * This gets historicized and reset by the timer callback. This is
+ * read without holding the spinlock, so needs atomic updating. */
+ uint32_t volatile uDesiredHz;
+ /** Whether the timer was started or not. */
+ bool volatile fStarted;
+ /** Set if we're starting timer. */
+ bool volatile fStarting;
+ /** The index of the next history entry (mod it). */
+ uint32_t iHzHistory;
+ /** Historicized uDesiredHz values. The array wraps around, new entries
+ * are added at iHzHistory. This is updated approximately every
+ * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
+ uint32_t aHzHistory[8];
+ /** Statistics counter for recording the number of interval changes. */
+ uint32_t cChanges;
+ /** Statistics counter for recording the number of timer starts. */
+ uint32_t cStarts;
+ } Ppt;
+#endif /* GVMM_SCHED_WITH_PPT */
+
+} GVMMHOSTCPU;
+/** Pointer to the per host CPU GVMM data. */
+typedef GVMMHOSTCPU *PGVMMHOSTCPU;
+/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
+#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
+/** The interval one history entry should cover (approximately), given in
+ * nanoseconds. */
+#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
+
+
+/**
+ * The GVMM instance data.
+ */
+typedef struct GVMM
+{
+ /** Eyecatcher / magic. */
+ uint32_t u32Magic;
+ /** The index of the head of the free handle chain. (0 is nil.) */
+ uint16_t volatile iFreeHead;
+ /** The index of the head of the active handle chain. (0 is nil.) */
+ uint16_t volatile iUsedHead;
+ /** The number of VMs. */
+ uint16_t volatile cVMs;
+ /** Alignment padding. */
+ uint16_t u16Reserved;
+ /** The number of EMTs. */
+ uint32_t volatile cEMTs;
+ /** The number of EMTs that have halted in GVMMR0SchedHalt. */
+ uint32_t volatile cHaltedEMTs;
+ /** Mini lock for restricting early wake-ups to one thread. */
+ bool volatile fDoingEarlyWakeUps;
+ bool afPadding[3]; /**< explicit alignment padding. */
+ /** When the next halted or sleeping EMT will wake up.
+ * This is set to 0 when it needs recalculating and to UINT64_MAX when
+ * there are no halted or sleeping EMTs in the GVMM. */
+ uint64_t uNsNextEmtWakeup;
+ /** The lock used to serialize VM creation, destruction and associated events that
+ * isn't performance critical. Owners may acquire the list lock. */
+ RTCRITSECT CreateDestroyLock;
+ /** The lock used to serialize used list updates and accesses.
+ * This indirectly includes scheduling since the scheduler will have to walk the
+ * used list to examine running VMs. Owners may not acquire any other locks. */
+ RTCRITSECTRW UsedLock;
+ /** The handle array.
+ * The size of this array defines the maximum number of currently running VMs.
+ * The first entry is unused as it represents the NIL handle. */
+ GVMHANDLE aHandles[GVMM_MAX_HANDLES];
+
+ /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
+ * The number of EMTs that means we no longer consider ourselves alone on a
+ * CPU/Core.
+ */
+ uint32_t cEMTsMeansCompany;
+ /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
+ * The minimum sleep time for when we're alone, in nano seconds.
+ */
+ uint32_t nsMinSleepAlone;
+ /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
+ * The minimum sleep time for when we've got company, in nano seconds.
+ */
+ uint32_t nsMinSleepCompany;
+ /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
+ * The limit for the first round of early wake-ups, given in nano seconds.
+ */
+ uint32_t nsEarlyWakeUp1;
+ /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
+ * The limit for the second round of early wake-ups, given in nano seconds.
+ */
+ uint32_t nsEarlyWakeUp2;
+
+ /** Set if we're doing early wake-ups.
+ * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
+ bool volatile fDoEarlyWakeUps;
+
+ /** The number of entries in the host CPU array (aHostCpus). */
+ uint32_t cHostCpus;
+ /** Per host CPU data (variable length). */
+ GVMMHOSTCPU aHostCpus[1];
+} GVMM;
+AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
+AssertCompileMemberAlignment(GVMM, UsedLock, 8);
+AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
+/** Pointer to the GVMM instance data. */
+typedef GVMM *PGVMM;
+
+/** The GVMM::u32Magic value (Charlie Haden). */
+#define GVMM_MAGIC UINT32_C(0x19370806)
+
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** Pointer to the GVMM instance data.
+ * (Just my general dislike for global variables.) */
+static PGVMM g_pGVMM = NULL;
+
+/** Macro for obtaining and validating the g_pGVMM pointer.
+ * On failure it will return from the invoking function with the specified return value.
+ *
+ * @param pGVMM The name of the pGVMM variable.
+ * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
+ * status codes.
+ */
+#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
+ do { \
+ (pGVMM) = g_pGVMM;\
+ AssertPtrReturn((pGVMM), (rc)); \
+ AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
+ } while (0)
+
+/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
+ * On failure it will return from the invoking function.
+ *
+ * @param pGVMM The name of the pGVMM variable.
+ */
+#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
+ do { \
+ (pGVMM) = g_pGVMM;\
+ AssertPtrReturnVoid((pGVMM)); \
+ AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
+ } while (0)
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static void gvmmR0InitPerVMData(PGVM pGVM);
+static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
+static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock);
+static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM);
+
+#ifdef GVMM_SCHED_WITH_PPT
+static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
+#endif
+
+
+/**
+ * Initializes the GVMM.
+ *
+ * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
+ *
+ * @returns VBox status code.
+ */
+GVMMR0DECL(int) GVMMR0Init(void)
+{
+ LogFlow(("GVMMR0Init:\n"));
+
+ /*
+ * Allocate and initialize the instance data.
+ */
+ uint32_t cHostCpus = RTMpGetArraySize();
+ AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
+
+ PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
+ if (!pGVMM)
+ return VERR_NO_MEMORY;
+ int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
+ "GVMM-CreateDestroyLock");
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
+ if (RT_SUCCESS(rc))
+ {
+ pGVMM->u32Magic = GVMM_MAGIC;
+ pGVMM->iUsedHead = 0;
+ pGVMM->iFreeHead = 1;
+
+ /* the nil handle */
+ pGVMM->aHandles[0].iSelf = 0;
+ pGVMM->aHandles[0].iNext = 0;
+
+ /* the tail */
+ unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
+ pGVMM->aHandles[i].iSelf = i;
+ pGVMM->aHandles[i].iNext = 0; /* nil */
+
+ /* the rest */
+ while (i-- > 1)
+ {
+ pGVMM->aHandles[i].iSelf = i;
+ pGVMM->aHandles[i].iNext = i + 1;
+ }
+
+ /* The default configuration values. */
+ uint32_t cNsResolution = RTSemEventMultiGetResolution();
+ pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
+ if (cNsResolution >= 5*RT_NS_100US)
+ {
+ pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
+ pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
+ pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
+ pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
+ }
+ else if (cNsResolution > RT_NS_100US)
+ {
+ pGVMM->nsMinSleepAlone = cNsResolution / 2;
+ pGVMM->nsMinSleepCompany = cNsResolution / 4;
+ pGVMM->nsEarlyWakeUp1 = 0;
+ pGVMM->nsEarlyWakeUp2 = 0;
+ }
+ else
+ {
+ pGVMM->nsMinSleepAlone = 2000;
+ pGVMM->nsMinSleepCompany = 2000;
+ pGVMM->nsEarlyWakeUp1 = 0;
+ pGVMM->nsEarlyWakeUp2 = 0;
+ }
+ pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
+
+ /* The host CPU data. */
+ pGVMM->cHostCpus = cHostCpus;
+ uint32_t iCpu = cHostCpus;
+ RTCPUSET PossibleSet;
+ RTMpGetSet(&PossibleSet);
+ while (iCpu-- > 0)
+ {
+ pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
+#ifdef GVMM_SCHED_WITH_PPT
+ pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
+ pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
+ pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
+ pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
+ //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
+ //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
+ //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
+#endif
+
+ if (RTCpuSetIsMember(&PossibleSet, iCpu))
+ {
+ pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
+ pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
+
+#ifdef GVMM_SCHED_WITH_PPT
+ rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
+ 50*1000*1000 /* whatever */,
+ RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
+ gvmmR0SchedPeriodicPreemptionTimerCallback,
+ &pGVMM->aHostCpus[iCpu]);
+ if (RT_SUCCESS(rc))
+ rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
+ if (RT_FAILURE(rc))
+ {
+ while (iCpu < cHostCpus)
+ {
+ RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
+ RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
+ pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
+ iCpu++;
+ }
+ break;
+ }
+#endif
+ }
+ else
+ {
+ pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
+ pGVMM->aHostCpus[iCpu].u32Magic = 0;
+ }
+ }
+ if (RT_SUCCESS(rc))
+ {
+ g_pGVMM = pGVMM;
+ LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
+ return VINF_SUCCESS;
+ }
+
+ /* bail out. */
+ RTCritSectRwDelete(&pGVMM->UsedLock);
+ }
+ RTCritSectDelete(&pGVMM->CreateDestroyLock);
+ }
+
+ RTMemFree(pGVMM);
+ return rc;
+}
+
+
+/**
+ * Terminates the GVMM.
+ *
+ * This is called while owning the loader semaphore (see supdrvLdrFree()).
+ * And unless something is wrong, there should be absolutely no VMs
+ * registered at this point.
+ */
+GVMMR0DECL(void) GVMMR0Term(void)
+{
+ LogFlow(("GVMMR0Term:\n"));
+
+ PGVMM pGVMM = g_pGVMM;
+ g_pGVMM = NULL;
+ if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
+ {
+ SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
+ return;
+ }
+
+ /*
+ * First of all, stop all active timers.
+ */
+ uint32_t cActiveTimers = 0;
+ uint32_t iCpu = pGVMM->cHostCpus;
+ while (iCpu-- > 0)
+ {
+ ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
+#ifdef GVMM_SCHED_WITH_PPT
+ if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
+ && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
+ cActiveTimers++;
+#endif
+ }
+ if (cActiveTimers)
+ RTThreadSleep(1); /* fudge */
+
+ /*
+ * Invalidate the instance data and free resources.
+ */
+ pGVMM->u32Magic = ~GVMM_MAGIC;
+ RTCritSectRwDelete(&pGVMM->UsedLock);
+ RTCritSectDelete(&pGVMM->CreateDestroyLock);
+
+ pGVMM->iFreeHead = 0;
+ if (pGVMM->iUsedHead)
+ {
+ SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
+ pGVMM->iUsedHead = 0;
+ }
+
+#ifdef GVMM_SCHED_WITH_PPT
+ iCpu = pGVMM->cHostCpus;
+ while (iCpu-- > 0)
+ {
+ RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
+ pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
+ RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
+ pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
+ }
+#endif
+
+ RTMemFree(pGVMM);
+}
+
+
+/**
+ * A quick hack for setting global config values.
+ *
+ * @returns VBox status code.
+ *
+ * @param pSession The session handle. Used for authentication.
+ * @param pszName The variable name.
+ * @param u64Value The new value.
+ */
+GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
+{
+ /*
+ * Validate input.
+ */
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+ AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
+ AssertPtrReturn(pszName, VERR_INVALID_POINTER);
+
+ /*
+ * String switch time!
+ */
+ if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
+ return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
+ int rc = VINF_SUCCESS;
+ pszName += sizeof("/GVMM/") - 1;
+ if (!strcmp(pszName, "cEMTsMeansCompany"))
+ {
+ if (u64Value <= UINT32_MAX)
+ pGVMM->cEMTsMeansCompany = u64Value;
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else if (!strcmp(pszName, "MinSleepAlone"))
+ {
+ if (u64Value <= RT_NS_100MS)
+ pGVMM->nsMinSleepAlone = u64Value;
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else if (!strcmp(pszName, "MinSleepCompany"))
+ {
+ if (u64Value <= RT_NS_100MS)
+ pGVMM->nsMinSleepCompany = u64Value;
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else if (!strcmp(pszName, "EarlyWakeUp1"))
+ {
+ if (u64Value <= RT_NS_100MS)
+ {
+ pGVMM->nsEarlyWakeUp1 = u64Value;
+ pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
+ }
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else if (!strcmp(pszName, "EarlyWakeUp2"))
+ {
+ if (u64Value <= RT_NS_100MS)
+ {
+ pGVMM->nsEarlyWakeUp2 = u64Value;
+ pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
+ }
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else
+ rc = VERR_CFGM_VALUE_NOT_FOUND;
+ return rc;
+}
+
+
+/**
+ * A quick hack for getting global config values.
+ *
+ * @returns VBox status code.
+ *
+ * @param pSession The session handle. Used for authentication.
+ * @param pszName The variable name.
+ * @param pu64Value Where to return the value.
+ */
+GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
+{
+ /*
+ * Validate input.
+ */
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+ AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
+ AssertPtrReturn(pszName, VERR_INVALID_POINTER);
+ AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
+
+ /*
+ * String switch time!
+ */
+ if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
+ return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
+ int rc = VINF_SUCCESS;
+ pszName += sizeof("/GVMM/") - 1;
+ if (!strcmp(pszName, "cEMTsMeansCompany"))
+ *pu64Value = pGVMM->cEMTsMeansCompany;
+ else if (!strcmp(pszName, "MinSleepAlone"))
+ *pu64Value = pGVMM->nsMinSleepAlone;
+ else if (!strcmp(pszName, "MinSleepCompany"))
+ *pu64Value = pGVMM->nsMinSleepCompany;
+ else if (!strcmp(pszName, "EarlyWakeUp1"))
+ *pu64Value = pGVMM->nsEarlyWakeUp1;
+ else if (!strcmp(pszName, "EarlyWakeUp2"))
+ *pu64Value = pGVMM->nsEarlyWakeUp2;
+ else
+ rc = VERR_CFGM_VALUE_NOT_FOUND;
+ return rc;
+}
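+
+/*
+ * Editor's note: a small, non-authoritative usage sketch for the two config
+ * accessors above. Names must carry the "/GVMM/" prefix, the nanosecond
+ * knobs are range checked against RT_NS_100MS, and pSession is whatever
+ * support driver session the caller is authenticated with:
+ *
+ *     uint64_t u64 = 0;
+ *     int rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000);  // 0.5 ms
+ *     if (RT_SUCCESS(rc))
+ *         rc = GVMMR0QueryConfig(pSession, "/GVMM/MinSleepAlone", &u64);
+ */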
+
+
+/**
+ * Acquire the 'used' lock in shared mode.
+ *
+ * This prevents destruction of the VM while we're in ring-0.
+ *
+ * @returns IPRT status code, see RTCritSectRwEnterShared.
+ * @param a_pGVMM The GVMM instance data.
+ * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
+ */
+#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
+
+/**
+ * Release the 'used' lock when owning it in shared mode.
+ *
+ * @returns IPRT status code, see RTCritSectRwLeaveShared.
+ * @param a_pGVMM The GVMM instance data.
+ * @sa GVMMR0_USED_SHARED_LOCK
+ */
+#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
+
+/**
+ * Acquire the 'used' lock in exclusive mode.
+ *
+ * Only use this function when making changes to the used list.
+ *
+ * @returns IPRT status code, see RTCritSectRwEnterExcl.
+ * @param a_pGVMM The GVMM instance data.
+ * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
+ */
+#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
+
+/**
+ * Release the 'used' lock when owning it in exclusive mode.
+ *
+ * @returns IPRT status code, see RTCritSectRwLeaveExcl.
+ * @param a_pGVMM The GVMM instance data.
+ * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
+ */
+#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
+
+
+/**
+ * Try acquire the 'create & destroy' lock.
+ *
+ * @returns IPRT status code, see RTCritSectEnter.
+ * @param pGVMM The GVMM instance data.
+ */
+DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
+{
+ LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
+ int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
+ LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
+ return rc;
+}
+
+
+/**
+ * Release the 'create & destroy' lock.
+ *
+ * @returns IPRT status code, see RTCritSectLeave.
+ * @param pGVMM The GVMM instance data.
+ */
+DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
+{
+ LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
+ int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
+ AssertRC(rc);
+ return rc;
+}
+
+
+/**
+ * Request wrapper for the GVMMR0CreateVM API.
+ *
+ * @returns VBox status code.
+ * @param pReq The request buffer.
+ * @param pSession The session handle. The VM will be associated with this.
+ */
+GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
+{
+ /*
+ * Validate the request.
+ */
+ if (!VALID_PTR(pReq))
+ return VERR_INVALID_POINTER;
+ if (pReq->Hdr.cbReq != sizeof(*pReq))
+ return VERR_INVALID_PARAMETER;
+ if (pReq->pSession != pSession)
+ return VERR_INVALID_POINTER;
+
+ /*
+ * Execute it.
+ */
+ PVM pVM;
+ pReq->pVMR0 = NULL;
+ pReq->pVMR3 = NIL_RTR3PTR;
+ int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pVM);
+ if (RT_SUCCESS(rc))
+ {
+ pReq->pVMR0 = pVM;
+ pReq->pVMR3 = pVM->pVMR3;
+ }
+ return rc;
+}
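+
+/*
+ * Editor's note: a non-authoritative sketch of how the creation request above
+ * is typically used. The VMMR0_DO_GVMM_CREATE_VM operation name and the NIL
+ * ring-0 VM pointer are assumptions about the usual dispatch path (no VM
+ * exists yet, and the calling thread becomes EMT0 of the new VM):
+ *
+ *     GVMMCREATEVMREQ Req;
+ *     RT_ZERO(Req);
+ *     Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+ *     Req.Hdr.cbReq    = sizeof(Req);
+ *     Req.pSession     = pSession;
+ *     Req.cCpus        = 1;
+ *     PVM pVMR3 = NULL;
+ *     int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID,
+ *                               VMMR0_DO_GVMM_CREATE_VM, 0, &Req.Hdr);
+ *     if (RT_SUCCESS(rc))
+ *         pVMR3 = Req.pVMR3;   // Req.pVMR0 holds the ring-0 address
+ */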
+
+
+/**
+ * Allocates the VM structure and registers it with GVM.
+ *
+ * The caller will become the VM owner and thereby the EMT.
+ *
+ * @returns VBox status code.
+ * @param pSession The support driver session.
+ * @param cCpus Number of virtual CPUs for the new VM.
+ * @param ppVM Where to store the pointer to the VM structure.
+ *
+ * @thread EMT.
+ */
+GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
+{
+ LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
+ *ppVM = NULL;
+
+ if ( cCpus == 0
+ || cCpus > VMM_MAX_CPU_COUNT)
+ return VERR_INVALID_PARAMETER;
+
+ RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
+ AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
+ RTPROCESS ProcId = RTProcSelf();
+ AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
+
+ /*
+ * The whole allocation process is protected by the lock.
+ */
+ int rc = gvmmR0CreateDestroyLock(pGVMM);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * Only one VM per session.
+ */
+ if (SUPR0GetSessionVM(pSession) != NULL)
+ {
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
+ return VERR_ALREADY_EXISTS;
+ }
+
+ /*
+ * Allocate a handle first so we don't waste resources unnecessarily.
+ */
+ uint16_t iHandle = pGVMM->iFreeHead;
+ if (iHandle)
+ {
+ PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
+
+ /* consistency checks, a bit paranoid as always. */
+ if ( !pHandle->pVM
+ && !pHandle->pGVM
+ && !pHandle->pvObj
+ && pHandle->iSelf == iHandle)
+ {
+ pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
+ if (pHandle->pvObj)
+ {
+ /*
+ * Move the handle from the free to used list and perform permission checks.
+ */
+ rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
+ AssertRC(rc);
+
+ pGVMM->iFreeHead = pHandle->iNext;
+ pHandle->iNext = pGVMM->iUsedHead;
+ pGVMM->iUsedHead = iHandle;
+ pGVMM->cVMs++;
+
+ pHandle->pVM = NULL;
+ pHandle->pGVM = NULL;
+ pHandle->pSession = pSession;
+ pHandle->hEMT0 = NIL_RTNATIVETHREAD;
+ pHandle->ProcId = NIL_RTPROCESS;
+
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+
+ rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Allocate the global VM structure (GVM) and initialize it.
+ */
+ PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]));
+ if (pGVM)
+ {
+ pGVM->u32Magic = GVM_MAGIC;
+ pGVM->hSelf = iHandle;
+ pGVM->pVM = NULL;
+ pGVM->cCpus = cCpus;
+ pGVM->pSession = pSession;
+
+ gvmmR0InitPerVMData(pGVM);
+ GMMR0InitPerVMData(pGVM);
+
+ /*
+ * Allocate the shared VM structure and associated page array.
+ */
+ const uint32_t cbVM = RT_UOFFSETOF_DYN(VM, aCpus[cCpus]);
+ const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
+ rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
+ if (RT_SUCCESS(rc))
+ {
+ PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
+ memset(pVM, 0, cPages << PAGE_SHIFT);
+ pVM->enmVMState = VMSTATE_CREATING;
+ pVM->pVMR0 = pVM;
+ pVM->pSession = pSession;
+ pVM->hSelf = iHandle;
+ pVM->cbSelf = cbVM;
+ pVM->cCpus = cCpus;
+ pVM->uCpuExecutionCap = 100; /* default is no cap. */
+ pVM->offVMCPU = RT_UOFFSETOF_DYN(VM, aCpus);
+ AssertCompileMemberAlignment(VM, cpum, 64);
+ AssertCompileMemberAlignment(VM, tm, 64);
+ AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
+
+ rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
+ if (RT_SUCCESS(rc))
+ {
+ PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
+ for (uint32_t iPage = 0; iPage < cPages; iPage++)
+ {
+ paPages[iPage].uReserved = 0;
+ paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
+ Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
+ }
+
+ /*
+ * Map them into ring-3.
+ */
+ rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
+ RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
+ if (RT_SUCCESS(rc))
+ {
+ PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
+ pVM->pVMR3 = pVMR3;
+ AssertPtr((void *)pVMR3);
+
+ /* Initialize all the VM pointers. */
+ for (VMCPUID i = 0; i < cCpus; i++)
+ {
+ pVM->aCpus[i].idCpu = i;
+ pVM->aCpus[i].pVMR0 = pVM;
+ pVM->aCpus[i].pVMR3 = pVMR3;
+ pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
+ pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
+ }
+
+ rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
+ 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
+ NIL_RTR0PROCESS);
+ if (RT_SUCCESS(rc))
+ {
+ pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
+ AssertPtr((void *)pVM->paVMPagesR3);
+
+ /* complete the handle - take the UsedLock sem just to be careful. */
+ rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
+ AssertRC(rc);
+
+ pHandle->pVM = pVM;
+ pHandle->pGVM = pGVM;
+ pHandle->hEMT0 = hEMT0;
+ pHandle->ProcId = ProcId;
+ pGVM->pVM = pVM;
+ pGVM->pVMR3 = pVMR3;
+ pGVM->aCpus[0].hEMT = hEMT0;
+ pVM->aCpus[0].hNativeThreadR0 = hEMT0;
+ pGVMM->cEMTs += cCpus;
+
+ for (VMCPUID i = 0; i < cCpus; i++)
+ {
+ pGVM->aCpus[i].pVCpu = &pVM->aCpus[i];
+ pGVM->aCpus[i].pVM = pVM;
+ }
+
+ /* Associate it with the session and create the context hook for EMT0. */
+ rc = SUPR0SetSessionVM(pSession, pGVM, pVM);
+ if (RT_SUCCESS(rc))
+ {
+ rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[0]);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Done!
+ */
+ VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus);
+
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+
+ CPUMR0RegisterVCpuThread(&pVM->aCpus[0]);
+
+ *ppVM = pVM;
+ Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVMR3, pGVM, iHandle));
+ return VINF_SUCCESS;
+ }
+
+ SUPR0SetSessionVM(pSession, NULL, NULL);
+ }
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ }
+
+ RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
+ pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
+ }
+ RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
+ pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
+ }
+ RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
+ pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
+ }
+ }
+ }
+ /* else: The user wasn't permitted to create this VM. */
+
+ /*
+ * The handle will be freed by gvmmR0HandleObjDestructor as we release the
+ * object reference here. A little extra mess because of non-recursive lock.
+ */
+ void *pvObj = pHandle->pvObj;
+ pHandle->pvObj = NULL;
+ gvmmR0CreateDestroyUnlock(pGVMM);
+
+ SUPR0ObjRelease(pvObj, pSession);
+
+ SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc);
+ return rc;
+ }
+
+ rc = VERR_NO_MEMORY;
+ }
+ else
+ rc = VERR_GVMM_IPE_1;
+ }
+ else
+ rc = VERR_GVM_TOO_MANY_VMS;
+
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ return rc;
+}
+
+
+/**
+ * Initializes the per VM data belonging to GVMM.
+ *
+ * @param pGVM Pointer to the global VM structure.
+ */
+static void gvmmR0InitPerVMData(PGVM pGVM)
+{
+ AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
+ AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
+ pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
+ pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
+ pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
+ pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
+ pGVM->gvmm.s.fDoneVMMR0Init = false;
+ pGVM->gvmm.s.fDoneVMMR0Term = false;
+
+ for (VMCPUID i = 0; i < pGVM->cCpus; i++)
+ {
+ pGVM->aCpus[i].idCpu = i;
+ pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
+ pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
+ pGVM->aCpus[i].pGVM = pGVM;
+ pGVM->aCpus[i].pVCpu = NULL;
+ pGVM->aCpus[i].pVM = NULL;
+ }
+}
+
+
+/**
+ * Does the VM initialization.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ */
+GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
+{
+ LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
+
+ int rc = VERR_INTERNAL_ERROR_3;
+ if ( !pGVM->gvmm.s.fDoneVMMR0Init
+ && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
+ {
+ for (VMCPUID i = 0; i < pGVM->cCpus; i++)
+ {
+ rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
+ if (RT_FAILURE(rc))
+ {
+ pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
+ break;
+ }
+ }
+ }
+ else
+ rc = VERR_WRONG_ORDER;
+
+ LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Indicates that we're done with the ring-0 initialization
+ * of the VM.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @thread EMT(0)
+ */
+GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
+{
+ /* Set the indicator. */
+ pGVM->gvmm.s.fDoneVMMR0Init = true;
+}
+
+
+/**
+ * Indicates that we're doing the ring-0 termination of the VM.
+ *
+ * @returns true if termination hasn't been done already, false if it has.
+ * @param pGVM Pointer to the global VM structure. Optional.
+ * @thread EMT(0) or session cleanup thread.
+ */
+GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
+{
+ /* Validate the VM structure, state and handle. */
+ AssertPtrReturn(pGVM, false);
+
+ /* Set the indicator. */
+ if (pGVM->gvmm.s.fDoneVMMR0Term)
+ return false;
+ pGVM->gvmm.s.fDoneVMMR0Term = true;
+ return true;
+}
+
+
+/**
+ * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
+ *
+ * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
+ * and unfortunately the caller is not the EMT thread. For security reasons, it
+ * would've been nice if the caller was actually the EMT thread or if we somehow
+ * could've associated the calling thread with the VM up front.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ *
+ * @thread EMT(0) if it's associated with the VM, otherwise any thread.
+ */
+GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM, PVM pVM)
+{
+ LogFlow(("GVMMR0DestroyVM: pGVM=%p pVM=%p\n", pGVM, pVM));
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ /*
+ * Validate the VM structure, state and caller.
+ */
+ AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
+ AssertPtrReturn(pVM, VERR_INVALID_POINTER);
+ AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
+ AssertReturn(pGVM->pVM == pVM, VERR_INVALID_POINTER);
+ AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState),
+ VERR_WRONG_ORDER);
+
+ uint32_t hGVM = pGVM->hSelf;
+ ASMCompilerBarrier();
+ AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
+ AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
+
+ PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
+ AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
+
+ RTPROCESS ProcId = RTProcSelf();
+ RTNATIVETHREAD hSelf = RTThreadNativeSelf();
+ AssertReturn( ( pHandle->hEMT0 == hSelf
+ && pHandle->ProcId == ProcId)
+ || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
+
+ /*
+ * Lookup the handle and destroy the object.
+ * Since the lock isn't recursive and we'll have to leave it before dereferencing the
+ * object, we take some precautions against racing callers just in case...
+ */
+ int rc = gvmmR0CreateDestroyLock(pGVMM);
+ AssertRC(rc);
+
+ /* Be careful here because we might theoretically be racing someone else cleaning up. */
+ if ( pHandle->pVM == pVM
+ && ( ( pHandle->hEMT0 == hSelf
+ && pHandle->ProcId == ProcId)
+ || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
+ && VALID_PTR(pHandle->pvObj)
+ && VALID_PTR(pHandle->pSession)
+ && VALID_PTR(pHandle->pGVM)
+ && pHandle->pGVM->u32Magic == GVM_MAGIC)
+ {
+ /* Check that other EMTs have deregistered. */
+ uint32_t cNotDeregistered = 0;
+ for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
+ cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
+ if (cNotDeregistered == 0)
+ {
+ /* Grab the object pointer. */
+ void *pvObj = pHandle->pvObj;
+ pHandle->pvObj = NULL;
+ gvmmR0CreateDestroyUnlock(pGVMM);
+
+ SUPR0ObjRelease(pvObj, pHandle->pSession);
+ }
+ else
+ {
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
+ }
+ }
+ else
+ {
+ SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
+ pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ rc = VERR_GVMM_IPE_2;
+ }
+
+ return rc;
+}
+
+
+/**
+ * Performs VM cleanup task as part of object destruction.
+ *
+ * @param pGVM The GVM pointer.
+ */
+static void gvmmR0CleanupVM(PGVM pGVM)
+{
+ if ( pGVM->gvmm.s.fDoneVMMR0Init
+ && !pGVM->gvmm.s.fDoneVMMR0Term)
+ {
+ if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
+ && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM)
+ {
+ LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
+ VMMR0TermVM(pGVM, pGVM->pVM, NIL_VMCPUID);
+ }
+ else
+ AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
+ }
+
+ GMMR0CleanupVM(pGVM);
+#ifdef VBOX_WITH_NEM_R0
+ NEMR0CleanupVM(pGVM);
+#endif
+
+ AssertCompile((uintptr_t)NIL_RTTHREADCTXHOOK == 0); /* Depends on zero initialized memory working for NIL at the moment. */
+ for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
+ {
+ /** @todo Can we busy wait here for all thread-context hooks to be
+ * deregistered before releasing (destroying) it? Only until we find a
+ * solution for not deregistering hooks every time we're leaving HMR0
+ * context. */
+ VMMR0ThreadCtxHookDestroyForEmt(&pGVM->pVM->aCpus[idCpu]);
+ }
+}
+
+
+/**
+ * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
+ *
+ * pvUser1 is the GVM instance pointer.
+ * pvUser2 is the handle pointer.
+ */
+static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
+{
+ LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
+
+ NOREF(pvObj);
+
+ /*
+ * Some quick, paranoid, input validation.
+ */
+ PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
+ AssertPtr(pHandle);
+ PGVMM pGVMM = (PGVMM)pvUser1;
+ Assert(pGVMM == g_pGVMM);
+ const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
+ if ( !iHandle
+ || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
+ || iHandle != pHandle->iSelf)
+ {
+ SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
+ return;
+ }
+
+ int rc = gvmmR0CreateDestroyLock(pGVMM);
+ AssertRC(rc);
+ rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
+ AssertRC(rc);
+
+ /*
+ * This is a tad slow but a doubly linked list is too much hassle.
+ */
+ if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
+ {
+ SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ return;
+ }
+
+ if (pGVMM->iUsedHead == iHandle)
+ pGVMM->iUsedHead = pHandle->iNext;
+ else
+ {
+ uint16_t iPrev = pGVMM->iUsedHead;
+ int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
+ while (iPrev)
+ {
+ if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
+ {
+ SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ return;
+ }
+ if (RT_UNLIKELY(c-- <= 0))
+ {
+ iPrev = 0;
+ break;
+ }
+
+ if (pGVMM->aHandles[iPrev].iNext == iHandle)
+ break;
+ iPrev = pGVMM->aHandles[iPrev].iNext;
+ }
+ if (!iPrev)
+ {
+ SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ return;
+ }
+
+ Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
+ pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
+ }
+ pHandle->iNext = 0;
+ pGVMM->cVMs--;
+
+ /*
+ * Do the global cleanup round.
+ */
+ PGVM pGVM = pHandle->pGVM;
+ if ( VALID_PTR(pGVM)
+ && pGVM->u32Magic == GVM_MAGIC)
+ {
+ pGVMM->cEMTs -= pGVM->cCpus;
+
+ if (pGVM->pSession)
+ SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
+
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+
+ gvmmR0CleanupVM(pGVM);
+
+ /*
+ * Do the GVMM cleanup - must be done last.
+ */
+ /* The VM and VM pages mappings/allocations. */
+ if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
+ {
+ rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
+ pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
+ }
+
+ if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
+ {
+ rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
+ pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
+ }
+
+ if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
+ {
+ rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
+ pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
+ }
+
+ if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
+ {
+ rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
+ pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
+ }
+
+ for (VMCPUID i = 0; i < pGVM->cCpus; i++)
+ {
+ if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
+ {
+ rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
+ pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
+ }
+ }
+
+ /* the GVM structure itself. */
+ pGVM->u32Magic |= UINT32_C(0x80000000);
+ RTMemFree(pGVM);
+
+ /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
+ rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
+ AssertRC(rc);
+ }
+ /* else: GVMMR0CreateVM cleanup. */
+
+ /*
+ * Free the handle.
+ */
+ pHandle->iNext = pGVMM->iFreeHead;
+ pGVMM->iFreeHead = iHandle;
+ ASMAtomicWriteNullPtr(&pHandle->pGVM);
+ ASMAtomicWriteNullPtr(&pHandle->pVM);
+ ASMAtomicWriteNullPtr(&pHandle->pvObj);
+ ASMAtomicWriteNullPtr(&pHandle->pSession);
+ ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
+ ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
+
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
+}
+
+
+/**
+ * Registers the calling thread as the EMT of a Virtual CPU.
+ *
+ * Note that VCPU 0 is automatically registered during VM creation.
+ *
+ * @returns VBox status code
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu VCPU id to register the current thread as.
+ */
+GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+ AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
+
+ /*
+ * Validate the VM structure, state and handle.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
+ if (RT_SUCCESS(rc))
+ {
+ if (idCpu < pGVM->cCpus)
+ {
+ /* Check that the EMT isn't already assigned to a thread. */
+ if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
+ {
+ Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
+
+ /* A thread may only be one EMT. */
+ RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
+ for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
+ AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Do the assignment, then try setup the hook. Undo if that fails.
+ */
+ pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
+
+ rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[idCpu]);
+ if (RT_SUCCESS(rc))
+ CPUMR0RegisterVCpuThread(&pVM->aCpus[idCpu]);
+ else
+ pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
+ }
+ }
+ else
+ rc = VERR_ACCESS_DENIED;
+ }
+ else
+ rc = VERR_INVALID_CPU_ID;
+ }
+ return rc;
+}
+
+
+/**
+ * Deregisters the calling thread as the EMT of a Virtual CPU.
+ *
+ * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
+ *
+ * @returns VBox status code
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu VCPU id to deregister the current thread from.
+ */
+GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+ AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
+
+ /*
+ * Validate the VM structure, state and handle.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Take the destruction lock and recheck the handle state to
+ * prevent racing GVMMR0DestroyVM.
+ */
+ gvmmR0CreateDestroyLock(pGVMM);
+ uint32_t hSelf = pGVM->hSelf;
+ ASMCompilerBarrier();
+ if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
+ && pGVMM->aHandles[hSelf].pvObj != NULL
+ && pGVMM->aHandles[hSelf].pGVM == pGVM)
+ {
+ /*
+ * Do per-EMT cleanups.
+ */
+ VMMR0ThreadCtxHookDestroyForEmt(&pVM->aCpus[idCpu]);
+
+ /*
+ * Invalidate hEMT. We don't use NIL here as that would allow
+ * GVMMR0RegisterVCpu to be called again, and we don't want that.
+ */
+ AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
+ pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
+ pVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
+ }
+
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ }
+ return rc;
+}
+
+
+/**
+ * Lookup a GVM structure by its handle.
+ *
+ * @returns The GVM pointer on success, NULL on failure.
+ * @param hGVM The global VM handle. Asserts on bad handle.
+ */
+GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
+{
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
+
+ /*
+ * Validate.
+ */
+ AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
+ AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
+
+ /*
+ * Look it up.
+ */
+ PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
+ AssertPtrReturn(pHandle->pVM, NULL);
+ AssertPtrReturn(pHandle->pvObj, NULL);
+ PGVM pGVM = pHandle->pGVM;
+ AssertPtrReturn(pGVM, NULL);
+ AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
+
+ return pHandle->pGVM;
+}
+
+
+/**
+ * Lookup a GVM structure by the shared VM structure.
+ *
+ * The calling thread must be in the same process as the VM. All current lookups
+ * are by threads inside the same process, so this will not be an issue.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ * @param ppGVM Where to store the GVM pointer.
+ * @param ppGVMM Where to store the pointer to the GVMM instance data.
+ * @param fTakeUsedLock Whether to take the used lock or not. We take it in
+ * shared mode when requested.
+ *
+ * Be very careful if not taking the lock as it's
+ * possible that the VM will disappear then!
+ *
+ * @remark This will not assert on an invalid pVM but try to return silently.
+ */
+static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
+{
+ RTPROCESS ProcId = RTProcSelf();
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ /*
+ * Validate.
+ */
+ if (RT_UNLIKELY( !VALID_PTR(pVM)
+ || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
+ return VERR_INVALID_POINTER;
+ if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
+ || pVM->enmVMState >= VMSTATE_TERMINATED))
+ return VERR_INVALID_POINTER;
+
+ uint16_t hGVM = pVM->hSelf;
+ ASMCompilerBarrier();
+ if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
+ || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
+ return VERR_INVALID_HANDLE;
+
+ /*
+ * Look it up.
+ */
+ PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
+ PGVM pGVM;
+ if (fTakeUsedLock)
+ {
+ int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
+ AssertRCReturn(rc, rc);
+
+ pGVM = pHandle->pGVM;
+ if (RT_UNLIKELY( pHandle->pVM != pVM
+ || pHandle->ProcId != ProcId
+ || !VALID_PTR(pHandle->pvObj)
+ || !VALID_PTR(pGVM)
+ || pGVM->pVM != pVM))
+ {
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ return VERR_INVALID_HANDLE;
+ }
+ }
+ else
+ {
+ if (RT_UNLIKELY(pHandle->pVM != pVM))
+ return VERR_INVALID_HANDLE;
+ if (RT_UNLIKELY(pHandle->ProcId != ProcId))
+ return VERR_INVALID_HANDLE;
+ if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
+ return VERR_INVALID_HANDLE;
+
+ pGVM = pHandle->pGVM;
+ if (RT_UNLIKELY(!VALID_PTR(pGVM)))
+ return VERR_INVALID_HANDLE;
+ if (RT_UNLIKELY(pGVM->pVM != pVM))
+ return VERR_INVALID_HANDLE;
+ }
+
+ *ppGVM = pGVM;
+ *ppGVMM = pGVMM;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Fast lookup of a GVM structure by the cross context VM structure.
+ *
+ * This is mainly used as a glue function, so performance is important.
+ *
+ * @returns GVM on success, NULL on failure.
+ * @param pVM The cross context VM structure. ASSUMES to be
+ * reasonably valid, so we can do fewer checks than in
+ * gvmmR0ByVM.
+ *
+ * @note Do not use this on pVM structures from userland!
+ */
+GVMMR0DECL(PGVM) GVMMR0FastGetGVMByVM(PVM pVM)
+{
+ AssertPtr(pVM);
+ Assert(!((uintptr_t)pVM & PAGE_OFFSET_MASK));
+
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
+
+ /*
+ * Validate.
+ */
+ uint16_t hGVM = pVM->hSelf;
+ ASMCompilerBarrier();
+ AssertReturn(hGVM != NIL_GVM_HANDLE && hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
+
+ /*
+ * Look it up and check pVM against the value in the handle and GVM structures.
+ */
+ PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
+ AssertReturn(pHandle->pVM == pVM, NULL);
+
+ PGVM pGVM = pHandle->pGVM;
+ AssertPtrReturn(pGVM, NULL);
+ AssertReturn(pGVM->pVM == pVM, NULL);
+
+ return pGVM;
+}
+
+
+/**
+ * Check that the given GVM and VM structures match up.
+ *
+ * The calling thread must be in the same process as the VM. All current lookups
+ * are by threads inside the same process, so this will not be an issue.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param ppGVMM Where to store the pointer to the GVMM instance data.
+ * @param fTakeUsedLock Whether to take the used lock or not. We take it in
+ * shared mode when requested.
+ *
+ * Be very careful if not taking the lock as it's
+ * possible that the VM will disappear then!
+ *
+ * @remark This will not assert on an invalid pVM but try to return silently.
+ */
+static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock)
+{
+ /*
+ * Check the pointers.
+ */
+ int rc;
+ if (RT_LIKELY(RT_VALID_PTR(pGVM)))
+ {
+ if (RT_LIKELY( RT_VALID_PTR(pVM)
+ && ((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0))
+ {
+ if (RT_LIKELY(pGVM->pVM == pVM))
+ {
+ /*
+ * Get the pGVMM instance and check the VM handle.
+ */
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ uint16_t hGVM = pGVM->hSelf;
+ if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
+ && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
+ {
+ RTPROCESS const pidSelf = RTProcSelf();
+ PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
+ if (fTakeUsedLock)
+ {
+ rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
+ AssertRCReturn(rc, rc);
+ }
+
+ if (RT_LIKELY( pHandle->pGVM == pGVM
+ && pHandle->pVM == pVM
+ && pHandle->ProcId == pidSelf
+ && RT_VALID_PTR(pHandle->pvObj)))
+ {
+ /*
+ * Some more VM data consistency checks.
+ */
+ if (RT_LIKELY( pVM->cCpus == pGVM->cCpus
+ && pVM->hSelf == hGVM
+ && pVM->enmVMState >= VMSTATE_CREATING
+ && pVM->enmVMState <= VMSTATE_TERMINATED
+ && pVM->pVMR0 == pVM))
+ {
+ *ppGVMM = pGVMM;
+ return VINF_SUCCESS;
+ }
+ }
+
+ if (fTakeUsedLock)
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ }
+ }
+ rc = VERR_INVALID_VM_HANDLE;
+ }
+ else
+ rc = VERR_INVALID_POINTER;
+ }
+ else
+ rc = VERR_INVALID_POINTER;
+ return rc;
+}
+
+
+/**
+ * Check that the given GVM and VM structures match up.
+ *
+ * The calling thread must be in the same process as the VM. All current lookups
+ * are by threads inside the same process, so this will not be an issue.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
+ * @param ppGVMM Where to store the pointer to the GVMM instance data.
+ * @thread EMT
+ *
+ * @remarks This will assert in all failure paths.
+ */
+static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM)
+{
+ /*
+ * Check the pointers.
+ */
+ AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
+
+ AssertPtrReturn(pVM, VERR_INVALID_POINTER);
+ AssertReturn(((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
+ AssertReturn(pGVM->pVM == pVM, VERR_INVALID_VM_HANDLE);
+
+
+ /*
+ * Get the pGVMM instance and check the VM handle.
+ */
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ uint16_t hGVM = pGVM->hSelf;
+ ASMCompilerBarrier();
+ AssertReturn( hGVM != NIL_GVM_HANDLE
+ && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
+
+ RTPROCESS const pidSelf = RTProcSelf();
+ PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
+ AssertReturn( pHandle->pGVM == pGVM
+ && pHandle->pVM == pVM
+ && pHandle->ProcId == pidSelf
+ && RT_VALID_PTR(pHandle->pvObj),
+ VERR_INVALID_HANDLE);
+
+ /*
+ * Check the EMT claim.
+ */
+ RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
+ AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
+ AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
+
+ /*
+ * Some more VM data consistency checks.
+ */
+ AssertReturn(pVM->cCpus == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
+ AssertReturn(pVM->hSelf == hGVM, VERR_INCONSISTENT_VM_HANDLE);
+ AssertReturn(pVM->pVMR0 == pVM, VERR_INCONSISTENT_VM_HANDLE);
+ AssertReturn( pVM->enmVMState >= VMSTATE_CREATING
+ && pVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
+
+ *ppGVMM = pGVMM;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Validates a GVM/VM pair.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ */
+GVMMR0DECL(int) GVMMR0ValidateGVMandVM(PGVM pGVM, PVM pVM)
+{
+ PGVMM pGVMM;
+ return gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /*fTakeUsedLock*/);
+}
+
+
+
+/**
+ * Validates a GVM/VM/EMT combo.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The Virtual CPU ID of the calling EMT.
+ * @thread EMT(idCpu)
+ */
+GVMMR0DECL(int) GVMMR0ValidateGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+ PGVMM pGVMM;
+ return gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
+}
+
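A rough usage sketch of the validator pair: a ring-0 request handler acting on behalf of a specific virtual CPU would run the GVM/VM/EMT check before touching per-VCPU state. The handler below is hypothetical; only the GVMMR0ValidateGVMandVMandEMT call reflects the API above.

/* Hypothetical handler, shown only to illustrate the validation step. */
static int vmmR0SampleDoPerVCpuWork(PGVM pGVM, PVM pVM, VMCPUID idCpu)
{
    int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
    if (RT_FAILURE(rc))
        return rc; /* bad handles, wrong process, or not the EMT for idCpu */
    /* ... it is now safe to use pGVM->aCpus[idCpu] ... */
    return VINF_SUCCESS;
}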
+
+/**
+ * Looks up the VM belonging to the specified EMT thread.
+ *
+ * This is used by the assertion machinery in VMMR0.cpp to avoid causing
+ * unnecessary kernel panics when the EMT thread hits an assertion. The
+ * calling thread may or may not be an EMT thread.
+ *
+ * @returns Pointer to the VM on success, NULL on failure.
+ * @param hEMT The native thread handle of the EMT.
+ * NIL_RTNATIVETHREAD means the current thread
+ */
+GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
+{
+ /*
+ * No assertions here as we're usually called in an AssertMsgN or
+ * RTAssert* context.
+ */
+ PGVMM pGVMM = g_pGVMM;
+ if ( !VALID_PTR(pGVMM)
+ || pGVMM->u32Magic != GVMM_MAGIC)
+ return NULL;
+
+ if (hEMT == NIL_RTNATIVETHREAD)
+ hEMT = RTThreadNativeSelf();
+ RTPROCESS ProcId = RTProcSelf();
+
+ /*
+ * Search the handles in a linear fashion as we don't dare to take the lock (assert).
+ */
+/** @todo introduce some pid hash table here, please. */
+ for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
+ {
+ if ( pGVMM->aHandles[i].iSelf == i
+ && pGVMM->aHandles[i].ProcId == ProcId
+ && VALID_PTR(pGVMM->aHandles[i].pvObj)
+ && VALID_PTR(pGVMM->aHandles[i].pVM)
+ && VALID_PTR(pGVMM->aHandles[i].pGVM))
+ {
+ if (pGVMM->aHandles[i].hEMT0 == hEMT)
+ return pGVMM->aHandles[i].pVM;
+
+ /* This is fairly safe with the current process-per-VM approach. */
+ PGVM pGVM = pGVMM->aHandles[i].pGVM;
+ VMCPUID const cCpus = pGVM->cCpus;
+ ASMCompilerBarrier();
+ if ( cCpus < 1
+ || cCpus > VMM_MAX_CPU_COUNT)
+ continue;
+ for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
+ if (pGVM->aCpus[idCpu].hEMT == hEMT)
+ return pGVMM->aHandles[i].pVM;
+ }
+ }
+ return NULL;
+}
+
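A hedged sketch of the intended use: assertion or logging code that wants a bit of VM context for the current thread without taking any locks. The snippet is illustrative only.

/* Hypothetical: best-effort VM lookup from an assertion/logging path. */
PVM pVM = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD /* current thread */);
if (pVM)
    SUPR0Printf("Assertion hit by an EMT of VM %p (state %d)\n", pVM, pVM->enmVMState);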
+
+/**
+ * Looks up the GVMCPU belonging to the specified EMT thread.
+ *
+ * This is used by the assertion machinery in VMMR0.cpp to avoid causing
+ * unnecessary kernel panics when the EMT thread hits an assertion. The
+ * calling thread may or may not be an EMT thread.
+ *
+ * @returns Pointer to the ring-0 virtual CPU structure on success, NULL on failure.
+ * @param hEMT The native thread handle of the EMT.
+ * NIL_RTNATIVETHREAD means the current thread
+ */
+GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
+{
+ /*
+ * No assertions here as we're usually called in an AssertMsgN,
+ * RTAssert*, Log and LogRel contexts.
+ */
+ PGVMM pGVMM = g_pGVMM;
+ if ( !VALID_PTR(pGVMM)
+ || pGVMM->u32Magic != GVMM_MAGIC)
+ return NULL;
+
+ if (hEMT == NIL_RTNATIVETHREAD)
+ hEMT = RTThreadNativeSelf();
+ RTPROCESS ProcId = RTProcSelf();
+
+ /*
+ * Search the handles in a linear fashion as we don't dare to take the lock (assert).
+ */
+/** @todo introduce some pid hash table here, please. */
+ for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
+ {
+ if ( pGVMM->aHandles[i].iSelf == i
+ && pGVMM->aHandles[i].ProcId == ProcId
+ && VALID_PTR(pGVMM->aHandles[i].pvObj)
+ && VALID_PTR(pGVMM->aHandles[i].pVM)
+ && VALID_PTR(pGVMM->aHandles[i].pGVM))
+ {
+ PGVM pGVM = pGVMM->aHandles[i].pGVM;
+ if (pGVMM->aHandles[i].hEMT0 == hEMT)
+ return &pGVM->aCpus[0];
+
+ /* This is fairly safe with the current process-per-VM approach. */
+ VMCPUID const cCpus = pGVM->cCpus;
+ ASMCompilerBarrier();
+ if ( cCpus < 1
+ || cCpus > VMM_MAX_CPU_COUNT)
+ continue;
+ for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
+ if (pGVM->aCpus[idCpu].hEMT == hEMT)
+ return &pGVM->aCpus[idCpu];
+ }
+ }
+ return NULL;
+}
+
+
+/**
+ * This will wake up expired and soon-to-be-expired VMs.
+ *
+ * @returns The number of EMTs that have been woken up.
+ * @param pGVMM Pointer to the GVMM instance data.
+ * @param u64Now The current time.
+ */
+static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
+{
+ /*
+ * Skip this if we've been disabled, either because of high resolution wake-ups
+ * or by the user.
+ */
+ if (!pGVMM->fDoEarlyWakeUps)
+ return 0;
+
+/** @todo Rewrite this algorithm. See performance defect XYZ. */
+
+ /*
+ * A cheap optimization to stop wasting so much time here on big setups.
+ */
+ const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
+ if ( pGVMM->cHaltedEMTs == 0
+ || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
+ return 0;
+
+ /*
+ * Only one thread doing this at a time.
+ */
+ if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
+ return 0;
+
+ /*
+ * The first pass will wake up VMs which have actually expired
+ * and look for VMs that should be woken up in the 2nd and 3rd passes.
+ */
+ const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
+ uint64_t u64Min = UINT64_MAX;
+ unsigned cWoken = 0;
+ unsigned cHalted = 0;
+ unsigned cTodo2nd = 0;
+ unsigned cTodo3rd = 0;
+ for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
+ if ( VALID_PTR(pCurGVM)
+ && pCurGVM->u32Magic == GVM_MAGIC)
+ {
+ for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
+ {
+ PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
+ uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
+ if (u64)
+ {
+ if (u64 <= u64Now)
+ {
+ if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
+ {
+ int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
+ AssertRC(rc);
+ cWoken++;
+ }
+ }
+ else
+ {
+ cHalted++;
+ if (u64 <= uNsEarlyWakeUp1)
+ cTodo2nd++;
+ else if (u64 <= uNsEarlyWakeUp2)
+ cTodo3rd++;
+ else if (u64 < u64Min)
+ u64Min = u64;
+ }
+ }
+ }
+ }
+ AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
+ }
+
+ if (cTodo2nd)
+ {
+ for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
+ if ( VALID_PTR(pCurGVM)
+ && pCurGVM->u32Magic == GVM_MAGIC)
+ {
+ for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
+ {
+ PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
+ uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
+ if ( u64
+ && u64 <= uNsEarlyWakeUp1)
+ {
+ if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
+ {
+ int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
+ AssertRC(rc);
+ cWoken++;
+ }
+ }
+ }
+ }
+ AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
+ }
+ }
+
+ if (cTodo3rd)
+ {
+ for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
+ if ( VALID_PTR(pCurGVM)
+ && pCurGVM->u32Magic == GVM_MAGIC)
+ {
+ for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
+ {
+ PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
+ uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
+ if ( u64
+ && u64 <= uNsEarlyWakeUp2)
+ {
+ if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
+ {
+ int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
+ AssertRC(rc);
+ cWoken++;
+ }
+ }
+ }
+ }
+ AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
+ }
+ }
+
+ /*
+ * Set the minimum value.
+ */
+ pGVMM->uNsNextEmtWakeup = u64Min;
+
+ ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
+ return cWoken;
+}
+
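To make the pass structure concrete, here is a small worked classification assuming, purely for illustration, nsEarlyWakeUp1 = 25 000 ns and nsEarlyWakeUp2 = 50 000 ns (the real values are configured elsewhere in this file).

/* With the assumed configuration, a halted EMT with expiry u64 is handled so:
 *   u64 <= u64Now            -> signalled in the 1st pass (already expired)
 *   u64 <= u64Now + 25000    -> counted in cTodo2nd, signalled in the 2nd pass
 *   u64 <= u64Now + 50000    -> counted in cTodo3rd, signalled in the 3rd pass
 *   anything later           -> left sleeping; its expiry feeds uNsNextEmtWakeup
 */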
+
+/**
+ * Halt the EMT thread.
+ *
+ * @returns VINF_SUCCESS normal wakeup (timeout or kicked by another thread).
+ * VERR_INTERRUPTED if a signal was scheduled for the thread.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param pGVCpu The global (ring-0) CPU structure of the calling
+ * EMT.
+ * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
+ * @thread EMT(pGVCpu).
+ */
+GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PVM pVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
+{
+ LogFlow(("GVMMR0SchedHalt: pGVM=%p pVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
+ pGVM, pVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ pGVM->gvmm.s.StatsSched.cHaltCalls++;
+ Assert(!pGVCpu->gvmm.s.u64HaltExpire);
+
+ /*
+ * If we're doing early wake-ups, we must take the UsedList lock before we
+ * start querying the current time.
+ * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
+ */
+ bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
+ if (fDoEarlyWakeUps)
+ {
+ int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ }
+
+ pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
+
+ /* GIP hack: We might frequently be sleeping for short intervals where the
+ difference between GIP and system time matters on systems with high resolution
+ system time. So, convert the input from GIP to system time in that case. */
+ Assert(ASMGetFlags() & X86_EFL_IF);
+ const uint64_t u64NowSys = RTTimeSystemNanoTS();
+ const uint64_t u64NowGip = RTTimeNanoTS();
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+
+ if (fDoEarlyWakeUps)
+ {
+ pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ }
+
+ /*
+ * Go to sleep if we must...
+ * Cap the sleep time to 1 second to be on the safe side.
+ */
+ int rc;
+ uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
+ if ( u64NowGip < u64ExpireGipTime
+ && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
+ ? pGVMM->nsMinSleepCompany
+ : pGVMM->nsMinSleepAlone))
+ {
+ pGVM->gvmm.s.StatsSched.cHaltBlocking++;
+ if (cNsInterval > RT_NS_1SEC)
+ u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
+ ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
+ ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
+ if (fDoEarlyWakeUps)
+ {
+ if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
+ pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ }
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+
+ rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
+ RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
+ u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+
+ ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
+ ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
+
+ /* Reset the semaphore to try to prevent a few false wake-ups. */
+ if (rc == VINF_SUCCESS)
+ {
+ RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ }
+ else if (rc == VERR_TIMEOUT)
+ {
+ pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
+ rc = VINF_SUCCESS;
+ }
+ }
+ else
+ {
+ pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
+ if (fDoEarlyWakeUps)
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ rc = VINF_SUCCESS;
+ }
+
+ return rc;
+}
+
+
+/**
+ * Halt the EMT thread.
+ *
+ * @returns VINF_SUCCESS normal wakeup (timeout or kicked by another thread).
+ * VERR_INTERRUPTED if a signal was scheduled for the thread.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The Virtual CPU ID of the calling EMT.
+ * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
+ * @thread EMT(idCpu).
+ */
+GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
+{
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
+ if (RT_SUCCESS(rc))
+ {
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ rc = GVMMR0SchedHalt(pGVM, pVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
+ }
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ return rc;
+}
+
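A minimal caller-side sketch, assuming an EMT that simply wants to block for up to one second of GIP time; the wrapper function name is hypothetical and error handling is elided.

/* Hypothetical EMT-side helper built on GVMMR0SchedHaltReq. */
static int vmmR0SampleHaltUpToOneSec(PGVM pGVM, PVM pVM, VMCPUID idCpu)
{
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + RT_NS_1SEC;
    /* VINF_SUCCESS covers both a timeout and being woken/kicked early. */
    return GVMMR0SchedHaltReq(pGVM, pVM, idCpu, u64ExpireGipTime);
}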
+
+
+/**
+ * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
+ * a sleeping EMT.
+ *
+ * @retval VINF_SUCCESS if successfully woken up.
+ * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pGVCpu The global (ring-0) VCPU structure.
+ */
+DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
+{
+ pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
+
+ /*
+ * Signal the semaphore regardless of whether it's currently blocked on it.
+ *
+ * The reason for this is that there is absolutely no way we can be 100%
+ * certain that it isn't *about* to go to sleep on it and just got
+ * delayed a bit en route. So, we will always signal the semaphore when
+ * it is flagged as halted in the VMM.
+ */
+/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
+ int rc;
+ if (pGVCpu->gvmm.s.u64HaltExpire)
+ {
+ rc = VINF_SUCCESS;
+ ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
+ }
+ else
+ {
+ rc = VINF_GVM_NOT_BLOCKED;
+ pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
+ }
+
+ int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
+ AssertRC(rc2);
+
+ return rc;
+}
+
+
+/**
+ * Wakes up the halted EMT thread so it can service a pending request.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if successfully woken up.
+ * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The Virtual CPU ID of the EMT to wake up.
+ * @param fTakeUsedLock Take the used lock or not
+ * @thread Any but EMT(idCpu).
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
+{
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+
+ /*
+ * Validate input and take the UsedLock.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ if (RT_SUCCESS(rc))
+ {
+ if (idCpu < pGVM->cCpus)
+ {
+ /*
+ * Do the actual job.
+ */
+ rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+
+ if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
+ {
+ /*
+ * While we're here, do a round of scheduling.
+ */
+ Assert(ASMGetFlags() & X86_EFL_IF);
+ const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
+ pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ }
+ }
+ else
+ rc = VERR_INVALID_CPU_ID;
+
+ if (fTakeUsedLock)
+ {
+ int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ AssertRC(rc2);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ }
+ }
+
+ LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Wakes up the halted EMT thread so it can service a pending request.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if successfully woken up.
+ * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The Virtual CPU ID of the EMT to wake up.
+ * @thread Any but EMT(idCpu).
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+ return GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
+}
+
+
+/**
+ * Wakes up the halted EMT thread so it can service a pending request, no GVM
+ * parameter and no used locking.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if successfully woken up.
+ * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
+ *
+ * @param pVM The cross context VM structure.
+ * @param idCpu The Virtual CPU ID of the EMT to wake up.
+ * @thread Any but EMT(idCpu).
+ * @deprecated Don't use in new code if possible! Use the GVM variant.
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PVM pVM, VMCPUID idCpu)
+{
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ PGVM pGVM;
+ PGVMM pGVMM;
+ int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ if (RT_SUCCESS(rc))
+ rc = GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, false /*fTakeUsedLock*/);
+ return rc;
+}
+
+
+/**
+ * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
+ * the Virtual CPU if it's still busy executing guest code.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if poked successfully.
+ * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
+{
+ pGVM->gvmm.s.StatsSched.cPokeCalls++;
+
+ RTCPUID idHostCpu = pVCpu->idHostCpu;
+ if ( idHostCpu == NIL_RTCPUID
+ || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
+ {
+ pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
+ return VINF_GVM_NOT_BUSY_IN_GC;
+ }
+
+ /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
+ RTMpPokeCpu(idHostCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Pokes an EMT if it's still busy running guest code.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if poked successfully.
+ * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The ID of the virtual CPU to poke.
+ * @param fTakeUsedLock Take the used lock or not
+ */
+GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
+{
+ /*
+ * Validate input and take the UsedLock.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock);
+ if (RT_SUCCESS(rc))
+ {
+ if (idCpu < pGVM->cCpus)
+ rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
+ else
+ rc = VERR_INVALID_CPU_ID;
+
+ if (fTakeUsedLock)
+ {
+ int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ AssertRC(rc2);
+ }
+ }
+
+ LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Pokes an EMT if it's still busy running guest code.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if poked successfully.
+ * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The ID of the virtual CPU to poke.
+ */
+GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+ return GVMMR0SchedPokeEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */);
+}
+
+
+/**
+ * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
+ * used locking.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if poked successfully.
+ * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
+ *
+ * @param pVM The cross context VM structure.
+ * @param idCpu The ID of the virtual CPU to poke.
+ *
+ * @deprecated Don't use in new code if possible! Use the GVM variant.
+ */
+GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PVM pVM, VMCPUID idCpu)
+{
+ PGVM pGVM;
+ PGVMM pGVMM;
+ int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/);
+ if (RT_SUCCESS(rc))
+ {
+ if (idCpu < pGVM->cCpus)
+ rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
+ else
+ rc = VERR_INVALID_CPU_ID;
+ }
+ return rc;
+}
+
+
+/**
+ * Wakes up a set of halted EMT threads so they can service pending requests.
+ *
+ * @returns VBox status code, no informational stuff.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param pSleepSet The set of sleepers to wake up.
+ * @param pPokeSet The set of CPUs to poke.
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
+{
+ AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
+ AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ RTNATIVETHREAD hSelf = RTThreadNativeSelf();
+
+ /*
+ * Validate input and take the UsedLock.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /* fTakeUsedLock */);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ if (RT_SUCCESS(rc))
+ {
+ rc = VINF_SUCCESS;
+ VMCPUID idCpu = pGVM->cCpus;
+ while (idCpu-- > 0)
+ {
+ /* Don't try to poke or wake up ourselves. */
+ if (pGVM->aCpus[idCpu].hEMT == hSelf)
+ continue;
+
+ /* just ignore errors for now. */
+ if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
+ {
+ gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ }
+ else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
+ {
+ gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ }
+ }
+
+ int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ AssertRC(rc2);
+ GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ }
+
+ LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
+ *
+ * @returns see GVMMR0SchedWakeUpAndPokeCpus.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param pReq Pointer to the request packet.
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &pReq->SleepSet, &pReq->PokeSet);
+}
+
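For illustration, a hedged sketch of filling in the request packet consumed by the wrapper above. Only the Hdr.cbReq, SleepSet and PokeSet fields referenced by the code are shown; pGVM/pVM are assumed to be valid, and VMCPUSET_EMPTY/VMCPUSET_ADD are assumed to be the usual VMM CPU-set helpers.

/* Hypothetical fragment: ask for VCPU 1 to be woken and VCPU 2 to be poked. */
GVMMSCHEDWAKEUPANDPOKECPUSREQ Req;
Req.Hdr.cbReq = sizeof(Req);
VMCPUSET_EMPTY(&Req.SleepSet);
VMCPUSET_EMPTY(&Req.PokeSet);
VMCPUSET_ADD(&Req.SleepSet, 1);
VMCPUSET_ADD(&Req.PokeSet, 2);
int rc = GVMMR0SchedWakeUpAndPokeCpusReq(pGVM, pVM, &Req);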
+
+
+/**
+ * Poll the scheduler to see if someone else should get a chance to run.
+ *
+ * This is a bit hackish and will not work too well if the machine is
+ * under heavy load from non-VM processes.
+ *
+ * @returns VINF_SUCCESS if not yielded.
+ * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The Virtual CPU ID of the calling EMT.
+ * @param fYield Whether to yield or not.
+ * This is for when we're spinning in the halt loop.
+ * @thread EMT(idCpu).
+ */
+GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fYield)
+{
+ /*
+ * Validate input.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * We currently only implement helping with wake-ups (fYield = false), so don't
+ * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
+ */
+ if (!fYield && pGVMM->fDoEarlyWakeUps)
+ {
+ rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
+ pGVM->gvmm.s.StatsSched.cPollCalls++;
+
+ Assert(ASMGetFlags() & X86_EFL_IF);
+ const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
+
+ pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
+
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ }
+ /*
+ * Not quite sure what we could do here...
+ */
+ else if (fYield)
+ rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
+ else
+ rc = VINF_SUCCESS;
+ }
+
+ LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+#ifdef GVMM_SCHED_WITH_PPT
+/**
+ * Timer callback for the periodic preemption timer.
+ *
+ * @param pTimer The timer handle.
+ * @param pvUser Pointer to the per cpu structure.
+ * @param iTick The current tick.
+ */
+static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
+{
+ PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
+ NOREF(pTimer); NOREF(iTick);
+
+ /*
+ * Termination check
+ */
+ if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
+ return;
+
+ /*
+ * Do the housekeeping.
+ */
+ RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
+
+ if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
+ {
+ /*
+ * Historicize the max frequency.
+ */
+ uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
+ pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
+ pCpu->Ppt.iTickHistorization = 0;
+ pCpu->Ppt.uDesiredHz = 0;
+
+ /*
+ * Check if the current timer frequency needs adjusting.
+ */
+ uint32_t uHistMaxHz = 0;
+ for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
+ if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
+ uHistMaxHz = pCpu->Ppt.aHzHistory[i];
+ if (uHistMaxHz == pCpu->Ppt.uTimerHz)
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+ else if (uHistMaxHz)
+ {
+ /*
+ * Reprogram it.
+ */
+ pCpu->Ppt.cChanges++;
+ pCpu->Ppt.iTickHistorization = 0;
+ pCpu->Ppt.uTimerHz = uHistMaxHz;
+ uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
+ pCpu->Ppt.cNsInterval = cNsInterval;
+ if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
+ pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
+ + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
+ / cNsInterval;
+ else
+ pCpu->Ppt.cTicksHistoriziationInterval = 1;
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+
+ /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
+ RTTimerChangeInterval(pTimer, cNsInterval);
+ }
+ else
+ {
+ /*
+ * Stop it.
+ */
+ pCpu->Ppt.fStarted = false;
+ pCpu->Ppt.uTimerHz = 0;
+ pCpu->Ppt.cNsInterval = 0;
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+
+ /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
+ RTTimerStop(pTimer);
+ }
+ }
+ else
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+}
+#endif /* GVMM_SCHED_WITH_PPT */
+
+
+/**
+ * Updates the periodic preemption timer for the calling CPU.
+ *
+ * The caller must have disabled preemption!
+ * The caller must check that the host can do high resolution timers.
+ *
+ * @param pVM The cross context VM structure.
+ * @param idHostCpu The current host CPU id.
+ * @param uHz The desired frequency.
+ */
+GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
+{
+ NOREF(pVM);
+#ifdef GVMM_SCHED_WITH_PPT
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(RTTimerCanDoHighResolution());
+
+ /*
+ * Resolve the per CPU data.
+ */
+ uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
+ PGVMM pGVMM = g_pGVMM;
+ if ( !VALID_PTR(pGVMM)
+ || pGVMM->u32Magic != GVMM_MAGIC)
+ return;
+ AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
+ PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
+ AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
+ && pCpu->idCpu == idHostCpu,
+ ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
+
+ /*
+ * Check whether we need to do anything about the timer.
+ * We have to be a little bit careful since we might be racing the timer
+ * callback here.
+ */
+ if (uHz > 16384)
+ uHz = 16384; /** @todo add a query method for this! */
+ if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
+ && uHz >= pCpu->Ppt.uMinHz
+ && !pCpu->Ppt.fStarting /* solaris paranoia */))
+ {
+ RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
+
+ pCpu->Ppt.uDesiredHz = uHz;
+ uint32_t cNsInterval = 0;
+ if (!pCpu->Ppt.fStarted)
+ {
+ pCpu->Ppt.cStarts++;
+ pCpu->Ppt.fStarted = true;
+ pCpu->Ppt.fStarting = true;
+ pCpu->Ppt.iTickHistorization = 0;
+ pCpu->Ppt.uTimerHz = uHz;
+ pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
+ if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
+ pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
+ + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
+ / cNsInterval;
+ else
+ pCpu->Ppt.cTicksHistoriziationInterval = 1;
+ }
+
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+
+ if (cNsInterval)
+ {
+ RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
+ int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
+ AssertRC(rc);
+
+ RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
+ if (RT_FAILURE(rc))
+ pCpu->Ppt.fStarted = false;
+ pCpu->Ppt.fStarting = false;
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+ }
+ }
+#else /* !GVMM_SCHED_WITH_PPT */
+ NOREF(idHostCpu); NOREF(uHz);
+#endif /* !GVMM_SCHED_WITH_PPT */
+}
+
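A worked example of the interval arithmetic used above, assuming (hypothetically) GVMMHOSTCPU_PPT_HIST_INTERVAL_NS = 1 000 000 ns; the real constant is defined earlier in this file and may differ.

/* Assumed GVMMHOSTCPU_PPT_HIST_INTERVAL_NS = 1000000, requested uHz = 4000:
 *   cNsInterval                  = RT_NS_1SEC / 4000 = 250000 ns
 *   cTicksHistoriziationInterval = (1000000 + 1000000/2 - 1) / 250000 = 5
 * i.e. the frequency history advances roughly every 5th timer tick (~1.25 ms)
 * rather than on every tick of a fast-running preemption timer.
 */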
+
+/**
+ * Retrieves the GVMM statistics visible to the caller.
+ *
+ * @returns VBox status code.
+ *
+ * @param pStats Where to put the statistics.
+ * @param pSession The current session.
+ * @param pGVM The GVM to obtain statistics for. Optional.
+ * @param pVM The VM structure corresponding to @a pGVM.
+ */
+GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
+{
+ LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
+
+ /*
+ * Validate input.
+ */
+ AssertPtrReturn(pSession, VERR_INVALID_POINTER);
+ AssertPtrReturn(pStats, VERR_INVALID_POINTER);
+ pStats->cVMs = 0; /* (crash before taking the sem...) */
+
+ /*
+ * Take the lock and get the VM statistics.
+ */
+ PGVMM pGVMM;
+ if (pGVM)
+ {
+ int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
+ if (RT_FAILURE(rc))
+ return rc;
+ pStats->SchedVM = pGVM->gvmm.s.StatsSched;
+ }
+ else
+ {
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+ memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
+
+ int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
+ AssertRCReturn(rc, rc);
+ }
+
+ /*
+ * Enumerate the VMs and add the statistics of the ones visible to the caller.
+ */
+ pStats->cVMs = 0;
+ pStats->cEMTs = 0;
+ memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
+
+ for (unsigned i = pGVMM->iUsedHead;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
+ void *pvObj = pGVMM->aHandles[i].pvObj;
+ if ( VALID_PTR(pvObj)
+ && VALID_PTR(pOtherGVM)
+ && pOtherGVM->u32Magic == GVM_MAGIC
+ && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
+ {
+ pStats->cVMs++;
+ pStats->cEMTs += pOtherGVM->cCpus;
+
+ pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
+ pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
+ pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
+ pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
+ pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
+
+ pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
+ pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
+ pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
+
+ pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
+ pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
+
+ pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
+ pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
+ pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
+ }
+ }
+
+ /*
+ * Copy out the per host CPU statistics.
+ */
+ uint32_t iDstCpu = 0;
+ uint32_t cSrcCpus = pGVMM->cHostCpus;
+ for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
+ {
+ if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
+ {
+ pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
+ pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
+#ifdef GVMM_SCHED_WITH_PPT
+ pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
+ pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
+ pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
+ pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
+#else
+ pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
+ pStats->aHostCpus[iDstCpu].uTimerHz = 0;
+ pStats->aHostCpus[iDstCpu].cChanges = 0;
+ pStats->aHostCpus[iDstCpu].cStarts = 0;
+#endif
+ iDstCpu++;
+ if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
+ break;
+ }
+ }
+ pStats->cHostCpus = iDstCpu;
+
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VMMR0 request wrapper for GVMMR0QueryStatistics.
+ *
+ * @returns see GVMMR0QueryStatistics.
+ * @param pGVM The global (ring-0) VM structure. Optional.
+ * @param pVM The cross context VM structure. Optional.
+ * @param pReq Pointer to the request packet.
+ * @param pSession The current session.
+ */
+GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+ AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
+
+ return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM, pVM);
+}
+
+
+/**
+ * Resets the specified GVMM statistics.
+ *
+ * @returns VBox status code.
+ *
+ * @param pStats Which statistics to reset, that is, non-zero fields indicate which to reset.
+ * @param pSession The current session.
+ * @param pGVM The GVM to reset statistics for. Optional.
+ * @param pVM The VM structure corresponding to @a pGVM.
+ */
+GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM)
+{
+ LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM));
+
+ /*
+ * Validate input.
+ */
+ AssertPtrReturn(pSession, VERR_INVALID_POINTER);
+ AssertPtrReturn(pStats, VERR_INVALID_POINTER);
+
+ /*
+ * Take the lock and get the VM statistics.
+ */
+ PGVMM pGVMM;
+ if (pGVM)
+ {
+ int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/);
+ if (RT_FAILURE(rc))
+ return rc;
+# define MAYBE_RESET_FIELD(field) \
+ do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
+ MAYBE_RESET_FIELD(cHaltCalls);
+ MAYBE_RESET_FIELD(cHaltBlocking);
+ MAYBE_RESET_FIELD(cHaltTimeouts);
+ MAYBE_RESET_FIELD(cHaltNotBlocking);
+ MAYBE_RESET_FIELD(cHaltWakeUps);
+ MAYBE_RESET_FIELD(cWakeUpCalls);
+ MAYBE_RESET_FIELD(cWakeUpNotHalted);
+ MAYBE_RESET_FIELD(cWakeUpWakeUps);
+ MAYBE_RESET_FIELD(cPokeCalls);
+ MAYBE_RESET_FIELD(cPokeNotBusy);
+ MAYBE_RESET_FIELD(cPollCalls);
+ MAYBE_RESET_FIELD(cPollHalts);
+ MAYBE_RESET_FIELD(cPollWakeUps);
+# undef MAYBE_RESET_FIELD
+ }
+ else
+ {
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
+ AssertRCReturn(rc, rc);
+ }
+
+ /*
+ * Enumerate the VMs and reset the statistics of the ones visible to the caller.
+ */
+ if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
+ {
+ for (unsigned i = pGVMM->iUsedHead;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
+ void *pvObj = pGVMM->aHandles[i].pvObj;
+ if ( VALID_PTR(pvObj)
+ && VALID_PTR(pOtherGVM)
+ && pOtherGVM->u32Magic == GVM_MAGIC
+ && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
+ {
+# define MAYBE_RESET_FIELD(field) \
+ do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
+ MAYBE_RESET_FIELD(cHaltCalls);
+ MAYBE_RESET_FIELD(cHaltBlocking);
+ MAYBE_RESET_FIELD(cHaltTimeouts);
+ MAYBE_RESET_FIELD(cHaltNotBlocking);
+ MAYBE_RESET_FIELD(cHaltWakeUps);
+ MAYBE_RESET_FIELD(cWakeUpCalls);
+ MAYBE_RESET_FIELD(cWakeUpNotHalted);
+ MAYBE_RESET_FIELD(cWakeUpWakeUps);
+ MAYBE_RESET_FIELD(cPokeCalls);
+ MAYBE_RESET_FIELD(cPokeNotBusy);
+ MAYBE_RESET_FIELD(cPollCalls);
+ MAYBE_RESET_FIELD(cPollHalts);
+ MAYBE_RESET_FIELD(cPollWakeUps);
+# undef MAYBE_RESET_FIELD
+ }
+ }
+ }
+
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VMMR0 request wrapper for GVMMR0ResetStatistics.
+ *
+ * @returns see GVMMR0ResetStatistics.
+ * @param pGVM The global (ring-0) VM structure. Optional.
+ * @param pVM The cross context VM structure. Optional.
+ * @param pReq Pointer to the request packet.
+ * @param pSession The current session.
+ */
+GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PVM pVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+ AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
+
+ return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM, pVM);
+}
+
diff --git a/src/VBox/VMM/VMMR0/GVMMR0Internal.h b/src/VBox/VMM/VMMR0/GVMMR0Internal.h
new file mode 100644
index 00000000..b343b3f5
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/GVMMR0Internal.h
@@ -0,0 +1,69 @@
+/* $Id: GVMMR0Internal.h $ */
+/** @file
+ * GVMM - The Global VM Manager, Internal header.
+ */
+
+/*
+ * Copyright (C) 2007-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VMM_INCLUDED_SRC_VMMR0_GVMMR0Internal_h
+#define VMM_INCLUDED_SRC_VMMR0_GVMMR0Internal_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <iprt/mem.h>
+
+/**
+ * The GVMM per VCPU data.
+ */
+typedef struct GVMMPERVCPU
+{
+ /** The time the halted EMT thread expires.
+ * 0 if the EMT thread is not halted here. */
+ uint64_t volatile u64HaltExpire;
+ /** The event semaphore the EMT thread is blocking on. */
+ RTSEMEVENTMULTI HaltEventMulti;
+ /** The APIC ID of the CPU that EMT was scheduled on the last time we checked. */
+ uint8_t iCpuEmt;
+} GVMMPERVCPU;
+/** Pointer to the GVMM per VCPU data. */
+typedef GVMMPERVCPU *PGVMMPERVCPU;
+
+/**
+ * The GVMM per VM data.
+ */
+typedef struct GVMMPERVM
+{
+ /** The shared VM data structure allocation object (PVMR0). */
+ RTR0MEMOBJ VMMemObj;
+ /** The Ring-3 mapping of the shared VM data structure (PVMR3). */
+ RTR0MEMOBJ VMMapObj;
+ /** The allocation object for the VM pages. */
+ RTR0MEMOBJ VMPagesMemObj;
+ /** The ring-3 mapping of the VM pages. */
+ RTR0MEMOBJ VMPagesMapObj;
+
+ /** The scheduler statistics. */
+ GVMMSTATSSCHED StatsSched;
+
+ /** Whether the per-VM ring-0 initialization has been performed. */
+ bool fDoneVMMR0Init;
+ /** Whether the per-VM ring-0 termination is being or has been performed. */
+ bool fDoneVMMR0Term;
+} GVMMPERVM;
+/** Pointer to the GVMM per VM data. */
+typedef GVMMPERVM *PGVMMPERVM;
+
+
+#endif /* !VMM_INCLUDED_SRC_VMMR0_GVMMR0Internal_h */
+
diff --git a/src/VBox/VMM/VMMR0/HMR0.cpp b/src/VBox/VMM/VMMR0/HMR0.cpp
new file mode 100644
index 00000000..3386e7f1
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/HMR0.cpp
@@ -0,0 +1,2005 @@
+/* $Id: HMR0.cpp $ */
+/** @file
+ * Hardware Assisted Virtualization Manager (HM) - Host Context Ring-0.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_HM
+#define VMCPU_INCL_CPUM_GST_CTX
+#include <VBox/vmm/hm.h>
+#include <VBox/vmm/pgm.h>
+#include "HMInternal.h"
+#include <VBox/vmm/vm.h>
+#include <VBox/vmm/hm_svm.h>
+#include <VBox/vmm/hmvmxinline.h>
+#include <VBox/err.h>
+#include <VBox/log.h>
+#include <iprt/assert.h>
+#include <iprt/asm.h>
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/cpuset.h>
+#include <iprt/mem.h>
+#include <iprt/memobj.h>
+#include <iprt/once.h>
+#include <iprt/param.h>
+#include <iprt/power.h>
+#include <iprt/string.h>
+#include <iprt/thread.h>
+#include <iprt/x86.h>
+#include "HMVMXR0.h"
+#include "HMSVMR0.h"
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static DECLCALLBACK(void) hmR0EnableCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2);
+static DECLCALLBACK(void) hmR0DisableCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2);
+static DECLCALLBACK(void) hmR0InitIntelCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
+static DECLCALLBACK(void) hmR0InitAmdCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
+static DECLCALLBACK(void) hmR0PowerCallback(RTPOWEREVENT enmEvent, void *pvUser);
+static DECLCALLBACK(void) hmR0MpEventCallback(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvData);
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * This is used to manage the status code of an RTMpOnAll call in HM.
+ */
+typedef struct HMR0FIRSTRC
+{
+ /** The status code. */
+ int32_t volatile rc;
+ /** The ID of the CPU reporting the first failure. */
+ RTCPUID volatile idCpu;
+} HMR0FIRSTRC;
+/** Pointer to a first return code structure. */
+typedef HMR0FIRSTRC *PHMR0FIRSTRC;
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/**
+ * Global data.
+ */
+static struct
+{
+ /** Per CPU globals. */
+ HMPHYSCPU aCpuInfo[RTCPUSET_MAX_CPUS];
+
+ /** @name Ring-0 method table for AMD-V and VT-x specific operations.
+ * @{ */
+ DECLR0CALLBACKMEMBER(int, pfnEnterSession, (PVMCPU pVCpu));
+ DECLR0CALLBACKMEMBER(void, pfnThreadCtxCallback, (RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit));
+ DECLR0CALLBACKMEMBER(int, pfnExportHostState, (PVMCPU pVCpu));
+ DECLR0CALLBACKMEMBER(VBOXSTRICTRC, pfnRunGuestCode, (PVMCPU pVCpu));
+ DECLR0CALLBACKMEMBER(int, pfnEnableCpu, (PHMPHYSCPU pHostCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage,
+ bool fEnabledByHost, PCSUPHWVIRTMSRS pHwvirtMsrs));
+ DECLR0CALLBACKMEMBER(int, pfnDisableCpu, (void *pvCpuPage, RTHCPHYS HCPhysCpuPage));
+ DECLR0CALLBACKMEMBER(int, pfnInitVM, (PVM pVM));
+ DECLR0CALLBACKMEMBER(int, pfnTermVM, (PVM pVM));
+ DECLR0CALLBACKMEMBER(int, pfnSetupVM, (PVM pVM));
+ /** @} */
+
+ /** Hardware-virtualization data. */
+ struct
+ {
+ union
+ {
+ /** VT-x data. */
+ struct
+ {
+ /** Host CR4 value (set by ring-0 VMX init) */
+ uint64_t u64HostCr4;
+ /** Host EFER value (set by ring-0 VMX init) */
+ uint64_t u64HostEfer;
+ /** Host SMM monitor control (used for logging/diagnostics) */
+ uint64_t u64HostSmmMonitorCtl;
+ /** Last instruction error. */
+ uint32_t ulLastInstrError;
+ /** The shift mask employed by the VMX-Preemption timer. */
+ uint8_t cPreemptTimerShift;
+ /** Padding. */
+ uint8_t abPadding[3];
+ /** Whether we're using the preemption timer or not. */
+ bool fUsePreemptTimer;
+ /** Whether we're using SUPR0EnableVTx or not. */
+ bool fUsingSUPR0EnableVTx;
+ /** Set if we've called SUPR0EnableVTx(true) and should disable it during
+ * module termination. */
+ bool fCalledSUPR0EnableVTx;
+ /** Set by us to indicate VMX is supported by the CPU. */
+ bool fSupported;
+ } vmx;
+
+ /** AMD-V data. */
+ struct
+ {
+ /** SVM revision. */
+ uint32_t u32Rev;
+ /** SVM feature bits from cpuid 0x8000000a */
+ uint32_t u32Features;
+ /** Padding. */
+ bool afPadding[3];
+ /** Set by us to indicate SVM is supported by the CPU. */
+ bool fSupported;
+ } svm;
+ } u;
+ /** Maximum allowed ASID/VPID (inclusive). */
+ uint32_t uMaxAsid;
+ /** MSRs. */
+ SUPHWVIRTMSRS Msrs;
+ } hwvirt;
+
+ /** Last recorded error code during HM ring-0 init. */
+ int32_t rcInit;
+
+ /** If set, VT-x/AMD-V is enabled globally at init time, otherwise it's
+ * enabled and disabled each time it's used to execute guest code. */
+ bool fGlobalInit;
+ /** Indicates whether the host is suspending or not. We'll refuse a few
+ * actions when the host is being suspended to speed up the suspending and
+ * avoid trouble. */
+ bool volatile fSuspended;
+
+ /** Whether we've already initialized all CPUs.
+ * @remarks We could check the EnableAllCpusOnce state, but this is
+ * simpler and hopefully easier to understand. */
+ bool fEnabled;
+ /** Serialize initialization in HMR0EnableAllCpus. */
+ RTONCE EnableAllCpusOnce;
+} g_HmR0;
+
+
+/**
+ * Initializes a first return code structure.
+ *
+ * @param pFirstRc The structure to init.
+ */
+static void hmR0FirstRcInit(PHMR0FIRSTRC pFirstRc)
+{
+ pFirstRc->rc = VINF_SUCCESS;
+ pFirstRc->idCpu = NIL_RTCPUID;
+}
+
+
+/**
+ * Try to set the status code (success codes are ignored).
+ *
+ * @param pFirstRc The first return code structure.
+ * @param rc The status code.
+ */
+static void hmR0FirstRcSetStatus(PHMR0FIRSTRC pFirstRc, int rc)
+{
+ if ( RT_FAILURE(rc)
+ && ASMAtomicCmpXchgS32(&pFirstRc->rc, rc, VINF_SUCCESS))
+ pFirstRc->idCpu = RTMpCpuId();
+}
+
+
+/**
+ * Get the status code of a first return code structure.
+ *
+ * @returns The status code; VINF_SUCCESS or error status, no informational or
+ * warning errors.
+ * @param pFirstRc The first return code structure.
+ */
+static int hmR0FirstRcGetStatus(PHMR0FIRSTRC pFirstRc)
+{
+ return pFirstRc->rc;
+}
+
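The intended pattern for these first-return-code helpers, sketched with a hypothetical per-CPU worker; the real users, such as hmR0InitIntelCpu further down, follow the same shape.

/* Hypothetical worker executed on every online CPU by RTMpOnAll. */
static DECLCALLBACK(void) hmR0SampleWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PHMR0FIRSTRC pFirstRc = (PHMR0FIRSTRC)pvUser1;
    NOREF(idCpu); NOREF(pvUser2);
    int rc = VINF_SUCCESS; /* ... per-CPU probing/work would go here ... */
    hmR0FirstRcSetStatus(pFirstRc, rc); /* only the first failure is recorded */
}

/* Caller side (same shape as the RTMpOnAll use in hmR0InitIntel): */
HMR0FIRSTRC FirstRc;
hmR0FirstRcInit(&FirstRc);
int rc = RTMpOnAll(hmR0SampleWorker, &FirstRc, NULL);
if (RT_SUCCESS(rc))
    rc = hmR0FirstRcGetStatus(&FirstRc);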
+
+#ifdef VBOX_STRICT
+# ifndef DEBUG_bird
+/**
+ * Get the CPU ID on which the failure status code was reported.
+ *
+ * @returns The CPU ID, NIL_RTCPUID if no failure was reported.
+ * @param pFirstRc The first return code structure.
+ */
+static RTCPUID hmR0FirstRcGetCpuId(PHMR0FIRSTRC pFirstRc)
+{
+ return pFirstRc->idCpu;
+}
+# endif
+#endif /* VBOX_STRICT */
+
+
+/** @name Dummy callback handlers.
+ * @{ */
+
+static DECLCALLBACK(int) hmR0DummyEnter(PVMCPU pVCpu)
+{
+ RT_NOREF1(pVCpu);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(void) hmR0DummyThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit)
+{
+ RT_NOREF3(enmEvent, pVCpu, fGlobalInit);
+}
+
+static DECLCALLBACK(int) hmR0DummyEnableCpu(PHMPHYSCPU pHostCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage,
+ bool fEnabledBySystem, PCSUPHWVIRTMSRS pHwvirtMsrs)
+{
+ RT_NOREF6(pHostCpu, pVM, pvCpuPage, HCPhysCpuPage, fEnabledBySystem, pHwvirtMsrs);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(int) hmR0DummyDisableCpu(void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
+{
+ RT_NOREF2(pvCpuPage, HCPhysCpuPage);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(int) hmR0DummyInitVM(PVM pVM)
+{
+ RT_NOREF1(pVM);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(int) hmR0DummyTermVM(PVM pVM)
+{
+ RT_NOREF1(pVM);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(int) hmR0DummySetupVM(PVM pVM)
+{
+ RT_NOREF1(pVM);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(VBOXSTRICTRC) hmR0DummyRunGuestCode(PVMCPU pVCpu)
+{
+ RT_NOREF(pVCpu);
+ return VINF_SUCCESS;
+}
+
+static DECLCALLBACK(int) hmR0DummyExportHostState(PVMCPU pVCpu)
+{
+ RT_NOREF1(pVCpu);
+ return VINF_SUCCESS;
+}
+
+/** @} */
+
+
+/**
+ * Checks if the CPU is subject to the "VMX-Preemption Timer Does Not Count
+ * Down at the Rate Specified" erratum.
+ *
+ * Errata names and related steppings:
+ * - BA86 - D0.
+ * - AAX65 - C2.
+ * - AAU65 - C2, K0.
+ * - AAO95 - B1.
+ * - AAT59 - C2.
+ * - AAK139 - D0.
+ * - AAM126 - C0, C1, D0.
+ * - AAN92 - B1.
+ * - AAJ124 - C0, D0.
+ * - AAP86 - B1.
+ *
+ * Steppings: B1, C0, C1, C2, D0, K0.
+ *
+ * @returns true if subject to it, false if not.
+ */
+static bool hmR0InitIntelIsSubjectToVmxPreemptTimerErratum(void)
+{
+ uint32_t u = ASMCpuId_EAX(1);
+ u &= ~(RT_BIT_32(14) | RT_BIT_32(15) | RT_BIT_32(28) | RT_BIT_32(29) | RT_BIT_32(30) | RT_BIT_32(31));
+ if ( u == UINT32_C(0x000206E6) /* 323344.pdf - BA86 - D0 - Intel Xeon Processor 7500 Series */
+ || u == UINT32_C(0x00020652) /* 323056.pdf - AAX65 - C2 - Intel Xeon Processor L3406 */
+ /* 322814.pdf - AAT59 - C2 - Intel CoreTM i7-600, i5-500, i5-400 and i3-300 Mobile Processor Series */
+ /* 322911.pdf - AAU65 - C2 - Intel CoreTM i5-600, i3-500 Desktop Processor Series and Intel Pentium Processor G6950 */
+ || u == UINT32_C(0x00020655) /* 322911.pdf - AAU65 - K0 - Intel CoreTM i5-600, i3-500 Desktop Processor Series and Intel Pentium Processor G6950 */
+ || u == UINT32_C(0x000106E5) /* 322373.pdf - AAO95 - B1 - Intel Xeon Processor 3400 Series */
+ /* 322166.pdf - AAN92 - B1 - Intel CoreTM i7-800 and i5-700 Desktop Processor Series */
+ /* 320767.pdf - AAP86 - B1 - Intel Core i7-900 Mobile Processor Extreme Edition Series, Intel Core i7-800 and i7-700 Mobile Processor Series */
+ || u == UINT32_C(0x000106A0) /* 321333.pdf - AAM126 - C0 - Intel Xeon Processor 3500 Series Specification */
+ || u == UINT32_C(0x000106A1) /* 321333.pdf - AAM126 - C1 - Intel Xeon Processor 3500 Series Specification */
+ || u == UINT32_C(0x000106A4) /* 320836.pdf - AAJ124 - C0 - Intel Core i7-900 Desktop Processor Extreme Edition Series and Intel Core i7-900 Desktop Processor Series */
+ || u == UINT32_C(0x000106A5) /* 321333.pdf - AAM126 - D0 - Intel Xeon Processor 3500 Series Specification */
+ /* 321324.pdf - AAK139 - D0 - Intel Xeon Processor 5500 Series Specification */
+ /* 320836.pdf - AAJ124 - D0 - Intel Core i7-900 Desktop Processor Extreme Edition Series and Intel Core i7-900 Desktop Processor Series */
+ )
+ return true;
+ return false;
+}
+
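As a worked decode of one of the signatures listed above (0x000206E6, the Xeon 7500 series entry), showing what survives the reserved-bit mask:

/* CPUID leaf 1, EAX = 0x000206E6, after masking bits 14-15 and 28-31:
 *   stepping = EAX[3:0]                     = 0x6
 *   model    = (EAX[19:16] << 4) | EAX[7:4] = 0x2E
 *   family   = EAX[11:8]                    = 0x6
 * so the comparisons above are against full family/model/stepping signatures,
 * with only the reserved bits of the leaf stripped.
 */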
+
+/**
+ * Intel specific initialization code.
+ *
+ * @returns VBox status code (will only fail if out of memory).
+ */
+static int hmR0InitIntel(void)
+{
+ /* Read this MSR now as it may be useful for error reporting when initializing VT-x fails. */
+ g_HmR0.hwvirt.Msrs.u.vmx.u64FeatCtrl = ASMRdMsr(MSR_IA32_FEATURE_CONTROL);
+
+ /*
+ * First try to use the native kernel API for controlling VT-x.
+ * (This is only supported by some Mac OS X kernels atm.)
+ */
+ int rc = g_HmR0.rcInit = SUPR0EnableVTx(true /* fEnable */);
+ g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx = rc != VERR_NOT_SUPPORTED;
+ if (g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
+ {
+ AssertLogRelMsg(rc == VINF_SUCCESS || rc == VERR_VMX_IN_VMX_ROOT_MODE || rc == VERR_VMX_NO_VMX, ("%Rrc\n", rc));
+ if (RT_SUCCESS(rc))
+ {
+ g_HmR0.hwvirt.u.vmx.fSupported = true;
+ rc = SUPR0EnableVTx(false /* fEnable */);
+ AssertLogRelRC(rc);
+ }
+ }
+ else
+ {
+ HMR0FIRSTRC FirstRc;
+ hmR0FirstRcInit(&FirstRc);
+ g_HmR0.rcInit = RTMpOnAll(hmR0InitIntelCpu, &FirstRc, NULL);
+ if (RT_SUCCESS(g_HmR0.rcInit))
+ g_HmR0.rcInit = hmR0FirstRcGetStatus(&FirstRc);
+ }
+
+ if (RT_SUCCESS(g_HmR0.rcInit))
+ {
+ /* Read CR4 and EFER for logging/diagnostic purposes. */
+ g_HmR0.hwvirt.u.vmx.u64HostCr4 = ASMGetCR4();
+ g_HmR0.hwvirt.u.vmx.u64HostEfer = ASMRdMsr(MSR_K6_EFER);
+
+ /* Get VMX MSRs for determining VMX features we can ultimately use. */
+ SUPR0GetHwvirtMsrs(&g_HmR0.hwvirt.Msrs, SUPVTCAPS_VT_X, false /* fForce */);
+
+ /*
+ * Nested KVM workaround: Intel SDM section 34.15.5 describes that
+ * MSR_IA32_SMM_MONITOR_CTL depends on bit 49 of MSR_IA32_VMX_BASIC while
+ * table 35-2 says that this MSR is available if either VMX or SMX is supported.
+ */
+ uint64_t const uVmxBasicMsr = g_HmR0.hwvirt.Msrs.u.vmx.u64Basic;
+ if (RT_BF_GET(uVmxBasicMsr, VMX_BF_BASIC_DUAL_MON))
+ g_HmR0.hwvirt.u.vmx.u64HostSmmMonitorCtl = ASMRdMsr(MSR_IA32_SMM_MONITOR_CTL);
+
+ /* Initialize VPID - 16 bits ASID. */
+ g_HmR0.hwvirt.uMaxAsid = 0x10000; /* exclusive */
+
+ /*
+         * If the host OS has not enabled VT-x for us, try to enter VMX root mode
+ * to really verify if VT-x is usable.
+ */
+ if (!g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
+ {
+ /* Allocate a temporary VMXON region. */
+ RTR0MEMOBJ hScatchMemObj;
+ rc = RTR0MemObjAllocCont(&hScatchMemObj, PAGE_SIZE, false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ {
+ LogRel(("hmR0InitIntel: RTR0MemObjAllocCont(,PAGE_SIZE,false) -> %Rrc\n", rc));
+ return rc;
+ }
+ void *pvScatchPage = RTR0MemObjAddress(hScatchMemObj);
+ RTHCPHYS HCPhysScratchPage = RTR0MemObjGetPagePhysAddr(hScatchMemObj, 0);
+ ASMMemZeroPage(pvScatchPage);
+
+ /* Set revision dword at the beginning of the VMXON structure. */
+ *(uint32_t *)pvScatchPage = RT_BF_GET(uVmxBasicMsr, VMX_BF_BASIC_VMCS_ID);
+
+ /* Make sure we don't get rescheduled to another CPU during this probe. */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ /* Check CR4.VMXE. */
+ g_HmR0.hwvirt.u.vmx.u64HostCr4 = ASMGetCR4();
+ if (!(g_HmR0.hwvirt.u.vmx.u64HostCr4 & X86_CR4_VMXE))
+ {
+ /* In theory this bit could be cleared behind our back. Which would cause #UD
+ faults when we try to execute the VMX instructions... */
+ ASMSetCR4(g_HmR0.hwvirt.u.vmx.u64HostCr4 | X86_CR4_VMXE);
+ }
+
+ /*
+ * The only way of checking if we're in VMX root mode or not is to try and enter it.
+ * There is no instruction or control bit that tells us if we're in VMX root mode.
+ * Therefore, try and enter VMX root mode here.
+ */
+ rc = VMXEnable(HCPhysScratchPage);
+ if (RT_SUCCESS(rc))
+ {
+ g_HmR0.hwvirt.u.vmx.fSupported = true;
+ VMXDisable();
+ }
+ else
+ {
+ /*
+ * KVM leaves the CPU in VMX root mode. Not only is this not allowed,
+ * it will crash the host when we enter raw mode, because:
+ *
+ * (a) clearing X86_CR4_VMXE in CR4 causes a #GP (we no longer modify
+ * this bit), and
+ * (b) turning off paging causes a #GP (unavoidable when switching
+ * from long to 32 bits mode or 32 bits to PAE).
+ *
+ * They should fix their code, but until they do we simply refuse to run.
+ */
+ g_HmR0.rcInit = VERR_VMX_IN_VMX_ROOT_MODE;
+ Assert(g_HmR0.hwvirt.u.vmx.fSupported == false);
+ }
+
+ /*
+ * Restore CR4 again; don't leave the X86_CR4_VMXE flag set if it was not
+ * set before (some software could incorrectly think it is in VMX mode).
+ */
+ ASMSetCR4(g_HmR0.hwvirt.u.vmx.u64HostCr4);
+ ASMSetFlags(fEFlags);
+
+ RTR0MemObjFree(hScatchMemObj, false);
+ }
+
+ if (g_HmR0.hwvirt.u.vmx.fSupported)
+ {
+ rc = VMXR0GlobalInit();
+ if (RT_FAILURE(rc))
+ g_HmR0.rcInit = rc;
+
+ /*
+ * Install the VT-x methods.
+ */
+ g_HmR0.pfnEnterSession = VMXR0Enter;
+ g_HmR0.pfnThreadCtxCallback = VMXR0ThreadCtxCallback;
+ g_HmR0.pfnExportHostState = VMXR0ExportHostState;
+ g_HmR0.pfnRunGuestCode = VMXR0RunGuestCode;
+ g_HmR0.pfnEnableCpu = VMXR0EnableCpu;
+ g_HmR0.pfnDisableCpu = VMXR0DisableCpu;
+ g_HmR0.pfnInitVM = VMXR0InitVM;
+ g_HmR0.pfnTermVM = VMXR0TermVM;
+ g_HmR0.pfnSetupVM = VMXR0SetupVM;
+
+ /*
+ * Check for the VMX-Preemption Timer and adjust for the "VMX-Preemption
+ * Timer Does Not Count Down at the Rate Specified" CPU erratum.
+ */
+ uint32_t const fPinCtls = RT_HI_U32(g_HmR0.hwvirt.Msrs.u.vmx.u64PinCtls);
+ if (fPinCtls & VMX_PIN_CTLS_PREEMPT_TIMER)
+ {
+ uint64_t const uVmxMiscMsr = g_HmR0.hwvirt.Msrs.u.vmx.u64Misc;
+ g_HmR0.hwvirt.u.vmx.fUsePreemptTimer = true;
+ g_HmR0.hwvirt.u.vmx.cPreemptTimerShift = RT_BF_GET(uVmxMiscMsr, VMX_BF_MISC_PREEMPT_TIMER_TSC);
+ if (hmR0InitIntelIsSubjectToVmxPreemptTimerErratum())
+ g_HmR0.hwvirt.u.vmx.cPreemptTimerShift = 0; /* This is about right most of the time here. */
+ }
+ }
+ }
+#ifdef LOG_ENABLED
+ else
+ SUPR0Printf("hmR0InitIntelCpu failed with rc=%Rrc\n", g_HmR0.rcInit);
+#endif
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * AMD-specific initialization code.
+ *
+ * @returns VBox status code (will only fail if out of memory).
+ */
+static int hmR0InitAmd(void)
+{
+ /* Call the global AMD-V initialization routine (should only fail in out-of-memory situations). */
+ int rc = SVMR0GlobalInit();
+ if (RT_FAILURE(rc))
+ {
+ g_HmR0.rcInit = rc;
+ return rc;
+ }
+
+ /*
+ * Install the AMD-V methods.
+ */
+ g_HmR0.pfnEnterSession = SVMR0Enter;
+ g_HmR0.pfnThreadCtxCallback = SVMR0ThreadCtxCallback;
+ g_HmR0.pfnExportHostState = SVMR0ExportHostState;
+ g_HmR0.pfnRunGuestCode = SVMR0RunGuestCode;
+ g_HmR0.pfnEnableCpu = SVMR0EnableCpu;
+ g_HmR0.pfnDisableCpu = SVMR0DisableCpu;
+ g_HmR0.pfnInitVM = SVMR0InitVM;
+ g_HmR0.pfnTermVM = SVMR0TermVM;
+ g_HmR0.pfnSetupVM = SVMR0SetupVM;
+
+ /* Query AMD features. */
+ uint32_t u32Dummy;
+ ASMCpuId(0x8000000a, &g_HmR0.hwvirt.u.svm.u32Rev, &g_HmR0.hwvirt.uMaxAsid, &u32Dummy, &g_HmR0.hwvirt.u.svm.u32Features);
+
+ /*
+ * We need to check if AMD-V has been properly initialized on all CPUs.
+ * Some BIOSes might do a poor job.
+ */
+ HMR0FIRSTRC FirstRc;
+ hmR0FirstRcInit(&FirstRc);
+ rc = RTMpOnAll(hmR0InitAmdCpu, &FirstRc, NULL);
+ AssertRC(rc);
+ if (RT_SUCCESS(rc))
+ rc = hmR0FirstRcGetStatus(&FirstRc);
+#ifndef DEBUG_bird
+ AssertMsg(rc == VINF_SUCCESS || rc == VERR_SVM_IN_USE,
+ ("hmR0InitAmdCpu failed for cpu %d with rc=%Rrc\n", hmR0FirstRcGetCpuId(&FirstRc), rc));
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ SUPR0GetHwvirtMsrs(&g_HmR0.hwvirt.Msrs, SUPVTCAPS_AMD_V, false /* fForce */);
+ g_HmR0.hwvirt.u.svm.fSupported = true;
+ }
+ else
+ {
+ g_HmR0.rcInit = rc;
+ if (rc == VERR_SVM_DISABLED || rc == VERR_SVM_IN_USE)
+ rc = VINF_SUCCESS; /* Don't fail if AMD-V is disabled or in use. */
+ }
+ return rc;
+}
+
+
+/**
+ * Does global Ring-0 HM initialization (at module init).
+ *
+ * @returns VBox status code.
+ */
+VMMR0_INT_DECL(int) HMR0Init(void)
+{
+ /*
+ * Initialize the globals.
+ */
+ g_HmR0.fEnabled = false;
+ static RTONCE s_OnceInit = RTONCE_INITIALIZER;
+ g_HmR0.EnableAllCpusOnce = s_OnceInit;
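+    /* Mark every per-CPU slot as unused: no CPU assigned and nothing allocated yet. */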
+ for (unsigned i = 0; i < RT_ELEMENTS(g_HmR0.aCpuInfo); i++)
+ {
+ g_HmR0.aCpuInfo[i].idCpu = NIL_RTCPUID;
+ g_HmR0.aCpuInfo[i].hMemObj = NIL_RTR0MEMOBJ;
+ g_HmR0.aCpuInfo[i].HCPhysMemObj = NIL_RTHCPHYS;
+ g_HmR0.aCpuInfo[i].pvMemObj = NULL;
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm = NIL_RTR0MEMOBJ;
+ g_HmR0.aCpuInfo[i].n.svm.HCPhysNstGstMsrpm = NIL_RTHCPHYS;
+ g_HmR0.aCpuInfo[i].n.svm.pvNstGstMsrpm = NULL;
+#endif
+ }
+
+ /* Fill in all callbacks with placeholders. */
+ g_HmR0.pfnEnterSession = hmR0DummyEnter;
+ g_HmR0.pfnThreadCtxCallback = hmR0DummyThreadCtxCallback;
+ g_HmR0.pfnExportHostState = hmR0DummyExportHostState;
+ g_HmR0.pfnRunGuestCode = hmR0DummyRunGuestCode;
+ g_HmR0.pfnEnableCpu = hmR0DummyEnableCpu;
+ g_HmR0.pfnDisableCpu = hmR0DummyDisableCpu;
+ g_HmR0.pfnInitVM = hmR0DummyInitVM;
+ g_HmR0.pfnTermVM = hmR0DummyTermVM;
+ g_HmR0.pfnSetupVM = hmR0DummySetupVM;
+
+ /* Default is global VT-x/AMD-V init. */
+ g_HmR0.fGlobalInit = true;
+
+ /*
+ * Make sure aCpuInfo is big enough for all the CPUs on this system.
+ */
+ if (RTMpGetArraySize() > RT_ELEMENTS(g_HmR0.aCpuInfo))
+ {
+ LogRel(("HM: Too many real CPUs/cores/threads - %u, max %u\n", RTMpGetArraySize(), RT_ELEMENTS(g_HmR0.aCpuInfo)));
+ return VERR_TOO_MANY_CPUS;
+ }
+
+ /*
+ * Check for VT-x or AMD-V support.
+ * Return failure only in out-of-memory situations.
+ */
+ uint32_t fCaps = 0;
+ int rc = SUPR0GetVTSupport(&fCaps);
+ if (RT_SUCCESS(rc))
+ {
+ if (fCaps & SUPVTCAPS_VT_X)
+ {
+ rc = hmR0InitIntel();
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+ else
+ {
+ Assert(fCaps & SUPVTCAPS_AMD_V);
+ rc = hmR0InitAmd();
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+ }
+ else
+ g_HmR0.rcInit = VERR_UNSUPPORTED_CPU;
+
+ /*
+ * Register notification callbacks that we can use to disable/enable CPUs
+ * when brought offline/online or suspending/resuming.
+ */
+ if (!g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
+ {
+ rc = RTMpNotificationRegister(hmR0MpEventCallback, NULL);
+ AssertRC(rc);
+
+ rc = RTPowerNotificationRegister(hmR0PowerCallback, NULL);
+ AssertRC(rc);
+ }
+
+ /* We return success here because module init shall not fail if HM fails to initialize. */
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does global Ring-0 HM termination (at module termination).
+ *
+ * @returns VBox status code.
+ */
+VMMR0_INT_DECL(int) HMR0Term(void)
+{
+ int rc;
+ if ( g_HmR0.hwvirt.u.vmx.fSupported
+ && g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
+ {
+ /*
+ * Simple if the host OS manages VT-x.
+ */
+ Assert(g_HmR0.fGlobalInit);
+
+ if (g_HmR0.hwvirt.u.vmx.fCalledSUPR0EnableVTx)
+ {
+ rc = SUPR0EnableVTx(false /* fEnable */);
+ g_HmR0.hwvirt.u.vmx.fCalledSUPR0EnableVTx = false;
+ }
+ else
+ rc = VINF_SUCCESS;
+
+ for (unsigned iCpu = 0; iCpu < RT_ELEMENTS(g_HmR0.aCpuInfo); iCpu++)
+ {
+ g_HmR0.aCpuInfo[iCpu].fConfigured = false;
+ Assert(g_HmR0.aCpuInfo[iCpu].hMemObj == NIL_RTR0MEMOBJ);
+ }
+ }
+ else
+ {
+ Assert(!g_HmR0.hwvirt.u.vmx.fSupported || !g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx);
+
+ /* Doesn't really matter if this fails. */
+ rc = RTMpNotificationDeregister(hmR0MpEventCallback, NULL); AssertRC(rc);
+ rc = RTPowerNotificationDeregister(hmR0PowerCallback, NULL); AssertRC(rc);
+
+ /*
+ * Disable VT-x/AMD-V on all CPUs if we enabled it before.
+ */
+ if (g_HmR0.fGlobalInit)
+ {
+ HMR0FIRSTRC FirstRc;
+ hmR0FirstRcInit(&FirstRc);
+ rc = RTMpOnAll(hmR0DisableCpuCallback, NULL /* pvUser 1 */, &FirstRc);
+ Assert(RT_SUCCESS(rc) || rc == VERR_NOT_SUPPORTED);
+ if (RT_SUCCESS(rc))
+ rc = hmR0FirstRcGetStatus(&FirstRc);
+ }
+
+ /*
+ * Free the per-cpu pages used for VT-x and AMD-V.
+ */
+ for (unsigned i = 0; i < RT_ELEMENTS(g_HmR0.aCpuInfo); i++)
+ {
+ if (g_HmR0.aCpuInfo[i].hMemObj != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(g_HmR0.aCpuInfo[i].hMemObj, false);
+ g_HmR0.aCpuInfo[i].hMemObj = NIL_RTR0MEMOBJ;
+ g_HmR0.aCpuInfo[i].HCPhysMemObj = NIL_RTHCPHYS;
+ g_HmR0.aCpuInfo[i].pvMemObj = NULL;
+ }
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm, false);
+ g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm = NIL_RTR0MEMOBJ;
+ g_HmR0.aCpuInfo[i].n.svm.HCPhysNstGstMsrpm = NIL_RTHCPHYS;
+ g_HmR0.aCpuInfo[i].n.svm.pvNstGstMsrpm = NULL;
+ }
+#endif
+ }
+ }
+
+ /** @todo This needs cleaning up. There's no matching
+ * hmR0TermIntel()/hmR0TermAmd() and all the VT-x/AMD-V specific bits
+ * should move into their respective modules. */
+ /* Finally, call global VT-x/AMD-V termination. */
+ if (g_HmR0.hwvirt.u.vmx.fSupported)
+ VMXR0GlobalTerm();
+ else if (g_HmR0.hwvirt.u.svm.fSupported)
+ SVMR0GlobalTerm();
+
+ return rc;
+}
+
+
+/**
+ * Worker function used by hmR0PowerCallback() and HMR0Init() to initialize VT-x
+ * on a CPU.
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 Pointer to the first RC structure.
+ * @param pvUser2 Ignored.
+ */
+static DECLCALLBACK(void) hmR0InitIntelCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PHMR0FIRSTRC pFirstRc = (PHMR0FIRSTRC)pvUser1;
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /** @todo fix idCpu == index assumption (rainy day) */
+ NOREF(idCpu); NOREF(pvUser2);
+
+ int rc = SUPR0GetVmxUsability(NULL /* pfIsSmxModeAmbiguous */);
+ hmR0FirstRcSetStatus(pFirstRc, rc);
+}
+
+
+/**
+ * Worker function used by hmR0PowerCallback() and HMR0Init() to initialize AMD-V
+ * on a CPU.
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 Pointer to the first RC structure.
+ * @param pvUser2 Ignored.
+ */
+static DECLCALLBACK(void) hmR0InitAmdCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PHMR0FIRSTRC pFirstRc = (PHMR0FIRSTRC)pvUser1;
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /** @todo fix idCpu == index assumption (rainy day) */
+ NOREF(idCpu); NOREF(pvUser2);
+
+ int rc = SUPR0GetSvmUsability(true /* fInitSvm */);
+ hmR0FirstRcSetStatus(pFirstRc, rc);
+}
+
+
+/**
+ * Enable VT-x or AMD-V on the current CPU
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure. Can be NULL.
+ * @param idCpu The identifier for the CPU the function is called on.
+ *
+ * @remarks May be called with interrupts disabled!
+ */
+static int hmR0EnableCpu(PVM pVM, RTCPUID idCpu)
+{
+ PHMPHYSCPU pHostCpu = &g_HmR0.aCpuInfo[idCpu];
+
+ Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /** @todo fix idCpu == index assumption (rainy day) */
+ Assert(idCpu < RT_ELEMENTS(g_HmR0.aCpuInfo));
+ Assert(!pHostCpu->fConfigured);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ pHostCpu->idCpu = idCpu;
+ /* Do NOT reset cTlbFlushes here, see @bugref{6255}. */
+
+ int rc;
+ if ( g_HmR0.hwvirt.u.vmx.fSupported
+ && g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
+ rc = g_HmR0.pfnEnableCpu(pHostCpu, pVM, NULL /* pvCpuPage */, NIL_RTHCPHYS, true, &g_HmR0.hwvirt.Msrs);
+ else
+ {
+ AssertLogRelMsgReturn(pHostCpu->hMemObj != NIL_RTR0MEMOBJ, ("hmR0EnableCpu failed idCpu=%u.\n", idCpu), VERR_HM_IPE_1);
+ rc = g_HmR0.pfnEnableCpu(pHostCpu, pVM, pHostCpu->pvMemObj, pHostCpu->HCPhysMemObj, false, &g_HmR0.hwvirt.Msrs);
+ }
+ if (RT_SUCCESS(rc))
+ pHostCpu->fConfigured = true;
+ return rc;
+}
+
+
+/**
+ * Worker function passed to RTMpOnAll() that is to be called on all CPUs.
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 Opaque pointer to the VM (can be NULL!).
+ * @param pvUser2 The 2nd user argument.
+ */
+static DECLCALLBACK(void) hmR0EnableCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PVM pVM = (PVM)pvUser1; /* can be NULL! */
+ PHMR0FIRSTRC pFirstRc = (PHMR0FIRSTRC)pvUser2;
+ AssertReturnVoid(g_HmR0.fGlobalInit);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ hmR0FirstRcSetStatus(pFirstRc, hmR0EnableCpu(pVM, idCpu));
+}
+
+
+/**
+ * RTOnce callback employed by HMR0EnableAllCpus.
+ *
+ * @returns VBox status code.
+ * @param pvUser Pointer to the VM.
+ */
+static DECLCALLBACK(int32_t) hmR0EnableAllCpuOnce(void *pvUser)
+{
+ PVM pVM = (PVM)pvUser;
+
+ /*
+ * Indicate that we've initialized.
+ *
+ * Note! There is a potential race between this function and the suspend
+ * notification. Kind of unlikely though, so ignored for now.
+ */
+ AssertReturn(!g_HmR0.fEnabled, VERR_HM_ALREADY_ENABLED_IPE);
+ ASMAtomicWriteBool(&g_HmR0.fEnabled, true);
+
+ /*
+ * The global init variable is set by the first VM.
+ */
+ g_HmR0.fGlobalInit = pVM->hm.s.fGlobalInit;
+
+#ifdef VBOX_STRICT
+ for (unsigned i = 0; i < RT_ELEMENTS(g_HmR0.aCpuInfo); i++)
+ {
+ Assert(g_HmR0.aCpuInfo[i].hMemObj == NIL_RTR0MEMOBJ);
+ Assert(g_HmR0.aCpuInfo[i].HCPhysMemObj == NIL_RTHCPHYS);
+ Assert(g_HmR0.aCpuInfo[i].pvMemObj == NULL);
+ Assert(!g_HmR0.aCpuInfo[i].fConfigured);
+ Assert(!g_HmR0.aCpuInfo[i].cTlbFlushes);
+ Assert(!g_HmR0.aCpuInfo[i].uCurrentAsid);
+# ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ Assert(g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm == NIL_RTR0MEMOBJ);
+ Assert(g_HmR0.aCpuInfo[i].n.svm.HCPhysNstGstMsrpm == NIL_RTHCPHYS);
+ Assert(g_HmR0.aCpuInfo[i].n.svm.pvNstGstMsrpm == NULL);
+# endif
+ }
+#endif
+
+ int rc;
+ if ( g_HmR0.hwvirt.u.vmx.fSupported
+ && g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
+ {
+ /*
+ * Global VT-x initialization API (only darwin for now).
+ */
+ rc = SUPR0EnableVTx(true /* fEnable */);
+ if (RT_SUCCESS(rc))
+ {
+ g_HmR0.hwvirt.u.vmx.fCalledSUPR0EnableVTx = true;
+ /* If the host provides a VT-x init API, then we'll rely on that for global init. */
+ g_HmR0.fGlobalInit = pVM->hm.s.fGlobalInit = true;
+ }
+ else
+ AssertMsgFailed(("hmR0EnableAllCpuOnce/SUPR0EnableVTx: rc=%Rrc\n", rc));
+ }
+ else
+ {
+ /*
+ * We're doing the job ourselves.
+ */
+ /* Allocate one page per cpu for the global VT-x and AMD-V pages */
+ for (unsigned i = 0; i < RT_ELEMENTS(g_HmR0.aCpuInfo); i++)
+ {
+ Assert(g_HmR0.aCpuInfo[i].hMemObj == NIL_RTR0MEMOBJ);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ Assert(g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm == NIL_RTR0MEMOBJ);
+#endif
+ if (RTMpIsCpuPossible(RTMpCpuIdFromSetIndex(i)))
+ {
+ /** @todo NUMA */
+ rc = RTR0MemObjAllocCont(&g_HmR0.aCpuInfo[i].hMemObj, PAGE_SIZE, false /* executable R0 mapping */);
+ AssertLogRelRCReturn(rc, rc);
+
+ g_HmR0.aCpuInfo[i].HCPhysMemObj = RTR0MemObjGetPagePhysAddr(g_HmR0.aCpuInfo[i].hMemObj, 0);
+ Assert(g_HmR0.aCpuInfo[i].HCPhysMemObj != NIL_RTHCPHYS);
+ Assert(!(g_HmR0.aCpuInfo[i].HCPhysMemObj & PAGE_OFFSET_MASK));
+
+ g_HmR0.aCpuInfo[i].pvMemObj = RTR0MemObjAddress(g_HmR0.aCpuInfo[i].hMemObj);
+ AssertPtr(g_HmR0.aCpuInfo[i].pvMemObj);
+ ASMMemZeroPage(g_HmR0.aCpuInfo[i].pvMemObj);
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ rc = RTR0MemObjAllocCont(&g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm, SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT,
+ false /* executable R0 mapping */);
+ AssertLogRelRCReturn(rc, rc);
+
+ g_HmR0.aCpuInfo[i].n.svm.HCPhysNstGstMsrpm = RTR0MemObjGetPagePhysAddr(g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm, 0);
+ Assert(g_HmR0.aCpuInfo[i].n.svm.HCPhysNstGstMsrpm != NIL_RTHCPHYS);
+ Assert(!(g_HmR0.aCpuInfo[i].n.svm.HCPhysNstGstMsrpm & PAGE_OFFSET_MASK));
+
+ g_HmR0.aCpuInfo[i].n.svm.pvNstGstMsrpm = RTR0MemObjAddress(g_HmR0.aCpuInfo[i].n.svm.hNstGstMsrpm);
+ AssertPtr(g_HmR0.aCpuInfo[i].n.svm.pvNstGstMsrpm);
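+                /* Set all bits in the nested-guest MSR permission bitmap so every MSR access is intercepted by default. */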
+ ASMMemFill32(g_HmR0.aCpuInfo[i].n.svm.pvNstGstMsrpm, SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT, UINT32_C(0xffffffff));
+#endif
+ }
+ }
+
+ rc = VINF_SUCCESS;
+ }
+
+ if ( RT_SUCCESS(rc)
+ && g_HmR0.fGlobalInit)
+ {
+ /* First time, so initialize each cpu/core. */
+ HMR0FIRSTRC FirstRc;
+ hmR0FirstRcInit(&FirstRc);
+ rc = RTMpOnAll(hmR0EnableCpuCallback, (void *)pVM, &FirstRc);
+ if (RT_SUCCESS(rc))
+ rc = hmR0FirstRcGetStatus(&FirstRc);
+ }
+
+ return rc;
+}
+
+
+/**
+ * Sets up HM on all cpus.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) HMR0EnableAllCpus(PVM pVM)
+{
+ /* Make sure we don't touch HM after we've disabled HM in preparation of a suspend. */
+ if (ASMAtomicReadBool(&g_HmR0.fSuspended))
+ return VERR_HM_SUSPEND_PENDING;
+
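+    /* The actual work is done exactly once by hmR0EnableAllCpuOnce(), no matter how many VMs end up calling this. */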
+ return RTOnce(&g_HmR0.EnableAllCpusOnce, hmR0EnableAllCpuOnce, pVM);
+}
+
+
+/**
+ * Disable VT-x or AMD-V on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param idCpu The identifier for the CPU this function is called on.
+ *
+ * @remarks Must be called with preemption disabled.
+ */
+static int hmR0DisableCpu(RTCPUID idCpu)
+{
+ PHMPHYSCPU pHostCpu = &g_HmR0.aCpuInfo[idCpu];
+
+ Assert(!g_HmR0.hwvirt.u.vmx.fSupported || !g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(idCpu == (RTCPUID)RTMpCpuIdToSetIndex(idCpu)); /** @todo fix idCpu == index assumption (rainy day) */
+ Assert(idCpu < RT_ELEMENTS(g_HmR0.aCpuInfo));
+ Assert(!pHostCpu->fConfigured || pHostCpu->hMemObj != NIL_RTR0MEMOBJ);
+ AssertRelease(idCpu == RTMpCpuId());
+
+ if (pHostCpu->hMemObj == NIL_RTR0MEMOBJ)
+ return pHostCpu->fConfigured ? VERR_NO_MEMORY : VINF_SUCCESS /* not initialized. */;
+ AssertPtr(pHostCpu->pvMemObj);
+ Assert(pHostCpu->HCPhysMemObj != NIL_RTHCPHYS);
+
+ int rc;
+ if (pHostCpu->fConfigured)
+ {
+ rc = g_HmR0.pfnDisableCpu(pHostCpu->pvMemObj, pHostCpu->HCPhysMemObj);
+ AssertRCReturn(rc, rc);
+
+ pHostCpu->fConfigured = false;
+ pHostCpu->idCpu = NIL_RTCPUID;
+ }
+ else
+ rc = VINF_SUCCESS; /* nothing to do */
+ return rc;
+}
+
+
+/**
+ * Worker function passed to RTMpOnAll() that is to be called on the target
+ * CPUs.
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 The 1st user argument.
+ * @param pvUser2 Opaque pointer to the FirstRc.
+ */
+static DECLCALLBACK(void) hmR0DisableCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PHMR0FIRSTRC pFirstRc = (PHMR0FIRSTRC)pvUser2; NOREF(pvUser1);
+ AssertReturnVoid(g_HmR0.fGlobalInit);
+ hmR0FirstRcSetStatus(pFirstRc, hmR0DisableCpu(idCpu));
+}
+
+
+/**
+ * Worker function passed to RTMpOnSpecific() that is to be called on the target
+ * CPU.
+ *
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvUser1 Null, not used.
+ * @param pvUser2 Null, not used.
+ */
+static DECLCALLBACK(void) hmR0DisableCpuOnSpecificCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ NOREF(pvUser1);
+ NOREF(pvUser2);
+ hmR0DisableCpu(idCpu);
+}
+
+
+/**
+ * Callback function invoked when a cpu goes online or offline.
+ *
+ * @param enmEvent The Mp event.
+ * @param idCpu The identifier for the CPU the function is called on.
+ * @param pvData Opaque data (PVM pointer).
+ */
+static DECLCALLBACK(void) hmR0MpEventCallback(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvData)
+{
+ NOREF(pvData);
+ Assert(!g_HmR0.hwvirt.u.vmx.fSupported || !g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx);
+
+ /*
+ * We only care about uninitializing a CPU that is going offline. When a
+ * CPU comes online, the initialization is done lazily in HMR0Enter().
+ */
+ switch (enmEvent)
+ {
+ case RTMPEVENT_OFFLINE:
+ {
+ RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+ RTThreadPreemptDisable(&PreemptState);
+ if (idCpu == RTMpCpuId())
+ {
+ int rc = hmR0DisableCpu(idCpu);
+ AssertRC(rc);
+ RTThreadPreemptRestore(&PreemptState);
+ }
+ else
+ {
+ RTThreadPreemptRestore(&PreemptState);
+ RTMpOnSpecific(idCpu, hmR0DisableCpuOnSpecificCallback, NULL /* pvUser1 */, NULL /* pvUser2 */);
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
+
+/**
+ * Called whenever a system power state change occurs.
+ *
+ * @param enmEvent The Power event.
+ * @param pvUser User argument.
+ */
+static DECLCALLBACK(void) hmR0PowerCallback(RTPOWEREVENT enmEvent, void *pvUser)
+{
+ NOREF(pvUser);
+ Assert(!g_HmR0.hwvirt.u.vmx.fSupported || !g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx);
+
+#ifdef LOG_ENABLED
+ if (enmEvent == RTPOWEREVENT_SUSPEND)
+ SUPR0Printf("hmR0PowerCallback RTPOWEREVENT_SUSPEND\n");
+ else
+ SUPR0Printf("hmR0PowerCallback RTPOWEREVENT_RESUME\n");
+#endif
+
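+    /* Raise the suspended flag first; HMR0Enter(), HMR0EnableAllCpus() and friends check it and refuse to touch HM while we tear things down. */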
+ if (enmEvent == RTPOWEREVENT_SUSPEND)
+ ASMAtomicWriteBool(&g_HmR0.fSuspended, true);
+
+ if (g_HmR0.fEnabled)
+ {
+ int rc;
+ HMR0FIRSTRC FirstRc;
+ hmR0FirstRcInit(&FirstRc);
+
+ if (enmEvent == RTPOWEREVENT_SUSPEND)
+ {
+ if (g_HmR0.fGlobalInit)
+ {
+ /* Turn off VT-x or AMD-V on all CPUs. */
+ rc = RTMpOnAll(hmR0DisableCpuCallback, NULL /* pvUser 1 */, &FirstRc);
+ Assert(RT_SUCCESS(rc) || rc == VERR_NOT_SUPPORTED);
+ }
+ /* else nothing to do here for the local init case */
+ }
+ else
+ {
+ /* Reinit the CPUs from scratch as the suspend state might have
+ messed with the MSRs. (lousy BIOSes as usual) */
+ if (g_HmR0.hwvirt.u.vmx.fSupported)
+ rc = RTMpOnAll(hmR0InitIntelCpu, &FirstRc, NULL);
+ else
+ rc = RTMpOnAll(hmR0InitAmdCpu, &FirstRc, NULL);
+ Assert(RT_SUCCESS(rc) || rc == VERR_NOT_SUPPORTED);
+ if (RT_SUCCESS(rc))
+ rc = hmR0FirstRcGetStatus(&FirstRc);
+#ifdef LOG_ENABLED
+ if (RT_FAILURE(rc))
+                SUPR0Printf("hmR0PowerCallback hmR0InitXxxCpu failed with %Rrc\n", rc);
+#endif
+ if (g_HmR0.fGlobalInit)
+ {
+ /* Turn VT-x or AMD-V back on on all CPUs. */
+ rc = RTMpOnAll(hmR0EnableCpuCallback, NULL /* pVM */, &FirstRc /* output ignored */);
+ Assert(RT_SUCCESS(rc) || rc == VERR_NOT_SUPPORTED);
+ }
+ /* else nothing to do here for the local init case */
+ }
+ }
+
+ if (enmEvent == RTPOWEREVENT_RESUME)
+ ASMAtomicWriteBool(&g_HmR0.fSuspended, false);
+}
+
+
+/**
+ * Does ring-0 per-VM HM initialization.
+ *
+ * This will call the CPU-specific init routine which may initialize and allocate
+ * resources for virtual CPUs.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ *
+ * @remarks This is called after HMR3Init(), see vmR3CreateU() and
+ * vmR3InitRing3().
+ */
+VMMR0_INT_DECL(int) HMR0InitVM(PVM pVM)
+{
+ AssertReturn(pVM, VERR_INVALID_PARAMETER);
+
+ /* Make sure we don't touch HM after we've disabled HM in preparation of a suspend. */
+ if (ASMAtomicReadBool(&g_HmR0.fSuspended))
+ return VERR_HM_SUSPEND_PENDING;
+
+ /*
+ * Copy globals to the VM structure.
+ */
+ Assert(!(pVM->hm.s.vmx.fSupported && pVM->hm.s.svm.fSupported));
+ if (pVM->hm.s.vmx.fSupported)
+ {
+ pVM->hm.s.vmx.fUsePreemptTimer &= g_HmR0.hwvirt.u.vmx.fUsePreemptTimer; /* Can be overridden by CFGM see HMR3Init(). */
+ pVM->hm.s.vmx.cPreemptTimerShift = g_HmR0.hwvirt.u.vmx.cPreemptTimerShift;
+ pVM->hm.s.vmx.u64HostCr4 = g_HmR0.hwvirt.u.vmx.u64HostCr4;
+ pVM->hm.s.vmx.u64HostEfer = g_HmR0.hwvirt.u.vmx.u64HostEfer;
+ pVM->hm.s.vmx.u64HostSmmMonitorCtl = g_HmR0.hwvirt.u.vmx.u64HostSmmMonitorCtl;
+ HMGetVmxMsrsFromHwvirtMsrs(&g_HmR0.hwvirt.Msrs, &pVM->hm.s.vmx.Msrs);
+ }
+ else if (pVM->hm.s.svm.fSupported)
+ {
+ pVM->hm.s.svm.u32Rev = g_HmR0.hwvirt.u.svm.u32Rev;
+ pVM->hm.s.svm.u32Features = g_HmR0.hwvirt.u.svm.u32Features;
+ pVM->hm.s.svm.u64MsrHwcr = g_HmR0.hwvirt.Msrs.u.svm.u64MsrHwcr;
+ }
+ pVM->hm.s.rcInit = g_HmR0.rcInit;
+ pVM->hm.s.uMaxAsid = g_HmR0.hwvirt.uMaxAsid;
+
+ /*
+ * Set default maximum inner loops in ring-0 before returning to ring-3.
+     * Can be overridden using CFGM.
+ */
+ if (!pVM->hm.s.cMaxResumeLoops)
+ {
+ pVM->hm.s.cMaxResumeLoops = 1024;
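+        /* Allow many more resume loops when the host can reliably tell us about pending preemption requests. */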
+ if (RTThreadPreemptIsPendingTrusty())
+ pVM->hm.s.cMaxResumeLoops = 8192;
+ }
+
+ /*
+ * Initialize some per-VCPU fields.
+ */
+ for (VMCPUID i = 0; i < pVM->cCpus; i++)
+ {
+ PVMCPU pVCpu = &pVM->aCpus[i];
+ pVCpu->hm.s.idEnteredCpu = NIL_RTCPUID;
+ pVCpu->hm.s.idLastCpu = NIL_RTCPUID;
+
+        /* We'll always increment this the first time (host uses ASID 0). */
+ AssertReturn(!pVCpu->hm.s.uCurrentAsid, VERR_HM_IPE_3);
+ }
+
+ /*
+ * Get host kernel features that HM might need to know in order
+ * to co-operate and function properly with the host OS (e.g. SMAP).
+ *
+ * Technically, we could do this as part of the pre-init VM procedure
+ * but it shouldn't be done later than this point so we do it here.
+ */
+ pVM->hm.s.fHostKernelFeatures = SUPR0GetKernelFeatures();
+
+ /*
+ * Call the hardware specific initialization method.
+ */
+ return g_HmR0.pfnInitVM(pVM);
+}
+
+
+/**
+ * Does ring-0 per VM HM termination.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) HMR0TermVM(PVM pVM)
+{
+ Log(("HMR0TermVM: %p\n", pVM));
+ AssertReturn(pVM, VERR_INVALID_PARAMETER);
+
+ /*
+ * Call the hardware specific method.
+ *
+ * Note! We might be preparing for a suspend, so the pfnTermVM() functions should probably not
+     * mess with VT-x/AMD-V features on the CPU; currently all they do is free memory, so this is safe.
+ */
+ return g_HmR0.pfnTermVM(pVM);
+}
+
+
+/**
+ * Sets up a VT-x or AMD-V session.
+ *
+ * This is mostly about setting up the hardware VM state.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) HMR0SetupVM(PVM pVM)
+{
+ Log(("HMR0SetupVM: %p\n", pVM));
+ AssertReturn(pVM, VERR_INVALID_PARAMETER);
+
+ /* Make sure we don't touch HM after we've disabled HM in preparation of a suspend. */
+ AssertReturn(!ASMAtomicReadBool(&g_HmR0.fSuspended), VERR_HM_SUSPEND_PENDING);
+
+ /* On first entry we'll sync everything. */
+ for (VMCPUID i = 0; i < pVM->cCpus; i++)
+ {
+ PVMCPU pVCpu = &pVM->aCpus[i];
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT | HM_CHANGED_ALL_GUEST;
+ }
+
+ /*
+ * Call the hardware specific setup VM method. This requires the CPU to be
+ * enabled for AMD-V/VT-x and preemption to be prevented.
+ */
+ RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+ RTThreadPreemptDisable(&PreemptState);
+ RTCPUID const idCpu = RTMpCpuId();
+
+ /* Enable VT-x or AMD-V if local init is required. */
+ int rc;
+ if (!g_HmR0.fGlobalInit)
+ {
+ Assert(!g_HmR0.hwvirt.u.vmx.fSupported || !g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx);
+ rc = hmR0EnableCpu(pVM, idCpu);
+ if (RT_FAILURE(rc))
+ {
+ RTThreadPreemptRestore(&PreemptState);
+ return rc;
+ }
+ }
+
+ /* Setup VT-x or AMD-V. */
+ rc = g_HmR0.pfnSetupVM(pVM);
+
+ /* Disable VT-x or AMD-V if local init was done before. */
+ if (!g_HmR0.fGlobalInit)
+ {
+ Assert(!g_HmR0.hwvirt.u.vmx.fSupported || !g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx);
+ int rc2 = hmR0DisableCpu(idCpu);
+ AssertRC(rc2);
+ }
+
+ RTThreadPreemptRestore(&PreemptState);
+ return rc;
+}
+
+
+/**
+ * Turns on HM on the CPU if necessary and initializes the bare minimum state
+ * required for entering HM context.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+VMMR0_INT_DECL(int) hmR0EnterCpu(PVMCPU pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ int rc = VINF_SUCCESS;
+ RTCPUID const idCpu = RTMpCpuId();
+ PHMPHYSCPU pHostCpu = &g_HmR0.aCpuInfo[idCpu];
+ AssertPtr(pHostCpu);
+
+ /* Enable VT-x or AMD-V if local init is required, or enable if it's a freshly onlined CPU. */
+ if (!pHostCpu->fConfigured)
+ rc = hmR0EnableCpu(pVCpu->CTX_SUFF(pVM), idCpu);
+
+ /* Reload host-state (back from ring-3/migrated CPUs) and shared guest/host bits. */
+ if (g_HmR0.hwvirt.u.vmx.fSupported)
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
+ else
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE;
+
+ Assert(pHostCpu->idCpu == idCpu && pHostCpu->idCpu != NIL_RTCPUID);
+ pVCpu->hm.s.idEnteredCpu = idCpu;
+ return rc;
+}
+
+
+/**
+ * Enters the VT-x or AMD-V session.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks This is called with preemption disabled.
+ */
+VMMR0_INT_DECL(int) HMR0Enter(PVMCPU pVCpu)
+{
+ /* Make sure we can't enter a session after we've disabled HM in preparation of a suspend. */
+ AssertReturn(!ASMAtomicReadBool(&g_HmR0.fSuspended), VERR_HM_SUSPEND_PENDING);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* Load the bare minimum state required for entering HM. */
+ int rc = hmR0EnterCpu(pVCpu);
+ if (RT_SUCCESS(rc))
+ {
+ if (g_HmR0.hwvirt.u.vmx.fSupported)
+ {
+ Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
+ == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
+ }
+ else
+ {
+ Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE))
+ == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE));
+ }
+
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ AssertReturn(!VMMR0ThreadCtxHookIsEnabled(pVCpu), VERR_HM_IPE_5);
+ bool const fStartedSet = PGMR0DynMapStartOrMigrateAutoSet(pVCpu);
+#endif
+
+ /* Keep track of the CPU owning the VMCS for debugging scheduling weirdness and ring-3 calls. */
+ rc = g_HmR0.pfnEnterSession(pVCpu);
+ AssertMsgRCReturnStmt(rc, ("rc=%Rrc pVCpu=%p\n", rc, pVCpu), pVCpu->hm.s.idEnteredCpu = NIL_RTCPUID, rc);
+
+ /* Exports the host-state as we may be resuming code after a longjmp and quite
+ possibly now be scheduled on a different CPU. */
+ rc = g_HmR0.pfnExportHostState(pVCpu);
+ AssertMsgRCReturnStmt(rc, ("rc=%Rrc pVCpu=%p\n", rc, pVCpu), pVCpu->hm.s.idEnteredCpu = NIL_RTCPUID, rc);
+
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ if (fStartedSet)
+ PGMRZDynMapReleaseAutoSet(pVCpu);
+#endif
+ }
+ return rc;
+}
+
+
+/**
+ * Deinitializes the bare minimum state used for HM context and if necessary
+ * disable HM on the CPU.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+VMMR0_INT_DECL(int) HMR0LeaveCpu(PVMCPU pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ VMCPU_ASSERT_EMT_RETURN(pVCpu, VERR_HM_WRONG_CPU);
+
+ RTCPUID const idCpu = RTMpCpuId();
+ PCHMPHYSCPU pHostCpu = &g_HmR0.aCpuInfo[idCpu];
+
+ if ( !g_HmR0.fGlobalInit
+ && pHostCpu->fConfigured)
+ {
+ int rc = hmR0DisableCpu(idCpu);
+ AssertRCReturn(rc, rc);
+ Assert(!pHostCpu->fConfigured);
+ Assert(pHostCpu->idCpu == NIL_RTCPUID);
+
+ /* For obtaining a non-zero ASID/VPID on next re-entry. */
+ pVCpu->hm.s.idLastCpu = NIL_RTCPUID;
+ }
+
+ /* Clear it while leaving HM context, hmPokeCpuForTlbFlush() relies on this. */
+ pVCpu->hm.s.idEnteredCpu = NIL_RTCPUID;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Thread-context hook for HM.
+ *
+ * @param enmEvent The thread-context event.
+ * @param pvUser Opaque pointer to the VMCPU.
+ */
+VMMR0_INT_DECL(void) HMR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, void *pvUser)
+{
+ PVMCPU pVCpu = (PVMCPU)pvUser;
+ Assert(pVCpu);
+ Assert(g_HmR0.pfnThreadCtxCallback);
+
+ g_HmR0.pfnThreadCtxCallback(enmEvent, pVCpu, g_HmR0.fGlobalInit);
+}
+
+
+/**
+ * Runs guest code in a hardware accelerated VM.
+ *
+ * @returns Strict VBox status code. (VBOXSTRICTRC isn't used because it's
+ * called from setjmp assembly.)
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Can be called with preemption enabled if thread-context hooks are
+ * used!!!
+ */
+VMMR0_INT_DECL(int) HMR0RunGuestCode(PVM pVM, PVMCPU pVCpu)
+{
+ RT_NOREF(pVM);
+
+#ifdef VBOX_STRICT
+ /* With thread-context hooks we would be running this code with preemption enabled. */
+ if (!RTThreadPreemptIsEnabled(NIL_RTTHREAD))
+ {
+ PCHMPHYSCPU pHostCpu = &g_HmR0.aCpuInfo[RTMpCpuId()];
+ Assert(!VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
+ Assert(pHostCpu->fConfigured);
+ AssertReturn(!ASMAtomicReadBool(&g_HmR0.fSuspended), VERR_HM_SUSPEND_PENDING);
+ }
+#endif
+
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ AssertReturn(!VMMR0ThreadCtxHookIsEnabled(pVCpu), VERR_HM_IPE_4);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ PGMRZDynMapStartAutoSet(pVCpu);
+#endif
+
+ VBOXSTRICTRC rcStrict = g_HmR0.pfnRunGuestCode(pVCpu);
+
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ PGMRZDynMapReleaseAutoSet(pVCpu);
+#endif
+ return VBOXSTRICTRC_VAL(rcStrict);
+}
+
+
+/**
+ * Notification from CPUM that it has unloaded the guest FPU/SSE/AVX state from
+ * the host CPU and that guest access to it must be intercepted.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ */
+VMMR0_INT_DECL(void) HMR0NotifyCpumUnloadedGuestFpuState(PVMCPU pVCpu)
+{
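+    /* Marking CR0 as changed makes the next world switch re-export CR0 and re-establish the guest FPU access intercepts. */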
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR0);
+}
+
+
+/**
+ * Notification from CPUM that it has modified the host CR0 (because of FPU).
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ */
+VMMR0_INT_DECL(void) HMR0NotifyCpumModifiedHostCr0(PVMCPU pVCpu)
+{
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_HOST_CONTEXT);
+}
+
+
+#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
+
+/**
+ * Save guest FPU/XMM state (64 bits guest mode & 32 bits host only)
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pCtx Pointer to the guest CPU context.
+ */
+VMMR0_INT_DECL(int) HMR0SaveFPUState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
+{
+ RT_NOREF(pCtx);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFpu64SwitchBack);
+ if (pVM->hm.s.vmx.fSupported)
+ return VMXR0Execute64BitsHandler(pVCpu, HM64ON32OP_HMRCSaveGuestFPU64, 0, NULL);
+ return SVMR0Execute64BitsHandler(pVCpu, HM64ON32OP_HMRCSaveGuestFPU64, 0, NULL);
+}
+
+
+/**
+ * Save guest debug state (64 bits guest mode & 32 bits host only)
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pCtx Pointer to the guest CPU context.
+ */
+VMMR0_INT_DECL(int) HMR0SaveDebugState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
+{
+ RT_NOREF(pCtx);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDebug64SwitchBack);
+ if (pVM->hm.s.vmx.fSupported)
+ return VMXR0Execute64BitsHandler(pVCpu, HM64ON32OP_HMRCSaveGuestDebug64, 0, NULL);
+ return SVMR0Execute64BitsHandler(pVCpu, HM64ON32OP_HMRCSaveGuestDebug64, 0, NULL);
+}
+
+
+/**
+ * Test the 32->64 bits switcher.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) HMR0TestSwitcher3264(PVM pVM)
+{
+ PVMCPU pVCpu = &pVM->aCpus[0];
+ uint32_t aParam[5] = { 0, 1, 2, 3, 4 };
+ int rc;
+
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z);
+ if (pVM->hm.s.vmx.fSupported)
+ rc = VMXR0Execute64BitsHandler(pVCpu, HM64ON32OP_HMRCTestSwitcher64, 5, &aParam[0]);
+ else
+ rc = SVMR0Execute64BitsHandler(pVCpu, HM64ON32OP_HMRCTestSwitcher64, 5, &aParam[0]);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z);
+
+ return rc;
+}
+
+#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) */
+
+/**
+ * Returns suspend status of the host.
+ *
+ * @returns Suspend pending or not.
+ */
+VMMR0_INT_DECL(bool) HMR0SuspendPending(void)
+{
+ return ASMAtomicReadBool(&g_HmR0.fSuspended);
+}
+
+
+/**
+ * Invalidates a guest page from the host TLB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param GCVirt Page to invalidate.
+ */
+VMMR0_INT_DECL(int) HMR0InvalidatePage(PVMCPU pVCpu, RTGCPTR GCVirt)
+{
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if (pVM->hm.s.vmx.fSupported)
+ return VMXR0InvalidatePage(pVCpu, GCVirt);
+ return SVMR0InvalidatePage(pVCpu, GCVirt);
+}
+
+
+/**
+ * Returns the cpu structure for the current cpu.
+ * Keep in mind that there is no guarantee it will stay the same (long jumps to ring 3!!!).
+ *
+ * @returns The cpu structure pointer.
+ */
+VMMR0_INT_DECL(PHMPHYSCPU) hmR0GetCurrentCpu(void)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ RTCPUID const idCpu = RTMpCpuId();
+ Assert(idCpu < RT_ELEMENTS(g_HmR0.aCpuInfo));
+ return &g_HmR0.aCpuInfo[idCpu];
+}
+
+
+/**
+ * Interface for importing state on demand (used by IEM).
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context CPU structure.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
+ */
+VMMR0_INT_DECL(int) HMR0ImportStateOnDemand(PVMCPU pVCpu, uint64_t fWhat)
+{
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported)
+ return VMXR0ImportStateOnDemand(pVCpu, fWhat);
+ return SVMR0ImportStateOnDemand(pVCpu, fWhat);
+}
+
+
+#ifdef VBOX_WITH_RAW_MODE
+/**
+ * Raw-mode switcher hook - disable VT-x if it's active *and* the current
+ * switcher turns off paging.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ * @param enmSwitcher The switcher we're about to use.
+ * @param pfVTxDisabled Where to store whether VT-x was disabled or not.
+ */
+VMMR0_INT_DECL(int) HMR0EnterSwitcher(PVM pVM, VMMSWITCHER enmSwitcher, bool *pfVTxDisabled)
+{
+ NOREF(pVM);
+
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ *pfVTxDisabled = false;
+
+ /* No such issues with AMD-V */
+ if (!g_HmR0.hwvirt.u.vmx.fSupported)
+ return VINF_SUCCESS;
+
+ /* Check if the switching we're up to is safe. */
+ switch (enmSwitcher)
+ {
+ case VMMSWITCHER_32_TO_32:
+ case VMMSWITCHER_PAE_TO_PAE:
+ return VINF_SUCCESS; /* safe switchers as they don't turn off paging */
+
+ case VMMSWITCHER_32_TO_PAE:
+ case VMMSWITCHER_PAE_TO_32: /* is this one actually used?? */
+ case VMMSWITCHER_AMD64_TO_32:
+ case VMMSWITCHER_AMD64_TO_PAE:
+ break; /* unsafe switchers */
+
+ default:
+ AssertFailedReturn(VERR_HM_WRONG_SWITCHER);
+ }
+
+ /* When using SUPR0EnableVTx we must let the host suspend and resume VT-x,
+ regardless of whether we're currently using VT-x or not. */
+ if (g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
+ {
+ *pfVTxDisabled = SUPR0SuspendVTxOnCpu();
+ return VINF_SUCCESS;
+ }
+
+ /** @todo Check if this code is presumptive wrt other VT-x users on the
+ * system... */
+
+ /* Nothing to do if we haven't enabled VT-x. */
+ if (!g_HmR0.fEnabled)
+ return VINF_SUCCESS;
+
+ /* Local init implies the CPU is currently not in VMX root mode. */
+ if (!g_HmR0.fGlobalInit)
+ return VINF_SUCCESS;
+
+ /* Ok, disable VT-x. */
+ PCHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
+ AssertReturn( pHostCpu
+ && pHostCpu->hMemObj != NIL_RTR0MEMOBJ
+ && pHostCpu->pvMemObj
+ && pHostCpu->HCPhysMemObj != NIL_RTHCPHYS,
+ VERR_HM_IPE_2);
+
+ *pfVTxDisabled = true;
+ return VMXR0DisableCpu(pHostCpu->pvMemObj, pHostCpu->HCPhysMemObj);
+}
+
+
+/**
+ * Raw-mode switcher hook - re-enable VT-x if it was active *and* the current
+ * switcher turned off paging.
+ *
+ * @param pVM The cross context VM structure.
+ * @param fVTxDisabled Whether VT-x was disabled or not.
+ */
+VMMR0_INT_DECL(void) HMR0LeaveSwitcher(PVM pVM, bool fVTxDisabled)
+{
+ Assert(!ASMIntAreEnabled());
+
+ if (!fVTxDisabled)
+ return; /* nothing to do */
+
+ Assert(g_HmR0.hwvirt.u.vmx.fSupported);
+ if (g_HmR0.hwvirt.u.vmx.fUsingSUPR0EnableVTx)
+ SUPR0ResumeVTxOnCpu(fVTxDisabled);
+ else
+ {
+ Assert(g_HmR0.fEnabled);
+ Assert(g_HmR0.fGlobalInit);
+
+ PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
+ AssertReturnVoid( pHostCpu
+ && pHostCpu->hMemObj != NIL_RTR0MEMOBJ
+ && pHostCpu->pvMemObj
+ && pHostCpu->HCPhysMemObj != NIL_RTHCPHYS);
+
+ VMXR0EnableCpu(pHostCpu, pVM, pHostCpu->pvMemObj, pHostCpu->HCPhysMemObj, false, &g_HmR0.hwvirt.Msrs);
+ }
+}
+#endif /* VBOX_WITH_RAW_MODE */
+
+
+#ifdef VBOX_STRICT
+/**
+ * Dumps a descriptor.
+ *
+ * @param pDesc Descriptor to dump.
+ * @param Sel Selector number.
+ * @param pszMsg Message to prepend the log entry with.
+ */
+VMMR0_INT_DECL(void) hmR0DumpDescriptor(PCX86DESCHC pDesc, RTSEL Sel, const char *pszMsg)
+{
+ /*
+ * Make variable description string.
+ */
+ static struct
+ {
+ unsigned cch;
+ const char *psz;
+ } const s_aTypes[32] =
+ {
+# define STRENTRY(str) { sizeof(str) - 1, str }
+
+ /* system */
+# if HC_ARCH_BITS == 64
+ STRENTRY("Reserved0 "), /* 0x00 */
+ STRENTRY("Reserved1 "), /* 0x01 */
+ STRENTRY("LDT "), /* 0x02 */
+ STRENTRY("Reserved3 "), /* 0x03 */
+ STRENTRY("Reserved4 "), /* 0x04 */
+ STRENTRY("Reserved5 "), /* 0x05 */
+ STRENTRY("Reserved6 "), /* 0x06 */
+ STRENTRY("Reserved7 "), /* 0x07 */
+ STRENTRY("Reserved8 "), /* 0x08 */
+ STRENTRY("TSS64Avail "), /* 0x09 */
+ STRENTRY("ReservedA "), /* 0x0a */
+ STRENTRY("TSS64Busy "), /* 0x0b */
+ STRENTRY("Call64 "), /* 0x0c */
+ STRENTRY("ReservedD "), /* 0x0d */
+ STRENTRY("Int64 "), /* 0x0e */
+ STRENTRY("Trap64 "), /* 0x0f */
+# else
+ STRENTRY("Reserved0 "), /* 0x00 */
+ STRENTRY("TSS16Avail "), /* 0x01 */
+ STRENTRY("LDT "), /* 0x02 */
+ STRENTRY("TSS16Busy "), /* 0x03 */
+ STRENTRY("Call16 "), /* 0x04 */
+ STRENTRY("Task "), /* 0x05 */
+ STRENTRY("Int16 "), /* 0x06 */
+ STRENTRY("Trap16 "), /* 0x07 */
+ STRENTRY("Reserved8 "), /* 0x08 */
+ STRENTRY("TSS32Avail "), /* 0x09 */
+ STRENTRY("ReservedA "), /* 0x0a */
+ STRENTRY("TSS32Busy "), /* 0x0b */
+ STRENTRY("Call32 "), /* 0x0c */
+ STRENTRY("ReservedD "), /* 0x0d */
+ STRENTRY("Int32 "), /* 0x0e */
+ STRENTRY("Trap32 "), /* 0x0f */
+# endif
+ /* non system */
+ STRENTRY("DataRO "), /* 0x10 */
+ STRENTRY("DataRO Accessed "), /* 0x11 */
+ STRENTRY("DataRW "), /* 0x12 */
+ STRENTRY("DataRW Accessed "), /* 0x13 */
+ STRENTRY("DataDownRO "), /* 0x14 */
+ STRENTRY("DataDownRO Accessed "), /* 0x15 */
+ STRENTRY("DataDownRW "), /* 0x16 */
+ STRENTRY("DataDownRW Accessed "), /* 0x17 */
+ STRENTRY("CodeEO "), /* 0x18 */
+ STRENTRY("CodeEO Accessed "), /* 0x19 */
+ STRENTRY("CodeER "), /* 0x1a */
+ STRENTRY("CodeER Accessed "), /* 0x1b */
+ STRENTRY("CodeConfEO "), /* 0x1c */
+ STRENTRY("CodeConfEO Accessed "), /* 0x1d */
+ STRENTRY("CodeConfER "), /* 0x1e */
+ STRENTRY("CodeConfER Accessed ") /* 0x1f */
+# undef STRENTRY
+ };
+# define ADD_STR(psz, pszAdd) do { strcpy(psz, pszAdd); psz += strlen(pszAdd); } while (0)
+ char szMsg[128];
+ char *psz = &szMsg[0];
+ unsigned i = pDesc->Gen.u1DescType << 4 | pDesc->Gen.u4Type;
+ memcpy(psz, s_aTypes[i].psz, s_aTypes[i].cch);
+ psz += s_aTypes[i].cch;
+
+ if (pDesc->Gen.u1Present)
+ ADD_STR(psz, "Present ");
+ else
+ ADD_STR(psz, "Not-Present ");
+# if HC_ARCH_BITS == 64
+ if (pDesc->Gen.u1Long)
+ ADD_STR(psz, "64-bit ");
+ else
+ ADD_STR(psz, "Comp ");
+# else
+ if (pDesc->Gen.u1Granularity)
+ ADD_STR(psz, "Page ");
+ if (pDesc->Gen.u1DefBig)
+ ADD_STR(psz, "32-bit ");
+ else
+ ADD_STR(psz, "16-bit ");
+# endif
+# undef ADD_STR
+ *psz = '\0';
+
+ /*
+     * Get the limit and base, then format the output.
+ */
+#ifdef LOG_ENABLED
+ uint32_t u32Limit = X86DESC_LIMIT_G(pDesc);
+
+# if HC_ARCH_BITS == 64
+ uint64_t u32Base = X86DESC64_BASE(pDesc);
+ Log(("%s %04x - %RX64 %RX64 - base=%RX64 limit=%08x dpl=%d %s\n", pszMsg,
+ Sel, pDesc->au64[0], pDesc->au64[1], u32Base, u32Limit, pDesc->Gen.u2Dpl, szMsg));
+# else
+ uint32_t u32Base = X86DESC_BASE(pDesc);
+ Log(("%s %04x - %08x %08x - base=%08x limit=%08x dpl=%d %s\n", pszMsg,
+ Sel, pDesc->au32[0], pDesc->au32[1], u32Base, u32Limit, pDesc->Gen.u2Dpl, szMsg));
+# endif
+#else
+ NOREF(Sel); NOREF(pszMsg);
+#endif
+}
+
+
+/**
+ * Formats a full register dump.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(void) hmR0DumpRegs(PVMCPU pVCpu)
+{
+ /*
+ * Format the flags.
+ */
+ static struct
+ {
+ const char *pszSet; const char *pszClear; uint32_t fFlag;
+ } const s_aFlags[] =
+ {
+ { "vip", NULL, X86_EFL_VIP },
+ { "vif", NULL, X86_EFL_VIF },
+ { "ac", NULL, X86_EFL_AC },
+ { "vm", NULL, X86_EFL_VM },
+ { "rf", NULL, X86_EFL_RF },
+ { "nt", NULL, X86_EFL_NT },
+ { "ov", "nv", X86_EFL_OF },
+ { "dn", "up", X86_EFL_DF },
+ { "ei", "di", X86_EFL_IF },
+ { "tf", NULL, X86_EFL_TF },
+ { "nt", "pl", X86_EFL_SF },
+ { "nz", "zr", X86_EFL_ZF },
+ { "ac", "na", X86_EFL_AF },
+ { "po", "pe", X86_EFL_PF },
+ { "cy", "nc", X86_EFL_CF },
+ };
+ char szEFlags[80];
+ char *psz = szEFlags;
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint32_t uEFlags = pCtx->eflags.u32;
+ for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
+ {
+ const char *pszAdd = s_aFlags[i].fFlag & uEFlags ? s_aFlags[i].pszSet : s_aFlags[i].pszClear;
+ if (pszAdd)
+ {
+ strcpy(psz, pszAdd);
+ psz += strlen(pszAdd);
+ *psz++ = ' ';
+ }
+ }
+ psz[-1] = '\0';
+
+ /*
+ * Format the registers.
+ */
+ if (CPUMIsGuestIn64BitCode(pVCpu))
+ {
+ Log(("rax=%016RX64 rbx=%016RX64 rcx=%016RX64 rdx=%016RX64\n"
+ "rsi=%016RX64 rdi=%016RX64 r8 =%016RX64 r9 =%016RX64\n"
+ "r10=%016RX64 r11=%016RX64 r12=%016RX64 r13=%016RX64\n"
+ "r14=%016RX64 r15=%016RX64\n"
+ "rip=%016RX64 rsp=%016RX64 rbp=%016RX64 iopl=%d %*s\n"
+ "cs={%04x base=%016RX64 limit=%08x flags=%08x}\n"
+ "ds={%04x base=%016RX64 limit=%08x flags=%08x}\n"
+ "es={%04x base=%016RX64 limit=%08x flags=%08x}\n"
+ "fs={%04x base=%016RX64 limit=%08x flags=%08x}\n"
+ "gs={%04x base=%016RX64 limit=%08x flags=%08x}\n"
+ "ss={%04x base=%016RX64 limit=%08x flags=%08x}\n"
+ "cr0=%016RX64 cr2=%016RX64 cr3=%016RX64 cr4=%016RX64\n"
+ "dr0=%016RX64 dr1=%016RX64 dr2=%016RX64 dr3=%016RX64\n"
+ "dr4=%016RX64 dr5=%016RX64 dr6=%016RX64 dr7=%016RX64\n"
+ "gdtr=%016RX64:%04x idtr=%016RX64:%04x eflags=%08x\n"
+ "ldtr={%04x base=%08RX64 limit=%08x flags=%08x}\n"
+ "tr ={%04x base=%08RX64 limit=%08x flags=%08x}\n"
+ "SysEnter={cs=%04llx eip=%08llx esp=%08llx}\n"
+ ,
+ pCtx->rax, pCtx->rbx, pCtx->rcx, pCtx->rdx, pCtx->rsi, pCtx->rdi,
+ pCtx->r8, pCtx->r9, pCtx->r10, pCtx->r11, pCtx->r12, pCtx->r13,
+ pCtx->r14, pCtx->r15,
+ pCtx->rip, pCtx->rsp, pCtx->rbp, X86_EFL_GET_IOPL(uEFlags), 31, szEFlags,
+ pCtx->cs.Sel, pCtx->cs.u64Base, pCtx->cs.u32Limit, pCtx->cs.Attr.u,
+ pCtx->ds.Sel, pCtx->ds.u64Base, pCtx->ds.u32Limit, pCtx->ds.Attr.u,
+ pCtx->es.Sel, pCtx->es.u64Base, pCtx->es.u32Limit, pCtx->es.Attr.u,
+ pCtx->fs.Sel, pCtx->fs.u64Base, pCtx->fs.u32Limit, pCtx->fs.Attr.u,
+ pCtx->gs.Sel, pCtx->gs.u64Base, pCtx->gs.u32Limit, pCtx->gs.Attr.u,
+ pCtx->ss.Sel, pCtx->ss.u64Base, pCtx->ss.u32Limit, pCtx->ss.Attr.u,
+ pCtx->cr0, pCtx->cr2, pCtx->cr3, pCtx->cr4,
+ pCtx->dr[0], pCtx->dr[1], pCtx->dr[2], pCtx->dr[3],
+ pCtx->dr[4], pCtx->dr[5], pCtx->dr[6], pCtx->dr[7],
+ pCtx->gdtr.pGdt, pCtx->gdtr.cbGdt, pCtx->idtr.pIdt, pCtx->idtr.cbIdt, uEFlags,
+ pCtx->ldtr.Sel, pCtx->ldtr.u64Base, pCtx->ldtr.u32Limit, pCtx->ldtr.Attr.u,
+ pCtx->tr.Sel, pCtx->tr.u64Base, pCtx->tr.u32Limit, pCtx->tr.Attr.u,
+ pCtx->SysEnter.cs, pCtx->SysEnter.eip, pCtx->SysEnter.esp));
+ }
+ else
+ Log(("eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
+ "eip=%08x esp=%08x ebp=%08x iopl=%d %*s\n"
+ "cs={%04x base=%016RX64 limit=%08x flags=%08x} dr0=%08RX64 dr1=%08RX64\n"
+ "ds={%04x base=%016RX64 limit=%08x flags=%08x} dr2=%08RX64 dr3=%08RX64\n"
+ "es={%04x base=%016RX64 limit=%08x flags=%08x} dr4=%08RX64 dr5=%08RX64\n"
+ "fs={%04x base=%016RX64 limit=%08x flags=%08x} dr6=%08RX64 dr7=%08RX64\n"
+ "gs={%04x base=%016RX64 limit=%08x flags=%08x} cr0=%08RX64 cr2=%08RX64\n"
+ "ss={%04x base=%016RX64 limit=%08x flags=%08x} cr3=%08RX64 cr4=%08RX64\n"
+ "gdtr=%016RX64:%04x idtr=%016RX64:%04x eflags=%08x\n"
+ "ldtr={%04x base=%08RX64 limit=%08x flags=%08x}\n"
+ "tr ={%04x base=%08RX64 limit=%08x flags=%08x}\n"
+ "SysEnter={cs=%04llx eip=%08llx esp=%08llx}\n"
+ ,
+ pCtx->eax, pCtx->ebx, pCtx->ecx, pCtx->edx, pCtx->esi, pCtx->edi,
+ pCtx->eip, pCtx->esp, pCtx->ebp, X86_EFL_GET_IOPL(uEFlags), 31, szEFlags,
+ pCtx->cs.Sel, pCtx->cs.u64Base, pCtx->cs.u32Limit, pCtx->cs.Attr.u, pCtx->dr[0], pCtx->dr[1],
+ pCtx->ds.Sel, pCtx->ds.u64Base, pCtx->ds.u32Limit, pCtx->ds.Attr.u, pCtx->dr[2], pCtx->dr[3],
+ pCtx->es.Sel, pCtx->es.u64Base, pCtx->es.u32Limit, pCtx->es.Attr.u, pCtx->dr[4], pCtx->dr[5],
+ pCtx->fs.Sel, pCtx->fs.u64Base, pCtx->fs.u32Limit, pCtx->fs.Attr.u, pCtx->dr[6], pCtx->dr[7],
+ pCtx->gs.Sel, pCtx->gs.u64Base, pCtx->gs.u32Limit, pCtx->gs.Attr.u, pCtx->cr0, pCtx->cr2,
+ pCtx->ss.Sel, pCtx->ss.u64Base, pCtx->ss.u32Limit, pCtx->ss.Attr.u, pCtx->cr3, pCtx->cr4,
+ pCtx->gdtr.pGdt, pCtx->gdtr.cbGdt, pCtx->idtr.pIdt, pCtx->idtr.cbIdt, uEFlags,
+ pCtx->ldtr.Sel, pCtx->ldtr.u64Base, pCtx->ldtr.u32Limit, pCtx->ldtr.Attr.u,
+ pCtx->tr.Sel, pCtx->tr.u64Base, pCtx->tr.u32Limit, pCtx->tr.Attr.u,
+ pCtx->SysEnter.cs, pCtx->SysEnter.eip, pCtx->SysEnter.esp));
+
+ PX86FXSTATE pFpuCtx = &pCtx->CTX_SUFF(pXState)->x87;
+ Log(("FPU:\n"
+ "FCW=%04x FSW=%04x FTW=%02x\n"
+ "FOP=%04x FPUIP=%08x CS=%04x Rsrvd1=%04x\n"
+ "FPUDP=%04x DS=%04x Rsvrd2=%04x MXCSR=%08x MXCSR_MASK=%08x\n"
+ ,
+ pFpuCtx->FCW, pFpuCtx->FSW, pFpuCtx->FTW,
+ pFpuCtx->FOP, pFpuCtx->FPUIP, pFpuCtx->CS, pFpuCtx->Rsrvd1,
+ pFpuCtx->FPUDP, pFpuCtx->DS, pFpuCtx->Rsrvd2,
+ pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK));
+
+ Log(("MSR:\n"
+ "EFER =%016RX64\n"
+ "PAT =%016RX64\n"
+ "STAR =%016RX64\n"
+ "CSTAR =%016RX64\n"
+ "LSTAR =%016RX64\n"
+ "SFMASK =%016RX64\n"
+ "KERNELGSBASE =%016RX64\n",
+ pCtx->msrEFER,
+ pCtx->msrPAT,
+ pCtx->msrSTAR,
+ pCtx->msrCSTAR,
+ pCtx->msrLSTAR,
+ pCtx->msrSFMASK,
+ pCtx->msrKERNELGSBASE));
+
+ NOREF(pFpuCtx);
+}
+#endif /* VBOX_STRICT */
+
diff --git a/src/VBox/VMM/VMMR0/HMR0A.asm b/src/VBox/VMM/VMMR0/HMR0A.asm
new file mode 100644
index 00000000..3db49a1e
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/HMR0A.asm
@@ -0,0 +1,2184 @@
+; $Id: HMR0A.asm $
+;; @file
+; HM - Ring-0 VMX, SVM world-switch and helper routines
+;
+
+;
+; Copyright (C) 2006-2019 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+;*********************************************************************************************************************************
+;* Header Files *
+;*********************************************************************************************************************************
+%include "VBox/asmdefs.mac"
+%include "VBox/err.mac"
+%include "VBox/vmm/hm_vmx.mac"
+%include "VBox/vmm/cpum.mac"
+%include "VBox/vmm/vm.mac"
+%include "iprt/x86.mac"
+%include "HMInternal.mac"
+
+%ifdef RT_OS_OS2 ;; @todo fix OMF support in yasm and kick nasm out completely.
+ %macro vmwrite 2,
+ int3
+ %endmacro
+ %define vmlaunch int3
+ %define vmresume int3
+ %define vmsave int3
+ %define vmload int3
+ %define vmrun int3
+ %define clgi int3
+ %define stgi int3
+ %macro invlpga 2,
+ int3
+ %endmacro
+%endif
+
+;*********************************************************************************************************************************
+;* Defined Constants And Macros *
+;*********************************************************************************************************************************
+;; The offset of the XMM registers in X86FXSTATE.
+; Use define because I'm too lazy to convert the struct.
+%define XMM_OFF_IN_X86FXSTATE 160
+
+;; Spectre filler for 32-bit mode.
+; Some user space address that points to a 4MB page boundary in the hope that it
+; will somehow make it less useful.
+%define SPECTRE_FILLER32 0x227fffff
+;; Spectre filler for 64-bit mode.
+; Chosen to be an invalid address (also with 5-level paging).
+%define SPECTRE_FILLER64 0x02204204207fffff
+;; Spectre filler for the current CPU mode.
+%ifdef RT_ARCH_AMD64
+ %define SPECTRE_FILLER SPECTRE_FILLER64
+%else
+ %define SPECTRE_FILLER SPECTRE_FILLER32
+%endif
+
+;;
+; Determine whether to skip restoring the GDTR, IDTR and TR across VMX non-root operation
+;
+%ifdef RT_ARCH_AMD64
+ %define VMX_SKIP_GDTR
+ %define VMX_SKIP_TR
+ %define VBOX_SKIP_RESTORE_SEG
+ %ifdef RT_OS_DARWIN
+ ; Load the NULL selector into DS, ES, FS and GS on 64-bit darwin so we don't
+ ; risk loading a stale LDT value or something invalid.
+ %define HM_64_BIT_USE_NULL_SEL
+ ; Darwin (Mavericks) uses IDTR limit to store the CPU Id so we need to restore it always.
+ ; See @bugref{6875}.
+ %else
+ %define VMX_SKIP_IDTR
+ %endif
+%endif
+
+;; @def MYPUSHAD
+; Macro generating an equivalent to pushad
+
+;; @def MYPOPAD
+; Macro generating an equivalent to popad
+
+;; @def MYPUSHSEGS
+; Macro saving all segment registers on the stack.
+; @param 1 full width register name
+; @param 2 16-bit register name for \a 1.
+
+;; @def MYPOPSEGS
+; Macro restoring all segment registers on the stack
+; @param 1 full width register name
+; @param 2 16-bit register name for \a 1.
+
+%ifdef ASM_CALL64_GCC
+ %macro MYPUSHAD64 0
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ %endmacro
+ %macro MYPOPAD64 0
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ %endmacro
+
+%else ; ASM_CALL64_MSC
+ %macro MYPUSHAD64 0
+ push r15
+ push r14
+ push r13
+ push r12
+ push rbx
+ push rsi
+ push rdi
+ %endmacro
+ %macro MYPOPAD64 0
+ pop rdi
+ pop rsi
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ %endmacro
+%endif
+
+%ifdef VBOX_SKIP_RESTORE_SEG
+ %macro MYPUSHSEGS64 2
+ %endmacro
+
+ %macro MYPOPSEGS64 2
+ %endmacro
+%else ; !VBOX_SKIP_RESTORE_SEG
+ ; trashes, rax, rdx & rcx
+ %macro MYPUSHSEGS64 2
+ %ifndef HM_64_BIT_USE_NULL_SEL
+ mov %2, es
+ push %1
+ mov %2, ds
+ push %1
+ %endif
+
+ ; Special case for FS; Windows and Linux either don't use it or restore it when leaving kernel mode, Solaris OTOH doesn't and we must save it.
+ mov ecx, MSR_K8_FS_BASE
+ rdmsr
+ push rdx
+ push rax
+ %ifndef HM_64_BIT_USE_NULL_SEL
+ push fs
+ %endif
+
+ ; Special case for GS; OSes typically use swapgs to reset the hidden base register for GS on entry into the kernel. The same happens on exit
+ mov ecx, MSR_K8_GS_BASE
+ rdmsr
+ push rdx
+ push rax
+ %ifndef HM_64_BIT_USE_NULL_SEL
+ push gs
+ %endif
+ %endmacro
+
+ ; trashes, rax, rdx & rcx
+ %macro MYPOPSEGS64 2
+ ; Note: do not step through this code with a debugger!
+ %ifndef HM_64_BIT_USE_NULL_SEL
+ xor eax, eax
+ mov ds, ax
+ mov es, ax
+ mov fs, ax
+ mov gs, ax
+ %endif
+
+ %ifndef HM_64_BIT_USE_NULL_SEL
+ pop gs
+ %endif
+ pop rax
+ pop rdx
+ mov ecx, MSR_K8_GS_BASE
+ wrmsr
+
+ %ifndef HM_64_BIT_USE_NULL_SEL
+ pop fs
+ %endif
+ pop rax
+ pop rdx
+ mov ecx, MSR_K8_FS_BASE
+ wrmsr
+ ; Now it's safe to step again
+
+ %ifndef HM_64_BIT_USE_NULL_SEL
+ pop %1
+ mov ds, %2
+ pop %1
+ mov es, %2
+ %endif
+ %endmacro
+%endif ; VBOX_SKIP_RESTORE_SEG
+
+%macro MYPUSHAD32 0
+ pushad
+%endmacro
+%macro MYPOPAD32 0
+ popad
+%endmacro
+
+%macro MYPUSHSEGS32 2
+ push ds
+ push es
+ push fs
+ push gs
+%endmacro
+%macro MYPOPSEGS32 2
+ pop gs
+ pop fs
+ pop es
+ pop ds
+%endmacro
+
+%ifdef RT_ARCH_AMD64
+ %define MYPUSHAD MYPUSHAD64
+ %define MYPOPAD MYPOPAD64
+ %define MYPUSHSEGS MYPUSHSEGS64
+ %define MYPOPSEGS MYPOPSEGS64
+%else
+ %define MYPUSHAD MYPUSHAD32
+ %define MYPOPAD MYPOPAD32
+ %define MYPUSHSEGS MYPUSHSEGS32
+ %define MYPOPSEGS MYPOPSEGS32
+%endif
+
+;;
+; Creates an indirect branch prediction barrier on CPUs that need and support it.
+; @clobbers eax, edx, ecx
+; @param 1 How to address CPUMCTX.
+; @param 2 Which flag to test for (CPUMCTX_WSF_IBPB_ENTRY or CPUMCTX_WSF_IBPB_EXIT)
+%macro INDIRECT_BRANCH_PREDICTION_BARRIER 2
+ test byte [%1 + CPUMCTX.fWorldSwitcher], %2
+ jz %%no_indirect_branch_barrier
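+    ; Writing the IBPB bit of IA32_PRED_CMD issues the indirect branch prediction barrier.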
+ mov ecx, MSR_IA32_PRED_CMD
+ mov eax, MSR_IA32_PRED_CMD_F_IBPB
+ xor edx, edx
+ wrmsr
+%%no_indirect_branch_barrier:
+%endmacro
+
+;;
+; Creates an indirect branch prediction and L1D barrier on CPUs that need and support it.
+; @clobbers eax, edx, ecx
+; @param 1 How to address CPUMCTX.
+; @param 2 Which IBPB flag to test for (CPUMCTX_WSF_IBPB_ENTRY or CPUMCTX_WSF_IBPB_EXIT)
+; @param 3 Which FLUSH flag to test for (CPUMCTX_WSF_L1D_ENTRY)
+%macro INDIRECT_BRANCH_PREDICTION_AND_L1_CACHE_BARRIER 3
+ ; Only a single test+jmp in the common case where both mitigations are disabled.
+ test byte [%1 + CPUMCTX.fWorldSwitcher], (%2 | %3)
+ jz %%no_barrier_needed
+
+ ; The eax:edx value is the same for both.
+ AssertCompile(MSR_IA32_PRED_CMD_F_IBPB == MSR_IA32_FLUSH_CMD_F_L1D)
+ mov eax, MSR_IA32_PRED_CMD_F_IBPB
+ xor edx, edx
+
+ ; Indirect branch barrier.
+ test byte [%1 + CPUMCTX.fWorldSwitcher], %2
+ jz %%no_indirect_branch_barrier
+ mov ecx, MSR_IA32_PRED_CMD
+ wrmsr
+%%no_indirect_branch_barrier:
+
+ ; Level 1 data cache flush.
+ test byte [%1 + CPUMCTX.fWorldSwitcher], %3
+ jz %%no_cache_flush_barrier
+ mov ecx, MSR_IA32_FLUSH_CMD
+ wrmsr
+%%no_cache_flush_barrier:
+
+%%no_barrier_needed:
+%endmacro
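+
+; Illustrative usage sketch (mirroring the world-switch code further down in
+; this file): the entry barrier is issued right before loading the guest GPRs,
+;       INDIRECT_BRANCH_PREDICTION_AND_L1_CACHE_BARRIER xSI, CPUMCTX_WSF_IBPB_ENTRY, CPUMCTX_WSF_L1D_ENTRY
+; and the exit barrier right after storing them back into CPUMCTX,
+;       INDIRECT_BRANCH_PREDICTION_BARRIER xDI, CPUMCTX_WSF_IBPB_EXIT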
+
+
+;*********************************************************************************************************************************
+;* External Symbols *
+;*********************************************************************************************************************************
+%ifdef VBOX_WITH_KERNEL_USING_XMM
+extern NAME(CPUMIsGuestFPUStateActive)
+%endif
+
+
+BEGINCODE
+
+
+;/**
+; * Restores host-state fields.
+; *
+; * @returns VBox status code
+; * @param f32RestoreHost x86: [ebp + 08h] msc: ecx gcc: edi RestoreHost flags.
+; * @param pRestoreHost x86: [ebp + 0ch] msc: rdx gcc: rsi Pointer to the RestoreHost struct.
+; */
+ALIGNCODE(16)
+BEGINPROC VMXRestoreHostState
+%ifdef RT_ARCH_AMD64
+ %ifndef ASM_CALL64_GCC
+ ; Use GCC's input registers since we'll be needing both rcx and rdx further
+ ; down with the wrmsr instruction. Use the R10 and R11 registers for saving
+ ; RDI and RSI since MSC preserves those two registers.
+ mov r10, rdi
+ mov r11, rsi
+ mov rdi, rcx
+ mov rsi, rdx
+ %endif
+
+ test edi, VMX_RESTORE_HOST_GDTR
+ jz .test_idtr
+ lgdt [rsi + VMXRESTOREHOST.HostGdtr]
+
+.test_idtr:
+ test edi, VMX_RESTORE_HOST_IDTR
+ jz .test_ds
+ lidt [rsi + VMXRESTOREHOST.HostIdtr]
+
+.test_ds:
+ test edi, VMX_RESTORE_HOST_SEL_DS
+ jz .test_es
+ mov ax, [rsi + VMXRESTOREHOST.uHostSelDS]
+ mov ds, eax
+
+.test_es:
+ test edi, VMX_RESTORE_HOST_SEL_ES
+ jz .test_tr
+ mov ax, [rsi + VMXRESTOREHOST.uHostSelES]
+ mov es, eax
+
+.test_tr:
+ test edi, VMX_RESTORE_HOST_SEL_TR
+ jz .test_fs
+ ; When restoring the TR, we must first clear the busy flag or we'll end up faulting.
+ mov dx, [rsi + VMXRESTOREHOST.uHostSelTR]
+ mov ax, dx
+ and eax, X86_SEL_MASK_OFF_RPL ; Mask away TI and RPL bits leaving only the descriptor offset.
+ test edi, VMX_RESTORE_HOST_GDT_READ_ONLY | VMX_RESTORE_HOST_GDT_NEED_WRITABLE
+ jnz .gdt_readonly
+ add rax, qword [rsi + VMXRESTOREHOST.HostGdtr + 2] ; xAX <- descriptor offset + GDTR.pGdt.
+ and dword [rax + 4], ~RT_BIT(9) ; Clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit).
+ ltr dx
+ jmp short .test_fs
+.gdt_readonly:
+ test edi, VMX_RESTORE_HOST_GDT_NEED_WRITABLE
+ jnz .gdt_readonly_need_writable
+ mov rcx, cr0
+ mov r9, rcx
+ add rax, qword [rsi + VMXRESTOREHOST.HostGdtr + 2] ; xAX <- descriptor offset + GDTR.pGdt.
+ and rcx, ~X86_CR0_WP
+ mov cr0, rcx
+ and dword [rax + 4], ~RT_BIT(9) ; Clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit).
+ ltr dx
+ mov cr0, r9
+ jmp short .test_fs
+.gdt_readonly_need_writable:
+ add rax, qword [rsi + VMXRESTOREHOST.HostGdtrRw + 2] ; xAX <- descriptor offset + GDTR.pGdtRw.
+ and dword [rax + 4], ~RT_BIT(9) ; Clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit).
+ lgdt [rsi + VMXRESTOREHOST.HostGdtrRw]
+ ltr dx
+ lgdt [rsi + VMXRESTOREHOST.HostGdtr] ; Load the original GDT
+
+.test_fs:
+ ;
+ ; When restoring the selector values for FS and GS, we'll temporarily trash
+ ; the base address (at least the high 32 bits, but quite possibly the
+ ; whole base address); the wrmsr will restore it correctly. (VT-x actually
+ ; restores the base correctly when leaving guest mode, but not the selector
+ ; value, so there is little problem with interrupts being enabled prior to
+ ; this restore job.)
+ ; We'll disable ints once for both FS and GS as that's probably faster.
+ ;
+ test edi, VMX_RESTORE_HOST_SEL_FS | VMX_RESTORE_HOST_SEL_GS
+ jz .restore_success
+ pushfq
+ cli ; (see above)
+
+ test edi, VMX_RESTORE_HOST_SEL_FS
+ jz .test_gs
+ mov ax, word [rsi + VMXRESTOREHOST.uHostSelFS]
+ mov fs, eax
+ mov eax, dword [rsi + VMXRESTOREHOST.uHostFSBase] ; uHostFSBase - Lo
+ mov edx, dword [rsi + VMXRESTOREHOST.uHostFSBase + 4h] ; uHostFSBase - Hi
+ mov ecx, MSR_K8_FS_BASE
+ wrmsr
+
+.test_gs:
+ test edi, VMX_RESTORE_HOST_SEL_GS
+ jz .restore_flags
+ mov ax, word [rsi + VMXRESTOREHOST.uHostSelGS]
+ mov gs, eax
+ mov eax, dword [rsi + VMXRESTOREHOST.uHostGSBase] ; uHostGSBase - Lo
+ mov edx, dword [rsi + VMXRESTOREHOST.uHostGSBase + 4h] ; uHostGSBase - Hi
+ mov ecx, MSR_K8_GS_BASE
+ wrmsr
+
+.restore_flags:
+ popfq
+
+.restore_success:
+ mov eax, VINF_SUCCESS
+ %ifndef ASM_CALL64_GCC
+ ; Restore RDI and RSI on MSC.
+ mov rdi, r10
+ mov rsi, r11
+ %endif
+%else ; RT_ARCH_X86
+ mov eax, VERR_NOT_IMPLEMENTED
+%endif
+ ret
+ENDPROC VMXRestoreHostState
+
+
+;/**
+; * Dispatches an NMI to the host.
+; */
+ALIGNCODE(16)
+BEGINPROC VMXDispatchHostNmi
+ int 2 ; NMI is always vector 2. The IDT[2] IRQ handler cannot be anything else. See Intel spec. 6.3.1 "External Interrupts".
+ ret
+ENDPROC VMXDispatchHostNmi
+
+
+;/**
+; * Executes VMWRITE, 64-bit value.
+; *
+; * @returns VBox status code.
+; * @param idxField x86: [ebp + 08h] msc: rcx gcc: rdi VMCS index.
+; * @param u64Data x86: [ebp + 0ch] msc: rdx gcc: rsi VM field value.
+; */
+ALIGNCODE(16)
+BEGINPROC VMXWriteVmcs64
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ and edi, 0ffffffffh
+ xor rax, rax
+ vmwrite rdi, rsi
+ %else
+ and ecx, 0ffffffffh
+ xor rax, rax
+ vmwrite rcx, rdx
+ %endif
+%else ; RT_ARCH_X86
+ mov ecx, [esp + 4] ; idxField
+ lea edx, [esp + 8] ; &u64Data
+ vmwrite ecx, [edx] ; low dword
+ jz .done
+ jc .done
+ inc ecx
+ xor eax, eax
+ vmwrite ecx, [edx + 4] ; high dword
+.done:
+%endif ; RT_ARCH_X86
+ jnc .valid_vmcs
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+ ret
+.valid_vmcs:
+ jnz .the_end
+ mov eax, VERR_VMX_INVALID_VMCS_FIELD
+.the_end:
+ ret
+ENDPROC VMXWriteVmcs64
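+
+; A minimal C-side usage sketch (illustrative only; the real call sites live in
+; HMVMXR0.cpp). The VMCS accessors return VINF_SUCCESS, VERR_VMX_INVALID_VMCS_PTR
+; or VERR_VMX_INVALID_VMCS_FIELD, so a caller can simply propagate the status:
+;     int rc = VMXWriteVmcs64(idxField, u64Value);
+;     if (RT_SUCCESS(rc))
+;         rc = VMXReadVmcs64(idxField, &u64Readback);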
+
+
+;/**
+; * Executes VMREAD, 64-bit value.
+; *
+; * @returns VBox status code.
+; * @param idxField VMCS index.
+; * @param pData Where to store VM field value.
+; */
+;DECLASM(int) VMXReadVmcs64(uint32_t idxField, uint64_t *pData);
+ALIGNCODE(16)
+BEGINPROC VMXReadVmcs64
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ and edi, 0ffffffffh
+ xor rax, rax
+ vmread [rsi], rdi
+ %else
+ and ecx, 0ffffffffh
+ xor rax, rax
+ vmread [rdx], rcx
+ %endif
+%else ; RT_ARCH_X86
+ mov ecx, [esp + 4] ; idxField
+ mov edx, [esp + 8] ; pData
+ vmread [edx], ecx ; low dword
+ jz .done
+ jc .done
+ inc ecx
+ xor eax, eax
+ vmread [edx + 4], ecx ; high dword
+.done:
+%endif ; RT_ARCH_X86
+ jnc .valid_vmcs
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+ ret
+.valid_vmcs:
+ jnz .the_end
+ mov eax, VERR_VMX_INVALID_VMCS_FIELD
+.the_end:
+ ret
+ENDPROC VMXReadVmcs64
+
+
+;/**
+; * Executes VMREAD, 32-bit value.
+; *
+; * @returns VBox status code.
+; * @param idxField VMCS index.
+; * @param pu32Data Where to store VM field value.
+; */
+;DECLASM(int) VMXReadVmcs32(uint32_t idxField, uint32_t *pu32Data);
+ALIGNCODE(16)
+BEGINPROC VMXReadVmcs32
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ and edi, 0ffffffffh
+ xor rax, rax
+ vmread r10, rdi
+ mov [rsi], r10d
+ %else
+ and ecx, 0ffffffffh
+ xor rax, rax
+ vmread r10, rcx
+ mov [rdx], r10d
+ %endif
+%else ; RT_ARCH_X86
+ mov ecx, [esp + 4] ; idxField
+ mov edx, [esp + 8] ; pu32Data
+ xor eax, eax
+ vmread [edx], ecx
+%endif ; RT_ARCH_X86
+ jnc .valid_vmcs
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+ ret
+.valid_vmcs:
+ jnz .the_end
+ mov eax, VERR_VMX_INVALID_VMCS_FIELD
+.the_end:
+ ret
+ENDPROC VMXReadVmcs32
+
+
+;/**
+; * Executes VMWRITE, 32-bit value.
+; *
+; * @returns VBox status code.
+; * @param idxField VMCS index.
+; * @param u32Data The 32-bit value to write.
+; */
+;DECLASM(int) VMXWriteVmcs32(uint32_t idxField, uint32_t u32Data);
+ALIGNCODE(16)
+BEGINPROC VMXWriteVmcs32
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ and edi, 0ffffffffh
+ and esi, 0ffffffffh
+ xor rax, rax
+ vmwrite rdi, rsi
+ %else
+ and ecx, 0ffffffffh
+ and edx, 0ffffffffh
+ xor rax, rax
+ vmwrite rcx, rdx
+ %endif
+%else ; RT_ARCH_X86
+ mov ecx, [esp + 4] ; idxField
+ mov edx, [esp + 8] ; u32Data
+ xor eax, eax
+ vmwrite ecx, edx
+%endif ; RT_ARCH_X86
+ jnc .valid_vmcs
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+ ret
+.valid_vmcs:
+ jnz .the_end
+ mov eax, VERR_VMX_INVALID_VMCS_FIELD
+.the_end:
+ ret
+ENDPROC VMXWriteVmcs32
+
+
+;/**
+; * Executes VMXON.
+; *
+; * @returns VBox status code.
+; * @param HCPhysVMXOn Physical address of VMXON structure.
+; */
+;DECLASM(int) VMXEnable(RTHCPHYS HCPhysVMXOn);
+BEGINPROC VMXEnable
+%ifdef RT_ARCH_AMD64
+ xor rax, rax
+ %ifdef ASM_CALL64_GCC
+ push rdi
+ %else
+ push rcx
+ %endif
+ vmxon [rsp]
+%else ; RT_ARCH_X86
+ xor eax, eax
+ vmxon [esp + 4]
+%endif ; RT_ARCH_X86
+ jnc .good
+ mov eax, VERR_VMX_INVALID_VMXON_PTR
+ jmp .the_end
+
+.good:
+ jnz .the_end
+ mov eax, VERR_VMX_VMXON_FAILED
+
+.the_end:
+%ifdef RT_ARCH_AMD64
+ add rsp, 8
+%endif
+ ret
+ENDPROC VMXEnable
+
+
+;/**
+; * Executes VMXOFF.
+; */
+;DECLASM(void) VMXDisable(void);
+BEGINPROC VMXDisable
+ vmxoff
+.the_end:
+ ret
+ENDPROC VMXDisable
+
+
+;/**
+; * Executes VMCLEAR.
+; *
+; * @returns VBox status code.
+; * @param HCPhysVmcs Physical address of VM control structure.
+; */
+;DECLASM(int) VMXClearVmcs(RTHCPHYS HCPhysVmcs);
+ALIGNCODE(16)
+BEGINPROC VMXClearVmcs
+%ifdef RT_ARCH_AMD64
+ xor rax, rax
+ %ifdef ASM_CALL64_GCC
+ push rdi
+ %else
+ push rcx
+ %endif
+ vmclear [rsp]
+%else ; RT_ARCH_X86
+ xor eax, eax
+ vmclear [esp + 4]
+%endif ; RT_ARCH_X86
+ jnc .the_end
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+.the_end:
+%ifdef RT_ARCH_AMD64
+ add rsp, 8
+%endif
+ ret
+ENDPROC VMXClearVmcs
+
+
+;/**
+; * Executes VMPTRLD.
+; *
+; * @returns VBox status code.
+; * @param HCPhysVmcs Physical address of VMCS structure.
+; */
+;DECLASM(int) VMXActivateVmcs(RTHCPHYS HCPhysVmcs);
+ALIGNCODE(16)
+BEGINPROC VMXActivateVmcs
+%ifdef RT_ARCH_AMD64
+ xor rax, rax
+ %ifdef ASM_CALL64_GCC
+ push rdi
+ %else
+ push rcx
+ %endif
+ vmptrld [rsp]
+%else
+ xor eax, eax
+ vmptrld [esp + 4]
+%endif
+ jnc .the_end
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+.the_end:
+%ifdef RT_ARCH_AMD64
+ add rsp, 8
+%endif
+ ret
+ENDPROC VMXActivateVmcs
+
+
+;/**
+; * Executes VMPTRST.
+; *
+; * @returns VBox status code.
+; * @param pVMCS x86:[esp + 04h] gcc:rdi msc:rcx Where to store the current VMCS physical address.
+; */
+;DECLASM(int) VMXGetActivatedVmcs(RTHCPHYS *pVMCS);
+BEGINPROC VMXGetActivatedVmcs
+%ifdef RT_OS_OS2
+ mov eax, VERR_NOT_SUPPORTED
+ ret
+%else
+ %ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ vmptrst qword [rdi]
+ %else
+ vmptrst qword [rcx]
+ %endif
+ %else
+ vmptrst qword [esp+04h]
+ %endif
+ xor eax, eax
+.the_end:
+ ret
+%endif
+ENDPROC VMXGetActivatedVmcs
+
+;/**
+; * Invalidates EPT translations using INVEPT.
+; @param enmTlbFlush msc:ecx gcc:edi x86:[esp+04] Type of flush.
+; @param pDescriptor msc:edx gcc:esi x86:[esp+08] Descriptor pointer.
+; */
+;DECLASM(int) VMXR0InvEPT(VMXTLBFLUSHEPT enmTlbFlush, uint64_t *pDescriptor);
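+; Note: the instruction bytes are emitted with DB below, presumably because not
+;       all assemblers supported by the build know the invept/invvpid mnemonics.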
+BEGINPROC VMXR0InvEPT
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ and edi, 0ffffffffh
+ xor rax, rax
+; invept rdi, qword [rsi]
+ DB 0x66, 0x0F, 0x38, 0x80, 0x3E
+ %else
+ and ecx, 0ffffffffh
+ xor rax, rax
+; invept rcx, qword [rdx]
+ DB 0x66, 0x0F, 0x38, 0x80, 0xA
+ %endif
+%else
+ mov ecx, [esp + 4]
+ mov edx, [esp + 8]
+ xor eax, eax
+; invept ecx, qword [edx]
+ DB 0x66, 0x0F, 0x38, 0x80, 0xA
+%endif
+ jnc .valid_vmcs
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+ ret
+.valid_vmcs:
+ jnz .the_end
+ mov eax, VERR_INVALID_PARAMETER
+.the_end:
+ ret
+ENDPROC VMXR0InvEPT
+
+
+;/**
+; * Invalidates TLB entries using INVVPID.
+; @param enmTlbFlush msc:ecx gcc:edi x86:[esp+04] Type of flush
+; @param pDescriptor msc:edx gcc:esi x86:[esp+08] Descriptor pointer
+; */
+;DECLASM(int) VMXR0InvVPID(VMXTLBFLUSHVPID enmTlbFlush, uint64_t *pDescriptor);
+BEGINPROC VMXR0InvVPID
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ and edi, 0ffffffffh
+ xor rax, rax
+; invvpid rdi, qword [rsi]
+ DB 0x66, 0x0F, 0x38, 0x81, 0x3E
+ %else
+ and ecx, 0ffffffffh
+ xor rax, rax
+; invvpid rcx, qword [rdx]
+ DB 0x66, 0x0F, 0x38, 0x81, 0xA
+ %endif
+%else
+ mov ecx, [esp + 4]
+ mov edx, [esp + 8]
+ xor eax, eax
+; invvpid ecx, qword [edx]
+ DB 0x66, 0x0F, 0x38, 0x81, 0xA
+%endif
+ jnc .valid_vmcs
+ mov eax, VERR_VMX_INVALID_VMCS_PTR
+ ret
+.valid_vmcs:
+ jnz .the_end
+ mov eax, VERR_INVALID_PARAMETER
+.the_end:
+ ret
+ENDPROC VMXR0InvVPID
+
+
+%if GC_ARCH_BITS == 64
+;;
+; Executes INVLPGA
+;
+; @param pPageGC msc:rcx gcc:rdi x86:[esp+04] Virtual page to invalidate
+; @param uASID msc:rdx gcc:rsi x86:[esp+0C] Tagged TLB id
+;
+;DECLASM(void) SVMR0InvlpgA(RTGCPTR pPageGC, uint32_t uASID);
+BEGINPROC SVMR0InvlpgA
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ mov rax, rdi
+ mov rcx, rsi
+ %else
+ mov rax, rcx
+ mov rcx, rdx
+ %endif
+%else
+ mov eax, [esp + 4]
+ mov ecx, [esp + 0Ch]
+%endif
+ invlpga [xAX], ecx
+ ret
+ENDPROC SVMR0InvlpgA
+
+%else ; GC_ARCH_BITS != 64
+;;
+; Executes INVLPGA
+;
+; @param pPageGC msc:ecx gcc:edi x86:[esp+04] Virtual page to invalidate
+; @param uASID msc:edx gcc:esi x86:[esp+08] Tagged TLB id
+;
+;DECLASM(void) SVMR0InvlpgA(RTGCPTR pPageGC, uint32_t uASID);
+BEGINPROC SVMR0InvlpgA
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ movzx rax, edi
+ mov ecx, esi
+ %else
+ ; from http://www.cs.cmu.edu/~fp/courses/15213-s06/misc/asm64-handout.pdf:
+ ; ``Perhaps unexpectedly, instructions that move or generate 32-bit register
+ ; values also set the upper 32 bits of the register to zero. Consequently
+ ; there is no need for an instruction movzlq.''
+ mov eax, ecx
+ mov ecx, edx
+ %endif
+%else
+ mov eax, [esp + 4]
+ mov ecx, [esp + 8]
+%endif
+ invlpga [xAX], ecx
+ ret
+ENDPROC SVMR0InvlpgA
+
+%endif ; GC_ARCH_BITS != 64
+
+
+%ifdef VBOX_WITH_KERNEL_USING_XMM
+
+;;
+; Wrapper around vmx.pfnStartVM that preserves host XMM registers and
+; loads the guest ones when necessary.
+;
+; @cproto DECLASM(int) hmR0VMXStartVMWrapXMM(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM,
+; PVMCPU pVCpu, PFNHMVMXSTARTVM pfnStartVM);
+;
+; @returns eax
+;
+; @param fResumeVM msc:rcx
+; @param pCtx msc:rdx
+; @param pVMCSCache msc:r8
+; @param pVM msc:r9
+; @param pVCpu msc:[rbp+30h] The cross context virtual CPU structure of the calling EMT.
+; @param pfnStartVM msc:[rbp+38h]
+;
+; @remarks This is essentially the same code as hmR0SVMRunWrapXMM, only the parameters differ a little bit.
+;
+; @remarks Drivers shouldn't use AVX registers without saving+loading:
+; https://msdn.microsoft.com/en-us/library/windows/hardware/ff545910%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396
+; However, the compiler docs have a different idea:
+; https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
+; We'll go with the former for now.
+;
+; ASSUMING 64-bit and windows for now.
+;
+ALIGNCODE(16)
+BEGINPROC hmR0VMXStartVMWrapXMM
+ push xBP
+ mov xBP, xSP
+ sub xSP, 0b0h + 040h ; Don't bother optimizing the frame size.
+
+ ; spill input parameters.
+ mov [xBP + 010h], rcx ; fResumeVM
+ mov [xBP + 018h], rdx ; pCtx
+ mov [xBP + 020h], r8 ; pVMCSCache
+ mov [xBP + 028h], r9 ; pVM
+
+ ; Ask CPUM whether we've started using the FPU yet.
+ mov rcx, [xBP + 30h] ; pVCpu
+ call NAME(CPUMIsGuestFPUStateActive)
+ test al, al
+ jnz .guest_fpu_state_active
+
+ ; No need to mess with XMM registers; just call the start routine and return.
+ mov r11, [xBP + 38h] ; pfnStartVM
+ mov r10, [xBP + 30h] ; pVCpu
+ mov [xSP + 020h], r10
+ mov rcx, [xBP + 010h] ; fResumeVM
+ mov rdx, [xBP + 018h] ; pCtx
+ mov r8, [xBP + 020h] ; pVMCSCache
+ mov r9, [xBP + 028h] ; pVM
+ call r11
+
+ leave
+ ret
+
+ALIGNCODE(8)
+.guest_fpu_state_active:
+ ; Save the non-volatile host XMM registers.
+ movdqa [rsp + 040h + 000h], xmm6
+ movdqa [rsp + 040h + 010h], xmm7
+ movdqa [rsp + 040h + 020h], xmm8
+ movdqa [rsp + 040h + 030h], xmm9
+ movdqa [rsp + 040h + 040h], xmm10
+ movdqa [rsp + 040h + 050h], xmm11
+ movdqa [rsp + 040h + 060h], xmm12
+ movdqa [rsp + 040h + 070h], xmm13
+ movdqa [rsp + 040h + 080h], xmm14
+ movdqa [rsp + 040h + 090h], xmm15
+ stmxcsr [rsp + 040h + 0a0h]
+
+ mov r10, [xBP + 018h] ; pCtx
+ mov eax, [r10 + CPUMCTX.fXStateMask]
+ test eax, eax
+ jz .guest_fpu_state_manually
+
+ ;
+ ; Using XSAVE to load the guest XMM, YMM and ZMM registers.
+ ;
+ and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS
+ xor edx, edx
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ xrstor [r10]
+
+ ; Make the call (same as in the other case).
+ mov r11, [xBP + 38h] ; pfnStartVM
+ mov r10, [xBP + 30h] ; pVCpu
+ mov [xSP + 020h], r10
+ mov rcx, [xBP + 010h] ; fResumeVM
+ mov rdx, [xBP + 018h] ; pCtx
+ mov r8, [xBP + 020h] ; pVMCSCache
+ mov r9, [xBP + 028h] ; pVM
+ call r11
+
+ mov r11d, eax ; save return value (xsave below uses eax)
+
+ ; Save the guest XMM registers.
+ mov r10, [xBP + 018h] ; pCtx
+ mov eax, [r10 + CPUMCTX.fXStateMask]
+ and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS
+ xor edx, edx
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ xsave [r10]
+
+ mov eax, r11d ; restore return value.
+
+.restore_non_volatile_host_xmm_regs:
+ ; Load the non-volatile host XMM registers.
+ movdqa xmm6, [rsp + 040h + 000h]
+ movdqa xmm7, [rsp + 040h + 010h]
+ movdqa xmm8, [rsp + 040h + 020h]
+ movdqa xmm9, [rsp + 040h + 030h]
+ movdqa xmm10, [rsp + 040h + 040h]
+ movdqa xmm11, [rsp + 040h + 050h]
+ movdqa xmm12, [rsp + 040h + 060h]
+ movdqa xmm13, [rsp + 040h + 070h]
+ movdqa xmm14, [rsp + 040h + 080h]
+ movdqa xmm15, [rsp + 040h + 090h]
+ ldmxcsr [rsp + 040h + 0a0h]
+ leave
+ ret
+
+ ;
+ ; No XSAVE, load and save the guest XMM registers manually.
+ ;
+.guest_fpu_state_manually:
+ ; Load the full guest XMM register state.
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ movdqa xmm0, [r10 + XMM_OFF_IN_X86FXSTATE + 000h]
+ movdqa xmm1, [r10 + XMM_OFF_IN_X86FXSTATE + 010h]
+ movdqa xmm2, [r10 + XMM_OFF_IN_X86FXSTATE + 020h]
+ movdqa xmm3, [r10 + XMM_OFF_IN_X86FXSTATE + 030h]
+ movdqa xmm4, [r10 + XMM_OFF_IN_X86FXSTATE + 040h]
+ movdqa xmm5, [r10 + XMM_OFF_IN_X86FXSTATE + 050h]
+ movdqa xmm6, [r10 + XMM_OFF_IN_X86FXSTATE + 060h]
+ movdqa xmm7, [r10 + XMM_OFF_IN_X86FXSTATE + 070h]
+ movdqa xmm8, [r10 + XMM_OFF_IN_X86FXSTATE + 080h]
+ movdqa xmm9, [r10 + XMM_OFF_IN_X86FXSTATE + 090h]
+ movdqa xmm10, [r10 + XMM_OFF_IN_X86FXSTATE + 0a0h]
+ movdqa xmm11, [r10 + XMM_OFF_IN_X86FXSTATE + 0b0h]
+ movdqa xmm12, [r10 + XMM_OFF_IN_X86FXSTATE + 0c0h]
+ movdqa xmm13, [r10 + XMM_OFF_IN_X86FXSTATE + 0d0h]
+ movdqa xmm14, [r10 + XMM_OFF_IN_X86FXSTATE + 0e0h]
+ movdqa xmm15, [r10 + XMM_OFF_IN_X86FXSTATE + 0f0h]
+ ldmxcsr [r10 + X86FXSTATE.MXCSR]
+
+ ; Make the call (same as in the other case).
+ mov r11, [xBP + 38h] ; pfnStartVM
+ mov r10, [xBP + 30h] ; pVCpu
+ mov [xSP + 020h], r10
+ mov rcx, [xBP + 010h] ; fResumeVM
+ mov rdx, [xBP + 018h] ; pCtx
+ mov r8, [xBP + 020h] ; pVMCSCache
+ mov r9, [xBP + 028h] ; pVM
+ call r11
+
+ ; Save the guest XMM registers.
+ mov r10, [xBP + 018h] ; pCtx
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ stmxcsr [r10 + X86FXSTATE.MXCSR]
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 000h], xmm0
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 010h], xmm1
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 020h], xmm2
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 030h], xmm3
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 040h], xmm4
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 050h], xmm5
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 060h], xmm6
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 070h], xmm7
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 080h], xmm8
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 090h], xmm9
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0a0h], xmm10
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0b0h], xmm11
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0c0h], xmm12
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0d0h], xmm13
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0e0h], xmm14
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0f0h], xmm15
+ jmp .restore_non_volatile_host_xmm_regs
+ENDPROC hmR0VMXStartVMWrapXMM
+
+;;
+; Wrapper around svm.pfnVMRun that preserves host XMM registers and
+; loads the guest ones when necessary.
+;
+; @cproto DECLASM(int) hmR0SVMRunWrapXMM(RTHCPHYS HCPhysVmcbHost, RTHCPHYS HCPhysVmcb, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu,
+; PFNHMSVMVMRUN pfnVMRun);
+;
+; @returns eax
+;
+; @param HCPhysVmcbHost msc:rcx
+; @param HCPhysVmcb msc:rdx
+; @param pCtx msc:r8
+; @param pVM msc:r9
+; @param pVCpu msc:[rbp+30h] The cross context virtual CPU structure of the calling EMT.
+; @param pfnVMRun msc:[rbp+38h]
+;
+; @remarks This is essentially the same code as hmR0VMXStartVMWrapXMM, only the parameters differ a little bit.
+;
+; @remarks Drivers shouldn't use AVX registers without saving+loading:
+; https://msdn.microsoft.com/en-us/library/windows/hardware/ff545910%28v=vs.85%29.aspx?f=255&MSPPError=-2147217396
+; However, the compiler docs have a different idea:
+; https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
+; We'll go with the former for now.
+;
+; ASSUMING 64-bit and windows for now.
+ALIGNCODE(16)
+BEGINPROC hmR0SVMRunWrapXMM
+ push xBP
+ mov xBP, xSP
+ sub xSP, 0b0h + 040h ; Don't bother optimizing the frame size.
+
+ ; spill input parameters.
+ mov [xBP + 010h], rcx ; HCPhysVmcbHost
+ mov [xBP + 018h], rdx ; HCPhysVmcb
+ mov [xBP + 020h], r8 ; pCtx
+ mov [xBP + 028h], r9 ; pVM
+
+ ; Ask CPUM whether we've started using the FPU yet.
+ mov rcx, [xBP + 30h] ; pVCpu
+ call NAME(CPUMIsGuestFPUStateActive)
+ test al, al
+ jnz .guest_fpu_state_active
+
+ ; No need to mess with XMM registers; just call the start routine and return.
+ mov r11, [xBP + 38h] ; pfnVMRun
+ mov r10, [xBP + 30h] ; pVCpu
+ mov [xSP + 020h], r10
+ mov rcx, [xBP + 010h] ; HCPhysVmcbHost
+ mov rdx, [xBP + 018h] ; HCPhysVmcb
+ mov r8, [xBP + 020h] ; pCtx
+ mov r9, [xBP + 028h] ; pVM
+ call r11
+
+ leave
+ ret
+
+ALIGNCODE(8)
+.guest_fpu_state_active:
+ ; Save the non-volatile host XMM registers.
+ movdqa [rsp + 040h + 000h], xmm6
+ movdqa [rsp + 040h + 010h], xmm7
+ movdqa [rsp + 040h + 020h], xmm8
+ movdqa [rsp + 040h + 030h], xmm9
+ movdqa [rsp + 040h + 040h], xmm10
+ movdqa [rsp + 040h + 050h], xmm11
+ movdqa [rsp + 040h + 060h], xmm12
+ movdqa [rsp + 040h + 070h], xmm13
+ movdqa [rsp + 040h + 080h], xmm14
+ movdqa [rsp + 040h + 090h], xmm15
+ stmxcsr [rsp + 040h + 0a0h]
+
+ mov r10, [xBP + 020h] ; pCtx
+ mov eax, [r10 + CPUMCTX.fXStateMask]
+ test eax, eax
+ jz .guest_fpu_state_manually
+
+ ;
+ ; Using XSAVE.
+ ;
+ and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS
+ xor edx, edx
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ xrstor [r10]
+
+ ; Make the call (same as in the other case).
+ mov r11, [xBP + 38h] ; pfnVMRun
+ mov r10, [xBP + 30h] ; pVCpu
+ mov [xSP + 020h], r10
+ mov rcx, [xBP + 010h] ; HCPhysVmcbHost
+ mov rdx, [xBP + 018h] ; HCPhysVmcb
+ mov r8, [xBP + 020h] ; pCtx
+ mov r9, [xBP + 028h] ; pVM
+ call r11
+
+ mov r11d, eax ; save return value (xsave below uses eax)
+
+ ; Save the guest XMM registers.
+ mov r10, [xBP + 020h] ; pCtx
+ mov eax, [r10 + CPUMCTX.fXStateMask]
+ and eax, CPUM_VOLATILE_XSAVE_GUEST_COMPONENTS
+ xor edx, edx
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ xsave [r10]
+
+ mov eax, r11d ; restore return value.
+
+.restore_non_volatile_host_xmm_regs:
+ ; Load the non-volatile host XMM registers.
+ movdqa xmm6, [rsp + 040h + 000h]
+ movdqa xmm7, [rsp + 040h + 010h]
+ movdqa xmm8, [rsp + 040h + 020h]
+ movdqa xmm9, [rsp + 040h + 030h]
+ movdqa xmm10, [rsp + 040h + 040h]
+ movdqa xmm11, [rsp + 040h + 050h]
+ movdqa xmm12, [rsp + 040h + 060h]
+ movdqa xmm13, [rsp + 040h + 070h]
+ movdqa xmm14, [rsp + 040h + 080h]
+ movdqa xmm15, [rsp + 040h + 090h]
+ ldmxcsr [rsp + 040h + 0a0h]
+ leave
+ ret
+
+ ;
+ ; No XSAVE, load and save the guest XMM registers manually.
+ ;
+.guest_fpu_state_manually:
+ ; Load the full guest XMM register state.
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ movdqa xmm0, [r10 + XMM_OFF_IN_X86FXSTATE + 000h]
+ movdqa xmm1, [r10 + XMM_OFF_IN_X86FXSTATE + 010h]
+ movdqa xmm2, [r10 + XMM_OFF_IN_X86FXSTATE + 020h]
+ movdqa xmm3, [r10 + XMM_OFF_IN_X86FXSTATE + 030h]
+ movdqa xmm4, [r10 + XMM_OFF_IN_X86FXSTATE + 040h]
+ movdqa xmm5, [r10 + XMM_OFF_IN_X86FXSTATE + 050h]
+ movdqa xmm6, [r10 + XMM_OFF_IN_X86FXSTATE + 060h]
+ movdqa xmm7, [r10 + XMM_OFF_IN_X86FXSTATE + 070h]
+ movdqa xmm8, [r10 + XMM_OFF_IN_X86FXSTATE + 080h]
+ movdqa xmm9, [r10 + XMM_OFF_IN_X86FXSTATE + 090h]
+ movdqa xmm10, [r10 + XMM_OFF_IN_X86FXSTATE + 0a0h]
+ movdqa xmm11, [r10 + XMM_OFF_IN_X86FXSTATE + 0b0h]
+ movdqa xmm12, [r10 + XMM_OFF_IN_X86FXSTATE + 0c0h]
+ movdqa xmm13, [r10 + XMM_OFF_IN_X86FXSTATE + 0d0h]
+ movdqa xmm14, [r10 + XMM_OFF_IN_X86FXSTATE + 0e0h]
+ movdqa xmm15, [r10 + XMM_OFF_IN_X86FXSTATE + 0f0h]
+ ldmxcsr [r10 + X86FXSTATE.MXCSR]
+
+ ; Make the call (same as in the other case).
+ mov r11, [xBP + 38h] ; pfnVMRun
+ mov r10, [xBP + 30h] ; pVCpu
+ mov [xSP + 020h], r10
+ mov rcx, [xBP + 010h] ; HCPhysVmcbHost
+ mov rdx, [xBP + 018h] ; HCPhysVmcb
+ mov r8, [xBP + 020h] ; pCtx
+ mov r9, [xBP + 028h] ; pVM
+ call r11
+
+ ; Save the guest XMM registers.
+ mov r10, [xBP + 020h] ; pCtx
+ mov r10, [r10 + CPUMCTX.pXStateR0]
+ stmxcsr [r10 + X86FXSTATE.MXCSR]
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 000h], xmm0
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 010h], xmm1
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 020h], xmm2
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 030h], xmm3
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 040h], xmm4
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 050h], xmm5
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 060h], xmm6
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 070h], xmm7
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 080h], xmm8
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 090h], xmm9
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0a0h], xmm10
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0b0h], xmm11
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0c0h], xmm12
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0d0h], xmm13
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0e0h], xmm14
+ movdqa [r10 + XMM_OFF_IN_X86FXSTATE + 0f0h], xmm15
+ jmp .restore_non_volatile_host_xmm_regs
+ENDPROC hmR0SVMRunWrapXMM
+
+%endif ; VBOX_WITH_KERNEL_USING_XMM
+
+
+;; @def RESTORE_STATE_VM32
+; Macro restoring essential host state and updating guest state
+; for common host, 32-bit guest for VT-x.
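+; Note: the stack layout consumed here is assumed to match what VMXR0StartVM32
+;       pushes before VMLAUNCH/VMRESUME: IDTR, GDTR, TR, LDTR, pCtx, pCache,
+;       segment registers, the XCR0 restore marker (with the saved host XCR0
+;       beneath it when applicable) and finally the host GPRs, from the top of
+;       the stack downwards (skipped items omitted).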
+%macro RESTORE_STATE_VM32 0
+ ; Restore base and limit of the IDTR & GDTR.
+ %ifndef VMX_SKIP_IDTR
+ lidt [xSP]
+ add xSP, xCB * 2
+ %endif
+ %ifndef VMX_SKIP_GDTR
+ lgdt [xSP]
+ add xSP, xCB * 2
+ %endif
+
+ push xDI
+ %ifndef VMX_SKIP_TR
+ mov xDI, [xSP + xCB * 3] ; pCtx (*3 to skip the saved xDI, TR, LDTR).
+ %else
+ mov xDI, [xSP + xCB * 2] ; pCtx (*2 to skip the saved xDI, LDTR).
+ %endif
+
+ mov [ss:xDI + CPUMCTX.eax], eax
+ mov xAX, SPECTRE_FILLER
+ mov [ss:xDI + CPUMCTX.ebx], ebx
+ mov xBX, xAX
+ mov [ss:xDI + CPUMCTX.ecx], ecx
+ mov xCX, xAX
+ mov [ss:xDI + CPUMCTX.edx], edx
+ mov xDX, xAX
+ mov [ss:xDI + CPUMCTX.esi], esi
+ mov xSI, xAX
+ mov [ss:xDI + CPUMCTX.ebp], ebp
+ mov xBP, xAX
+ mov xAX, cr2
+ mov [ss:xDI + CPUMCTX.cr2], xAX
+
+ %ifdef RT_ARCH_AMD64
+ pop xAX ; The guest edi we pushed above.
+ mov dword [ss:xDI + CPUMCTX.edi], eax
+ %else
+ pop dword [ss:xDI + CPUMCTX.edi] ; The guest edi we pushed above.
+ %endif
+
+ ; Fight spectre.
+ INDIRECT_BRANCH_PREDICTION_BARRIER ss:xDI, CPUMCTX_WSF_IBPB_EXIT
+
+ %ifndef VMX_SKIP_TR
+ ; Restore TSS selector; must mark it as not busy before using ltr (!)
+ ; ASSUME that this is supposed to be 'BUSY'. (saves 20-30 ticks on the T42p)
+ ; @todo get rid of sgdt
+ pop xBX ; Saved TR
+ sub xSP, xCB * 2
+ sgdt [xSP]
+ mov xAX, xBX
+ and eax, X86_SEL_MASK_OFF_RPL ; Mask away TI and RPL bits leaving only the descriptor offset.
+ add xAX, [xSP + 2] ; eax <- GDTR.address + descriptor offset.
+ and dword [ss:xAX + 4], ~RT_BIT(9) ; Clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit).
+ ltr bx
+ add xSP, xCB * 2
+ %endif
+
+ pop xAX ; Saved LDTR
+ %ifdef RT_ARCH_AMD64
+ cmp eax, 0
+ je %%skip_ldt_write32
+ %endif
+ lldt ax
+
+%%skip_ldt_write32:
+ add xSP, xCB ; pCtx
+
+ %ifdef VMX_USE_CACHED_VMCS_ACCESSES
+ pop xDX ; Saved pCache
+
+ ; Note! If we get here as a result of an invalid VMCS pointer, all the following
+ ; vmread's will fail (only eflags.cf=1 will be set), but that shouldn't cause
+ ; any trouble; it is merely less efficient.
+ mov ecx, [ss:xDX + VMCSCACHE.Read.cValidEntries]
+ cmp ecx, 0 ; Can't happen
+ je %%no_cached_read32
+ jmp %%cached_read32
+
+ALIGN(16)
+%%cached_read32:
+ dec xCX
+ mov eax, [ss:xDX + VMCSCACHE.Read.aField + xCX * 4]
+ ; Note! This leaves the high 32 bits of the cache entry unmodified!!
+ vmread [ss:xDX + VMCSCACHE.Read.aFieldVal + xCX * 8], xAX
+ cmp xCX, 0
+ jnz %%cached_read32
+%%no_cached_read32:
+ %endif
+
+ ; Restore segment registers.
+ MYPOPSEGS xAX, ax
+
+ ; Restore the host XCR0 if necessary.
+ pop xCX
+ test ecx, ecx
+ jnz %%xcr0_after_skip
+ pop xAX
+ pop xDX
+ xsetbv ; ecx is already zero.
+%%xcr0_after_skip:
+
+ ; Restore general purpose registers.
+ MYPOPAD
+%endmacro
+
+
+;;
+; Prepares for and executes VMLAUNCH/VMRESUME (32-bit guest mode)
+;
+; @returns VBox status code
+; @param fResume x86:[ebp+8], msc:rcx,gcc:rdi Whether to use vmlaunch/vmresume.
+; @param pCtx x86:[ebp+c], msc:rdx,gcc:rsi Pointer to the guest-CPU context.
+; @param pCache x86:[ebp+10],msc:r8, gcc:rdx Pointer to the VMCS cache.
+; @param pVM x86:[ebp+14],msc:r9, gcc:rcx The cross context VM structure.
+; @param pVCpu x86:[ebp+18],msc:[ebp+30],gcc:r8 The cross context virtual CPU structure of the calling EMT.
+;
+ALIGNCODE(16)
+BEGINPROC VMXR0StartVM32
+ push xBP
+ mov xBP, xSP
+
+ pushf
+ cli
+
+ ;
+ ; Save all general purpose host registers.
+ ;
+ MYPUSHAD
+
+ ;
+ ; First we have to write some final guest CPU context registers.
+ ;
+ mov eax, VMX_VMCS_HOST_RIP
+%ifdef RT_ARCH_AMD64
+ lea r10, [.vmlaunch_done wrt rip]
+ vmwrite rax, r10
+%else
+ mov ecx, .vmlaunch_done
+ vmwrite eax, ecx
+%endif
+ ; Note: assumes success!
+
+ ;
+ ; Unify input parameter registers.
+ ;
+%ifdef RT_ARCH_AMD64
+ %ifdef ASM_CALL64_GCC
+ ; fResume already in rdi
+ ; pCtx already in rsi
+ mov rbx, rdx ; pCache
+ %else
+ mov rdi, rcx ; fResume
+ mov rsi, rdx ; pCtx
+ mov rbx, r8 ; pCache
+ %endif
+%else
+ mov edi, [ebp + 8] ; fResume
+ mov esi, [ebp + 12] ; pCtx
+ mov ebx, [ebp + 16] ; pCache
+%endif
+
+ ;
+ ; Save the host XCR0 and load the guest one if necessary.
+ ; Note! Trashes rdx and rcx.
+ ;
+%ifdef ASM_CALL64_MSC
+ mov rax, [xBP + 30h] ; pVCpu
+%elifdef ASM_CALL64_GCC
+ mov rax, r8 ; pVCpu
+%else
+ mov eax, [xBP + 18h] ; pVCpu
+%endif
+ test byte [xAX + VMCPU.hm + HMCPU.fLoadSaveGuestXcr0], 1
+ jz .xcr0_before_skip
+
+ xor ecx, ecx
+ xgetbv ; Save the host one on the stack.
+ push xDX
+ push xAX
+
+ mov eax, [xSI + CPUMCTX.aXcr] ; Load the guest one.
+ mov edx, [xSI + CPUMCTX.aXcr + 4]
+ xor ecx, ecx ; paranoia
+ xsetbv
+
+ push 0 ; Indicate that we must restore XCR0 (popped into ecx, thus 0).
+ jmp .xcr0_before_done
+
+.xcr0_before_skip:
+ push 3fh ; indicate that we need not restore XCR0.
+.xcr0_before_done:
+
+ ;
+ ; Save segment registers.
+ ; Note! Trashes rdx & rcx, so we moved it here (amd64 case).
+ ;
+ MYPUSHSEGS xAX, ax
+
+%ifdef VMX_USE_CACHED_VMCS_ACCESSES
+ mov ecx, [xBX + VMCSCACHE.Write.cValidEntries]
+ cmp ecx, 0
+ je .no_cached_writes
+ mov edx, ecx
+ mov ecx, 0
+ jmp .cached_write
+
+ALIGN(16)
+.cached_write:
+ mov eax, [xBX + VMCSCACHE.Write.aField + xCX * 4]
+ vmwrite xAX, [xBX + VMCSCACHE.Write.aFieldVal + xCX * 8]
+ inc xCX
+ cmp xCX, xDX
+ jl .cached_write
+
+ mov dword [xBX + VMCSCACHE.Write.cValidEntries], 0
+.no_cached_writes:
+
+ ; Save the pCache pointer.
+ push xBX
+%endif
+
+ ; Save the pCtx pointer.
+ push xSI
+
+ ; Save host LDTR.
+ xor eax, eax
+ sldt ax
+ push xAX
+
+%ifndef VMX_SKIP_TR
+ ; The host TR limit is reset to 0x67; save & restore it manually.
+ str eax
+ push xAX
+%endif
+
+%ifndef VMX_SKIP_GDTR
+ ; VT-x only saves the base of the GDTR & IDTR and resets the limit to 0xffff; we must restore the limit correctly!
+ sub xSP, xCB * 2
+ sgdt [xSP]
+%endif
+%ifndef VMX_SKIP_IDTR
+ sub xSP, xCB * 2
+ sidt [xSP]
+%endif
+
+ ; Load CR2 if necessary (may be expensive as writing CR2 is a synchronizing instruction).
+ mov xBX, [xSI + CPUMCTX.cr2]
+ mov xDX, cr2
+ cmp xBX, xDX
+ je .skip_cr2_write32
+ mov cr2, xBX
+
+.skip_cr2_write32:
+ mov eax, VMX_VMCS_HOST_RSP
+ vmwrite xAX, xSP
+ ; Note: assumes success!
+ ; Don't mess with ESP anymore!!!
+
+ ; Fight spectre and similar.
+ INDIRECT_BRANCH_PREDICTION_AND_L1_CACHE_BARRIER xSI, CPUMCTX_WSF_IBPB_ENTRY, CPUMCTX_WSF_L1D_ENTRY
+
+ ; Load guest general purpose registers.
+ mov eax, [xSI + CPUMCTX.eax]
+ mov ebx, [xSI + CPUMCTX.ebx]
+ mov ecx, [xSI + CPUMCTX.ecx]
+ mov edx, [xSI + CPUMCTX.edx]
+ mov ebp, [xSI + CPUMCTX.ebp]
+
+ ; Resume or start VM?
+ cmp xDI, 0 ; fResume
+
+ ; Load guest edi & esi.
+ mov edi, [xSI + CPUMCTX.edi]
+ mov esi, [xSI + CPUMCTX.esi]
+
+ je .vmlaunch_launch
+
+ vmresume
+ jc near .vmxstart_invalid_vmcs_ptr
+ jz near .vmxstart_start_failed
+ jmp .vmlaunch_done ; Here if vmresume detected a failure.
+
+.vmlaunch_launch:
+ vmlaunch
+ jc near .vmxstart_invalid_vmcs_ptr
+ jz near .vmxstart_start_failed
+ jmp .vmlaunch_done ; Here if vmlaunch detected a failure.
+
+ALIGNCODE(16) ;; @todo YASM BUG - this alignment is wrong on darwin, it's 1 byte off.
+.vmlaunch_done:
+ RESTORE_STATE_VM32
+ mov eax, VINF_SUCCESS
+
+.vmstart_end:
+ popf
+ pop xBP
+ ret
+
+.vmxstart_invalid_vmcs_ptr:
+ RESTORE_STATE_VM32
+ mov eax, VERR_VMX_INVALID_VMCS_PTR_TO_START_VM
+ jmp .vmstart_end
+
+.vmxstart_start_failed:
+ RESTORE_STATE_VM32
+ mov eax, VERR_VMX_UNABLE_TO_START_VM
+ jmp .vmstart_end
+
+ENDPROC VMXR0StartVM32
+
+
+%ifdef RT_ARCH_AMD64
+;; @def RESTORE_STATE_VM64
+; Macro restoring essential host state and updating guest state
+; for 64-bit host, 64-bit guest for VT-x.
+;
+%macro RESTORE_STATE_VM64 0
+ ; Restore base and limit of the IDTR & GDTR
+ %ifndef VMX_SKIP_IDTR
+ lidt [xSP]
+ add xSP, xCB * 2
+ %endif
+ %ifndef VMX_SKIP_GDTR
+ lgdt [xSP]
+ add xSP, xCB * 2
+ %endif
+
+ push xDI
+ %ifndef VMX_SKIP_TR
+ mov xDI, [xSP + xCB * 3] ; pCtx (*3 to skip the saved xDI, TR, LDTR)
+ %else
+ mov xDI, [xSP + xCB * 2] ; pCtx (*2 to skip the saved xDI, LDTR)
+ %endif
+
+ mov qword [xDI + CPUMCTX.eax], rax
+ mov rax, SPECTRE_FILLER64
+ mov qword [xDI + CPUMCTX.ebx], rbx
+ mov rbx, rax
+ mov qword [xDI + CPUMCTX.ecx], rcx
+ mov rcx, rax
+ mov qword [xDI + CPUMCTX.edx], rdx
+ mov rdx, rax
+ mov qword [xDI + CPUMCTX.esi], rsi
+ mov rsi, rax
+ mov qword [xDI + CPUMCTX.ebp], rbp
+ mov rbp, rax
+ mov qword [xDI + CPUMCTX.r8], r8
+ mov r8, rax
+ mov qword [xDI + CPUMCTX.r9], r9
+ mov r9, rax
+ mov qword [xDI + CPUMCTX.r10], r10
+ mov r10, rax
+ mov qword [xDI + CPUMCTX.r11], r11
+ mov r11, rax
+ mov qword [xDI + CPUMCTX.r12], r12
+ mov r12, rax
+ mov qword [xDI + CPUMCTX.r13], r13
+ mov r13, rax
+ mov qword [xDI + CPUMCTX.r14], r14
+ mov r14, rax
+ mov qword [xDI + CPUMCTX.r15], r15
+ mov r15, rax
+ mov rax, cr2
+ mov qword [xDI + CPUMCTX.cr2], rax
+
+ pop xAX ; The guest rdi we pushed above
+ mov qword [xDI + CPUMCTX.edi], rax
+
+ ; Fight spectre.
+ INDIRECT_BRANCH_PREDICTION_BARRIER xDI, CPUMCTX_WSF_IBPB_EXIT
+
+ %ifndef VMX_SKIP_TR
+ ; Restore TSS selector; must mark it as not busy before using ltr (!)
+ ; ASSUME that this is supposed to be 'BUSY'. (saves 20-30 ticks on the T42p).
+ ; @todo get rid of sgdt
+ pop xBX ; Saved TR
+ sub xSP, xCB * 2
+ sgdt [xSP]
+ mov xAX, xBX
+ and eax, X86_SEL_MASK_OFF_RPL ; Mask away TI and RPL bits leaving only the descriptor offset.
+ add xAX, [xSP + 2] ; eax <- GDTR.address + descriptor offset.
+ and dword [xAX + 4], ~RT_BIT(9) ; Clear the busy flag in TSS desc (bits 0-7=base, bit 9=busy bit).
+ ltr bx
+ add xSP, xCB * 2
+ %endif
+
+ pop xAX ; Saved LDTR
+ cmp eax, 0
+ je %%skip_ldt_write64
+ lldt ax
+
+%%skip_ldt_write64:
+ pop xSI ; pCtx (needed in rsi by the macros below)
+
+ %ifdef VMX_USE_CACHED_VMCS_ACCESSES
+ pop xDX ; Saved pCache
+
+ ; Note! If we get here as a result of an invalid VMCS pointer, all the following
+ ; vmread's will fail (only eflags.cf=1 will be set), but that shouldn't cause
+ ; any trouble; it is merely less efficient.
+ mov ecx, [xDX + VMCSCACHE.Read.cValidEntries]
+ cmp ecx, 0 ; Can't happen
+ je %%no_cached_read64
+ jmp %%cached_read64
+
+ALIGN(16)
+%%cached_read64:
+ dec xCX
+ mov eax, [xDX + VMCSCACHE.Read.aField + xCX * 4]
+ vmread [xDX + VMCSCACHE.Read.aFieldVal + xCX * 8], xAX
+ cmp xCX, 0
+ jnz %%cached_read64
+%%no_cached_read64:
+ %endif
+
+ ; Restore segment registers.
+ MYPOPSEGS xAX, ax
+
+ ; Restore the host XCR0 if necessary.
+ pop xCX
+ test ecx, ecx
+ jnz %%xcr0_after_skip
+ pop xAX
+ pop xDX
+ xsetbv ; ecx is already zero.
+%%xcr0_after_skip:
+
+ ; Restore general purpose registers.
+ MYPOPAD
+%endmacro
+
+
+;;
+; Prepares for and executes VMLAUNCH/VMRESUME (64-bit guest mode)
+;
+; @returns VBox status code
+; @param fResume msc:rcx, gcc:rdi Whether to use vmlaunch/vmresume.
+; @param pCtx msc:rdx, gcc:rsi Pointer to the guest-CPU context.
+; @param pCache msc:r8, gcc:rdx Pointer to the VMCS cache.
+; @param pVM msc:r9, gcc:rcx The cross context VM structure.
+; @param pVCpu msc:[ebp+30], gcc:r8 The cross context virtual CPU structure of the calling EMT.
+;
+ALIGNCODE(16)
+BEGINPROC VMXR0StartVM64
+ push xBP
+ mov xBP, xSP
+
+ pushf
+ cli
+
+ ; Save all general purpose host registers.
+ MYPUSHAD
+
+ ; First we have to save some final CPU context registers.
+ lea r10, [.vmlaunch64_done wrt rip]
+ mov rax, VMX_VMCS_HOST_RIP ; Return address (too difficult to continue after VMLAUNCH?).
+ vmwrite rax, r10
+ ; Note: assumes success!
+
+ ;
+ ; Unify the input parameter registers.
+ ;
+%ifdef ASM_CALL64_GCC
+ ; fResume already in rdi
+ ; pCtx already in rsi
+ mov rbx, rdx ; pCache
+%else
+ mov rdi, rcx ; fResume
+ mov rsi, rdx ; pCtx
+ mov rbx, r8 ; pCache
+%endif
+
+ ;
+ ; Save the host XCR0 and load the guest one if necessary.
+ ; Note! Trashes rdx and rcx.
+ ;
+%ifdef ASM_CALL64_MSC
+ mov rax, [xBP + 30h] ; pVCpu
+%else
+ mov rax, r8 ; pVCpu
+%endif
+ test byte [xAX + VMCPU.hm + HMCPU.fLoadSaveGuestXcr0], 1
+ jz .xcr0_before_skip
+
+ xor ecx, ecx
+ xgetbv ; Save the host one on the stack.
+ push xDX
+ push xAX
+
+ mov eax, [xSI + CPUMCTX.aXcr] ; Load the guest one.
+ mov edx, [xSI + CPUMCTX.aXcr + 4]
+ xor ecx, ecx ; paranoia
+ xsetbv
+
+ push 0 ; Indicate that we must restore XCR0 (popped into ecx, thus 0).
+ jmp .xcr0_before_done
+
+.xcr0_before_skip:
+ push 3fh ; indicate that we need not restore XCR0.
+.xcr0_before_done:
+
+ ;
+ ; Save segment registers.
+ ; Note! Trashes rdx & rcx, so we moved it here (amd64 case).
+ ;
+ MYPUSHSEGS xAX, ax
+
+%ifdef VMX_USE_CACHED_VMCS_ACCESSES
+ mov ecx, [xBX + VMCSCACHE.Write.cValidEntries]
+ cmp ecx, 0
+ je .no_cached_writes
+ mov edx, ecx
+ mov ecx, 0
+ jmp .cached_write
+
+ALIGN(16)
+.cached_write:
+ mov eax, [xBX + VMCSCACHE.Write.aField + xCX * 4]
+ vmwrite xAX, [xBX + VMCSCACHE.Write.aFieldVal + xCX * 8]
+ inc xCX
+ cmp xCX, xDX
+ jl .cached_write
+
+ mov dword [xBX + VMCSCACHE.Write.cValidEntries], 0
+.no_cached_writes:
+
+ ; Save the pCache pointer.
+ push xBX
+%endif
+
+ ; Save the pCtx pointer.
+ push xSI
+
+ ; Save host LDTR.
+ xor eax, eax
+ sldt ax
+ push xAX
+
+%ifndef VMX_SKIP_TR
+ ; The host TR limit is reset to 0x67; save & restore it manually.
+ str eax
+ push xAX
+%endif
+
+%ifndef VMX_SKIP_GDTR
+ ; VT-x only saves the base of the GDTR & IDTR and resets the limit to 0xffff; we must restore the limit correctly!
+ sub xSP, xCB * 2
+ sgdt [xSP]
+%endif
+%ifndef VMX_SKIP_IDTR
+ sub xSP, xCB * 2
+ sidt [xSP]
+%endif
+
+ ; Load CR2 if necessary (may be expensive as writing CR2 is a synchronizing instruction).
+ mov rbx, qword [xSI + CPUMCTX.cr2]
+ mov rdx, cr2
+ cmp rbx, rdx
+ je .skip_cr2_write
+ mov cr2, rbx
+
+.skip_cr2_write:
+ mov eax, VMX_VMCS_HOST_RSP
+ vmwrite xAX, xSP
+ ; Note: assumes success!
+ ; Don't mess with ESP anymore!!!
+
+ ; Fight spectre and similar.
+ INDIRECT_BRANCH_PREDICTION_AND_L1_CACHE_BARRIER xSI, CPUMCTX_WSF_IBPB_ENTRY, CPUMCTX_WSF_L1D_ENTRY
+
+ ; Load guest general purpose registers.
+ mov rax, qword [xSI + CPUMCTX.eax]
+ mov rbx, qword [xSI + CPUMCTX.ebx]
+ mov rcx, qword [xSI + CPUMCTX.ecx]
+ mov rdx, qword [xSI + CPUMCTX.edx]
+ mov rbp, qword [xSI + CPUMCTX.ebp]
+ mov r8, qword [xSI + CPUMCTX.r8]
+ mov r9, qword [xSI + CPUMCTX.r9]
+ mov r10, qword [xSI + CPUMCTX.r10]
+ mov r11, qword [xSI + CPUMCTX.r11]
+ mov r12, qword [xSI + CPUMCTX.r12]
+ mov r13, qword [xSI + CPUMCTX.r13]
+ mov r14, qword [xSI + CPUMCTX.r14]
+ mov r15, qword [xSI + CPUMCTX.r15]
+
+ ; Resume or start VM?
+ cmp xDI, 0 ; fResume
+
+ ; Load guest rdi & rsi.
+ mov rdi, qword [xSI + CPUMCTX.edi]
+ mov rsi, qword [xSI + CPUMCTX.esi]
+
+ je .vmlaunch64_launch
+
+ vmresume
+ jc near .vmxstart64_invalid_vmcs_ptr
+ jz near .vmxstart64_start_failed
+ jmp .vmlaunch64_done ; Here if vmresume detected a failure.
+
+.vmlaunch64_launch:
+ vmlaunch
+ jc near .vmxstart64_invalid_vmcs_ptr
+ jz near .vmxstart64_start_failed
+ jmp .vmlaunch64_done ; Here if vmlaunch detected a failure.
+
+ALIGNCODE(16)
+.vmlaunch64_done:
+ RESTORE_STATE_VM64
+ mov eax, VINF_SUCCESS
+
+.vmstart64_end:
+ popf
+ pop xBP
+ ret
+
+.vmxstart64_invalid_vmcs_ptr:
+ RESTORE_STATE_VM64
+ mov eax, VERR_VMX_INVALID_VMCS_PTR_TO_START_VM
+ jmp .vmstart64_end
+
+.vmxstart64_start_failed:
+ RESTORE_STATE_VM64
+ mov eax, VERR_VMX_UNABLE_TO_START_VM
+ jmp .vmstart64_end
+ENDPROC VMXR0StartVM64
+%endif ; RT_ARCH_AMD64
+
+
+;;
+; Prepares for and executes VMRUN (32-bit guests)
+;
+; @returns VBox status code
+; @param HCPhysVmcbHost msc:rcx,gcc:rdi Physical address of host VMCB.
+; @param HCPhysVmcb msc:rdx,gcc:rsi Physical address of guest VMCB.
+; @param pCtx msc:r8,gcc:rdx Pointer to the guest CPU-context.
+; @param pVM msc:r9,gcc:rcx The cross context VM structure.
+; @param pVCpu msc:[rsp+28],gcc:r8 The cross context virtual CPU structure of the calling EMT.
+;
+ALIGNCODE(16)
+BEGINPROC SVMR0VMRun
+%ifdef RT_ARCH_AMD64 ; fake a cdecl stack frame
+ %ifdef ASM_CALL64_GCC
+ push r8 ; pVCpu
+ push rcx ; pVM
+ push rdx ; pCtx
+ push rsi ; HCPhysVmcb
+ push rdi ; HCPhysVmcbHost
+ %else
+ mov rax, [rsp + 28h]
+ push rax ; pVCpu
+ push r9 ; pVM
+ push r8 ; pCtx
+ push rdx ; HCPhysVmcb
+ push rcx ; HCPhysVmcbHost
+ %endif
+ push 0
+%endif
+ push xBP
+ mov xBP, xSP
+ pushf
+
+ ; Save all general purpose host registers.
+ MYPUSHAD
+
+ ; Load pCtx into xSI.
+ mov xSI, [xBP + xCB * 2 + RTHCPHYS_CB * 2] ; pCtx
+
+ ; Save the host XCR0 and load the guest one if necessary.
+ mov xAX, [xBP + xCB * 2 + RTHCPHYS_CB * 2 + xCB * 2] ; pVCpu
+ test byte [xAX + VMCPU.hm + HMCPU.fLoadSaveGuestXcr0], 1
+ jz .xcr0_before_skip
+
+ xor ecx, ecx
+ xgetbv ; Save the host XCR0 on the stack
+ push xDX
+ push xAX
+
+ mov xSI, [xBP + xCB * 2 + RTHCPHYS_CB * 2] ; pCtx
+ mov eax, [xSI + CPUMCTX.aXcr] ; load the guest XCR0
+ mov edx, [xSI + CPUMCTX.aXcr + 4]
+ xor ecx, ecx ; paranoia
+ xsetbv
+
+ push 0 ; indicate that we must restore XCR0 (popped into ecx, thus 0)
+ jmp .xcr0_before_done
+
+.xcr0_before_skip:
+ push 3fh ; indicate that we need not restore XCR0
+.xcr0_before_done:
+
+ ; Save guest CPU-context pointer for simplifying saving of the GPRs afterwards.
+ push xSI
+
+ ; Save host fs, gs, sysenter msr etc.
+ mov xAX, [xBP + xCB * 2] ; HCPhysVmcbHost (64 bits physical address; x86: take low dword only)
+ push xAX ; save for the vmload after vmrun
+ vmsave
+
+ ; Fight spectre.
+ INDIRECT_BRANCH_PREDICTION_BARRIER xSI, CPUMCTX_WSF_IBPB_ENTRY
+
+ ; Setup xAX for VMLOAD.
+ mov xAX, [xBP + xCB * 2 + RTHCPHYS_CB] ; HCPhysVmcb (64 bits physical address; x86: take low dword only)
+
+ ; Load guest general purpose registers.
+ ; eax is loaded from the VMCB by VMRUN.
+ mov ebx, [xSI + CPUMCTX.ebx]
+ mov ecx, [xSI + CPUMCTX.ecx]
+ mov edx, [xSI + CPUMCTX.edx]
+ mov edi, [xSI + CPUMCTX.edi]
+ mov ebp, [xSI + CPUMCTX.ebp]
+ mov esi, [xSI + CPUMCTX.esi]
+
+ ; Clear the global interrupt flag & execute sti to make sure external interrupts cause a world switch.
+ clgi
+ sti
+
+ ; Load guest fs, gs, sysenter msr etc.
+ vmload
+
+ ; Run the VM.
+ vmrun
+
+ ; Save guest fs, gs, sysenter msr etc.
+ vmsave
+
+ ; Load host fs, gs, sysenter msr etc.
+ pop xAX ; load HCPhysVmcbHost (pushed above)
+ vmload
+
+ ; Set the global interrupt flag again, but execute cli to make sure IF=0.
+ cli
+ stgi
+
+ ; Pop the context pointer (pushed above) and save the guest GPRs (sans RSP and RAX).
+ pop xAX
+
+ mov [ss:xAX + CPUMCTX.ebx], ebx
+ mov xBX, SPECTRE_FILLER
+ mov [ss:xAX + CPUMCTX.ecx], ecx
+ mov xCX, xBX
+ mov [ss:xAX + CPUMCTX.edx], edx
+ mov xDX, xBX
+ mov [ss:xAX + CPUMCTX.esi], esi
+ mov xSI, xBX
+ mov [ss:xAX + CPUMCTX.edi], edi
+ mov xDI, xBX
+ mov [ss:xAX + CPUMCTX.ebp], ebp
+ mov xBP, xBX
+
+ ; Fight spectre. Note! Trashes xAX!
+ INDIRECT_BRANCH_PREDICTION_BARRIER ss:xAX, CPUMCTX_WSF_IBPB_EXIT
+
+ ; Restore the host xcr0 if necessary.
+ pop xCX
+ test ecx, ecx
+ jnz .xcr0_after_skip
+ pop xAX
+ pop xDX
+ xsetbv ; ecx is already zero
+.xcr0_after_skip:
+
+ ; Restore host general purpose registers.
+ MYPOPAD
+
+ mov eax, VINF_SUCCESS
+
+ popf
+ pop xBP
+%ifdef RT_ARCH_AMD64
+ add xSP, 6*xCB
+%endif
+ ret
+ENDPROC SVMR0VMRun
+
+
+%ifdef RT_ARCH_AMD64
+;;
+; Prepares for and executes VMRUN (64-bit guests)
+;
+; @returns VBox status code
+; @param HCPhysVmcbHost msc:rcx,gcc:rdi Physical address of host VMCB.
+; @param HCPhysVmcb msc:rdx,gcc:rsi Physical address of guest VMCB.
+; @param pCtx msc:r8,gcc:rdx Pointer to the guest-CPU context.
+; @param pVM msc:r9,gcc:rcx The cross context VM structure.
+; @param pVCpu msc:[rsp+28],gcc:r8 The cross context virtual CPU structure of the calling EMT.
+;
+ALIGNCODE(16)
+BEGINPROC SVMR0VMRun64
+ ; Fake a cdecl stack frame
+ %ifdef ASM_CALL64_GCC
+ push r8 ;pVCpu
+ push rcx ;pVM
+ push rdx ;pCtx
+ push rsi ;HCPhysVmcb
+ push rdi ;HCPhysVmcbHost
+ %else
+ mov rax, [rsp + 28h]
+ push rax ; rbp + 30h pVCpu
+ push r9 ; rbp + 28h pVM
+ push r8 ; rbp + 20h pCtx
+ push rdx ; rbp + 18h HCPhysVmcb
+ push rcx ; rbp + 10h HCPhysVmcbHost
+ %endif
+ push 0 ; rbp + 08h "fake ret addr"
+ push rbp ; rbp + 00h
+ mov rbp, rsp
+ pushf
+
+ ; Manual save and restore:
+ ; - General purpose registers except RIP, RSP, RAX
+ ;
+ ; Trashed:
+ ; - CR2 (we don't care)
+ ; - LDTR (reset to 0)
+ ; - DRx (presumably not changed at all)
+ ; - DR7 (reset to 0x400)
+
+ ; Save all general purpose host registers.
+ MYPUSHAD
+
+ ; Load pCtx into xSI.
+ mov xSI, [rbp + xCB * 2 + RTHCPHYS_CB * 2]
+
+ ; Save the host XCR0 and load the guest one if necessary.
+ mov rax, [xBP + 30h] ; pVCpu
+ test byte [xAX + VMCPU.hm + HMCPU.fLoadSaveGuestXcr0], 1
+ jz .xcr0_before_skip
+
+ xor ecx, ecx
+ xgetbv ; save the host XCR0 on the stack.
+ push xDX
+ push xAX
+
+ mov xSI, [xBP + xCB * 2 + RTHCPHYS_CB * 2] ; pCtx
+ mov eax, [xSI + CPUMCTX.aXcr] ; load the guest XCR0
+ mov edx, [xSI + CPUMCTX.aXcr + 4]
+ xor ecx, ecx ; paranoia
+ xsetbv
+
+ push 0 ; indicate that we must restore XCR0 (popped into ecx, thus 0)
+ jmp .xcr0_before_done
+
+.xcr0_before_skip:
+ push 3fh ; indicate that we need not restore XCR0
+.xcr0_before_done:
+
+ ; Save guest CPU-context pointer for simplifying saving of the GPRs afterwards.
+ push rsi
+
+ ; Save host fs, gs, sysenter msr etc.
+ mov rax, [rbp + xCB * 2] ; HCPhysVmcbHost (64 bits physical address; x86: take low dword only)
+ push rax ; save for the vmload after vmrun
+ vmsave
+
+ ; Fight spectre.
+ INDIRECT_BRANCH_PREDICTION_BARRIER xSI, CPUMCTX_WSF_IBPB_ENTRY
+
+ ; Setup rax for VMLOAD.
+ mov rax, [rbp + xCB * 2 + RTHCPHYS_CB] ; HCPhysVmcb (64 bits physical address; take low dword only)
+
+ ; Load guest general purpose registers (rax is loaded from the VMCB by VMRUN).
+ mov rbx, qword [xSI + CPUMCTX.ebx]
+ mov rcx, qword [xSI + CPUMCTX.ecx]
+ mov rdx, qword [xSI + CPUMCTX.edx]
+ mov rdi, qword [xSI + CPUMCTX.edi]
+ mov rbp, qword [xSI + CPUMCTX.ebp]
+ mov r8, qword [xSI + CPUMCTX.r8]
+ mov r9, qword [xSI + CPUMCTX.r9]
+ mov r10, qword [xSI + CPUMCTX.r10]
+ mov r11, qword [xSI + CPUMCTX.r11]
+ mov r12, qword [xSI + CPUMCTX.r12]
+ mov r13, qword [xSI + CPUMCTX.r13]
+ mov r14, qword [xSI + CPUMCTX.r14]
+ mov r15, qword [xSI + CPUMCTX.r15]
+ mov rsi, qword [xSI + CPUMCTX.esi]
+
+ ; Clear the global interrupt flag & execute sti to make sure external interrupts cause a world switch.
+ clgi
+ sti
+
+ ; Load guest FS, GS, Sysenter MSRs etc.
+ vmload
+
+ ; Run the VM.
+ vmrun
+
+ ; Save guest fs, gs, sysenter msr etc.
+ vmsave
+
+ ; Load host fs, gs, sysenter msr etc.
+ pop rax ; load HCPhysVmcbHost (pushed above)
+ vmload
+
+ ; Set the global interrupt flag again, but execute cli to make sure IF=0.
+ cli
+ stgi
+
+ ; Pop the context pointer (pushed above) and save the guest GPRs (sans RSP and RAX).
+ pop rax
+
+ mov qword [rax + CPUMCTX.ebx], rbx
+ mov rbx, SPECTRE_FILLER64
+ mov qword [rax + CPUMCTX.ecx], rcx
+ mov rcx, rbx
+ mov qword [rax + CPUMCTX.edx], rdx
+ mov rdx, rbx
+ mov qword [rax + CPUMCTX.esi], rsi
+ mov rsi, rbx
+ mov qword [rax + CPUMCTX.edi], rdi
+ mov rdi, rbx
+ mov qword [rax + CPUMCTX.ebp], rbp
+ mov rbp, rbx
+ mov qword [rax + CPUMCTX.r8], r8
+ mov r8, rbx
+ mov qword [rax + CPUMCTX.r9], r9
+ mov r9, rbx
+ mov qword [rax + CPUMCTX.r10], r10
+ mov r10, rbx
+ mov qword [rax + CPUMCTX.r11], r11
+ mov r11, rbx
+ mov qword [rax + CPUMCTX.r12], r12
+ mov r12, rbx
+ mov qword [rax + CPUMCTX.r13], r13
+ mov r13, rbx
+ mov qword [rax + CPUMCTX.r14], r14
+ mov r14, rbx
+ mov qword [rax + CPUMCTX.r15], r15
+ mov r15, rbx
+
+ ; Fight spectre. Note! Trashes rax!
+ INDIRECT_BRANCH_PREDICTION_BARRIER rax, CPUMCTX_WSF_IBPB_EXIT
+
+ ; Restore the host xcr0 if necessary.
+ pop xCX
+ test ecx, ecx
+ jnz .xcr0_after_skip
+ pop xAX
+ pop xDX
+ xsetbv ; ecx is already zero
+.xcr0_after_skip:
+
+ ; Restore host general purpose registers.
+ MYPOPAD
+
+ mov eax, VINF_SUCCESS
+
+ popf
+ pop rbp
+ add rsp, 6 * xCB
+ ret
+ENDPROC SVMR0VMRun64
+%endif ; RT_ARCH_AMD64
+
diff --git a/src/VBox/VMM/VMMR0/HMSVMR0.cpp b/src/VBox/VMM/VMMR0/HMSVMR0.cpp
new file mode 100644
index 00000000..50338e0e
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/HMSVMR0.cpp
@@ -0,0 +1,8232 @@
+/* $Id: HMSVMR0.cpp $ */
+/** @file
+ * HM SVM (AMD-V) - Host Context Ring-0.
+ */
+
+/*
+ * Copyright (C) 2013-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_HM
+#define VMCPU_INCL_CPUM_GST_CTX
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/thread.h>
+
+#include <VBox/vmm/pdmapi.h>
+#include <VBox/vmm/dbgf.h>
+#include <VBox/vmm/iem.h>
+#include <VBox/vmm/iom.h>
+#include <VBox/vmm/tm.h>
+#include <VBox/vmm/em.h>
+#include <VBox/vmm/gim.h>
+#include <VBox/vmm/apic.h>
+#include "HMInternal.h"
+#include <VBox/vmm/vm.h>
+#include <VBox/err.h>
+#include "HMSVMR0.h"
+#include "dtrace/VBoxVMM.h"
+
+#ifdef DEBUG_ramshankar
+# define HMSVM_SYNC_FULL_GUEST_STATE
+# define HMSVM_ALWAYS_TRAP_ALL_XCPTS
+# define HMSVM_ALWAYS_TRAP_PF
+# define HMSVM_ALWAYS_TRAP_TASK_SWITCH
+#endif
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+#ifdef VBOX_WITH_STATISTICS
+# define HMSVM_EXITCODE_STAM_COUNTER_INC(u64ExitCode) do { \
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll); \
+ if ((u64ExitCode) == SVM_EXIT_NPF) \
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf); \
+ else \
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[(u64ExitCode) & MASK_EXITREASON_STAT]); \
+ } while (0)
+
+# ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+# define HMSVM_NESTED_EXITCODE_STAM_COUNTER_INC(u64ExitCode) do { \
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll); \
+ if ((u64ExitCode) == SVM_EXIT_NPF) \
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitReasonNpf); \
+ else \
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatNestedExitReasonR0[(u64ExitCode) & MASK_EXITREASON_STAT]); \
+ } while (0)
+# endif
+#else
+# define HMSVM_EXITCODE_STAM_COUNTER_INC(u64ExitCode) do { } while (0)
+# ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+# define HMSVM_NESTED_EXITCODE_STAM_COUNTER_INC(u64ExitCode) do { } while (0)
+# endif
+#endif /* !VBOX_WITH_STATISTICS */
+
+/** If we decide to use a function table approach, this can be useful to
+ * switch to a "static DECLCALLBACK(int)". */
+#define HMSVM_EXIT_DECL static int
+
+/**
+ * Subset of the guest-CPU state that is kept by SVM R0 code while executing the
+ * guest using hardware-assisted SVM.
+ *
+ * This excludes state like TSC AUX, GPRs (other than RSP, RAX) which are always
+ * swapped and restored across the world-switch, and also registers like
+ * EFER, PAT MSR etc. which cannot be modified by the guest without causing a
+ * \#VMEXIT.
+ */
+#define HMSVM_CPUMCTX_EXTRN_ALL ( CPUMCTX_EXTRN_RIP \
+ | CPUMCTX_EXTRN_RFLAGS \
+ | CPUMCTX_EXTRN_RAX \
+ | CPUMCTX_EXTRN_RSP \
+ | CPUMCTX_EXTRN_SREG_MASK \
+ | CPUMCTX_EXTRN_CR0 \
+ | CPUMCTX_EXTRN_CR2 \
+ | CPUMCTX_EXTRN_CR3 \
+ | CPUMCTX_EXTRN_TABLE_MASK \
+ | CPUMCTX_EXTRN_DR6 \
+ | CPUMCTX_EXTRN_DR7 \
+ | CPUMCTX_EXTRN_KERNEL_GS_BASE \
+ | CPUMCTX_EXTRN_SYSCALL_MSRS \
+ | CPUMCTX_EXTRN_SYSENTER_MSRS \
+ | CPUMCTX_EXTRN_HWVIRT \
+ | CPUMCTX_EXTRN_HM_SVM_MASK)
+
+/**
+ * Subset of the guest-CPU state that is shared between the guest and host.
+ */
+#define HMSVM_CPUMCTX_SHARED_STATE CPUMCTX_EXTRN_DR_MASK
+
+/** Macro for importing guest state from the VMCB back into CPUMCTX. */
+#define HMSVM_CPUMCTX_IMPORT_STATE(a_pVCpu, a_fWhat) \
+ do { \
+ if ((a_pVCpu)->cpum.GstCtx.fExtrn & (a_fWhat)) \
+ hmR0SvmImportGuestState((a_pVCpu), (a_fWhat)); \
+ } while (0)
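+
+/* Illustrative usage sketch (the actual call sites are in the #VMEXIT handlers
+ * further down): import only what a handler needs, e.g.
+ *     HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
+ * or everything:
+ *     HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ */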
+
+/** Assert that the required state bits are fetched. */
+#define HMSVM_CPUMCTX_ASSERT(a_pVCpu, a_fExtrnMbz) AssertMsg(!((a_pVCpu)->cpum.GstCtx.fExtrn & (a_fExtrnMbz)), \
+ ("fExtrn=%#RX64 fExtrnMbz=%#RX64\n", \
+ (a_pVCpu)->cpum.GstCtx.fExtrn, (a_fExtrnMbz)))
+
+/** Assert that preemption is disabled or covered by thread-context hooks. */
+#define HMSVM_ASSERT_PREEMPT_SAFE(a_pVCpu) Assert( VMMR0ThreadCtxHookIsEnabled((a_pVCpu)) \
+ || !RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+/** Assert that we haven't migrated CPUs when thread-context hooks are not
+ * used. */
+#define HMSVM_ASSERT_CPU_SAFE(a_pVCpu) AssertMsg( VMMR0ThreadCtxHookIsEnabled((a_pVCpu)) \
+ || (a_pVCpu)->hm.s.idEnteredCpu == RTMpCpuId(), \
+ ("Illegal migration! Entered on CPU %u Current %u\n", \
+ (a_pVCpu)->hm.s.idEnteredCpu, RTMpCpuId()));
+
+/** Assert that we're not executing a nested-guest. */
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+# define HMSVM_ASSERT_NOT_IN_NESTED_GUEST(a_pCtx) Assert(!CPUMIsGuestInSvmNestedHwVirtMode((a_pCtx)))
+#else
+# define HMSVM_ASSERT_NOT_IN_NESTED_GUEST(a_pCtx) do { NOREF((a_pCtx)); } while (0)
+#endif
+
+/** Assert that we're executing a nested-guest. */
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+# define HMSVM_ASSERT_IN_NESTED_GUEST(a_pCtx) Assert(CPUMIsGuestInSvmNestedHwVirtMode((a_pCtx)))
+#else
+# define HMSVM_ASSERT_IN_NESTED_GUEST(a_pCtx) do { NOREF((a_pCtx)); } while (0)
+#endif
+
+/** Macro for checking and returning from the calling function for
+ * \#VMEXIT intercepts that may be caused during the delivery of another
+ * event in the guest. */
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+# define HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(a_pVCpu, a_pSvmTransient) \
+ do \
+ { \
+ int rc = hmR0SvmCheckExitDueToEventDelivery((a_pVCpu), (a_pSvmTransient)); \
+ if (RT_LIKELY(rc == VINF_SUCCESS)) { /* continue #VMEXIT handling */ } \
+ else if ( rc == VINF_HM_DOUBLE_FAULT) { return VINF_SUCCESS; } \
+ else if ( rc == VINF_EM_RESET \
+ && CPUMIsGuestSvmCtrlInterceptSet((a_pVCpu), &(a_pVCpu)->cpum.GstCtx, SVM_CTRL_INTERCEPT_SHUTDOWN)) \
+ { \
+ HMSVM_CPUMCTX_IMPORT_STATE((a_pVCpu), HMSVM_CPUMCTX_EXTRN_ALL); \
+ return VBOXSTRICTRC_TODO(IEMExecSvmVmexit((a_pVCpu), SVM_EXIT_SHUTDOWN, 0, 0)); \
+ } \
+ else \
+ return rc; \
+ } while (0)
+#else
+# define HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(a_pVCpu, a_pSvmTransient) \
+ do \
+ { \
+ int rc = hmR0SvmCheckExitDueToEventDelivery((a_pVCpu), (a_pSvmTransient)); \
+ if (RT_LIKELY(rc == VINF_SUCCESS)) { /* continue #VMEXIT handling */ } \
+ else if ( rc == VINF_HM_DOUBLE_FAULT) { return VINF_SUCCESS; } \
+ else \
+ return rc; \
+ } while (0)
+#endif
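+
+/* Note: VINF_HM_DOUBLE_FAULT above means a #DF has been made pending for injection,
+ * so the original #VMEXIT is abandoned and the guest is resumed; VINF_EM_RESET
+ * corresponds to a triple fault, which a nested-guest may instead intercept as a
+ * SHUTDOWN #VMEXIT (handled in the nested variant of this macro). */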
+
+/** Macro for upgrading a @a a_rc to VINF_EM_DBG_STEPPED after emulating an
+ * instruction that exited. */
+#define HMSVM_CHECK_SINGLE_STEP(a_pVCpu, a_rc) \
+ do { \
+ if ((a_pVCpu)->hm.s.fSingleInstruction && (a_rc) == VINF_SUCCESS) \
+ (a_rc) = VINF_EM_DBG_STEPPED; \
+ } while (0)
+
+/** Validate segment descriptor granularity bit. */
+#ifdef VBOX_STRICT
+# define HMSVM_ASSERT_SEG_GRANULARITY(a_pCtx, reg) \
+ AssertMsg( !(a_pCtx)->reg.Attr.n.u1Present \
+ || ( (a_pCtx)->reg.Attr.n.u1Granularity \
+ ? ((a_pCtx)->reg.u32Limit & 0xfff) == 0xfff \
+ : (a_pCtx)->reg.u32Limit <= UINT32_C(0xfffff)), \
+ ("Invalid Segment Attributes Limit=%#RX32 Attr=%#RX32 Base=%#RX64\n", (a_pCtx)->reg.u32Limit, \
+ (a_pCtx)->reg.Attr.u, (a_pCtx)->reg.u64Base))
+#else
+# define HMSVM_ASSERT_SEG_GRANULARITY(a_pCtx, reg) do { } while (0)
+#endif
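+
+/* For reference: with the granularity bit set the segment limit is scaled in 4 KB
+ * units, so the expanded 32-bit limit must have its low 12 bits all set; with it
+ * clear the limit is byte-granular and must not exceed 0xfffff. */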
+
+/**
+ * Exception bitmap mask for all contributory exceptions.
+ *
+ * Page fault is deliberately excluded here as it's conditional as to whether
+ * it's contributory or benign. Page faults are handled separately.
+ */
+#define HMSVM_CONTRIBUTORY_XCPT_MASK ( RT_BIT(X86_XCPT_GP) | RT_BIT(X86_XCPT_NP) | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_TS) \
+ | RT_BIT(X86_XCPT_DE))
+
+/**
+ * Mandatory/unconditional guest control intercepts.
+ *
+ * SMIs can and do happen in normal operation. We need not intercept them
+ * while executing the guest (or nested-guest).
+ */
+#define HMSVM_MANDATORY_GUEST_CTRL_INTERCEPTS ( SVM_CTRL_INTERCEPT_INTR \
+ | SVM_CTRL_INTERCEPT_NMI \
+ | SVM_CTRL_INTERCEPT_INIT \
+ | SVM_CTRL_INTERCEPT_RDPMC \
+ | SVM_CTRL_INTERCEPT_CPUID \
+ | SVM_CTRL_INTERCEPT_RSM \
+ | SVM_CTRL_INTERCEPT_HLT \
+ | SVM_CTRL_INTERCEPT_IOIO_PROT \
+ | SVM_CTRL_INTERCEPT_MSR_PROT \
+ | SVM_CTRL_INTERCEPT_INVLPGA \
+ | SVM_CTRL_INTERCEPT_SHUTDOWN \
+ | SVM_CTRL_INTERCEPT_FERR_FREEZE \
+ | SVM_CTRL_INTERCEPT_VMRUN \
+ | SVM_CTRL_INTERCEPT_SKINIT \
+ | SVM_CTRL_INTERCEPT_WBINVD \
+ | SVM_CTRL_INTERCEPT_MONITOR \
+ | SVM_CTRL_INTERCEPT_MWAIT \
+ | SVM_CTRL_INTERCEPT_CR0_SEL_WRITE \
+ | SVM_CTRL_INTERCEPT_XSETBV)
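+
+/* Of these, the VMRUN intercept is architecturally required (VMRUN fails its
+ * consistency checks if it is clear); the rest either must reach the host
+ * (INTR, NMI, INIT, SHUTDOWN) or need software virtualization (e.g. CPUID,
+ * RDPMC, XSETBV and the MSR/IO protection intercepts). */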
+
+/** @name VMCB Clean Bits.
+ *
+ * These flags are used for VMCB-state caching. A set VMCB Clean bit indicates
+ * AMD-V doesn't need to reload the corresponding value(s) from the VMCB in
+ * memory.
+ *
+ * @{ */
+/** All intercepts vectors, TSC offset, PAUSE filter counter. */
+#define HMSVM_VMCB_CLEAN_INTERCEPTS RT_BIT(0)
+/** I/O permission bitmap, MSR permission bitmap. */
+#define HMSVM_VMCB_CLEAN_IOPM_MSRPM RT_BIT(1)
+/** ASID. */
+#define HMSVM_VMCB_CLEAN_ASID RT_BIT(2)
+/** TPR: V_TPR, V_IRQ, V_INTR_PRIO, V_IGN_TPR, V_INTR_MASKING,
+ *  V_INTR_VECTOR. */
+#define HMSVM_VMCB_CLEAN_INT_CTRL RT_BIT(3)
+/** Nested Paging: Nested CR3 (nCR3), PAT. */
+#define HMSVM_VMCB_CLEAN_NP RT_BIT(4)
+/** Control registers (CR0, CR3, CR4, EFER). */
+#define HMSVM_VMCB_CLEAN_CRX_EFER RT_BIT(5)
+/** Debug registers (DR6, DR7). */
+#define HMSVM_VMCB_CLEAN_DRX RT_BIT(6)
+/** GDT, IDT limit and base. */
+#define HMSVM_VMCB_CLEAN_DT RT_BIT(7)
+/** Segment register: CS, SS, DS, ES limit and base. */
+#define HMSVM_VMCB_CLEAN_SEG RT_BIT(8)
+/** CR2.*/
+#define HMSVM_VMCB_CLEAN_CR2 RT_BIT(9)
+/** Last-branch record (DbgCtlMsr, br_from, br_to, lastint_from, lastint_to) */
+#define HMSVM_VMCB_CLEAN_LBR RT_BIT(10)
+/** AVIC (AVIC APIC_BAR; AVIC APIC_BACKING_PAGE, AVIC PHYSICAL_TABLE and
+ *  AVIC LOGICAL_TABLE Pointers). */
+#define HMSVM_VMCB_CLEAN_AVIC RT_BIT(11)
+/** Mask of all valid VMCB Clean bits. */
+#define HMSVM_VMCB_CLEAN_ALL ( HMSVM_VMCB_CLEAN_INTERCEPTS \
+ | HMSVM_VMCB_CLEAN_IOPM_MSRPM \
+ | HMSVM_VMCB_CLEAN_ASID \
+ | HMSVM_VMCB_CLEAN_INT_CTRL \
+ | HMSVM_VMCB_CLEAN_NP \
+ | HMSVM_VMCB_CLEAN_CRX_EFER \
+ | HMSVM_VMCB_CLEAN_DRX \
+ | HMSVM_VMCB_CLEAN_DT \
+ | HMSVM_VMCB_CLEAN_SEG \
+ | HMSVM_VMCB_CLEAN_CR2 \
+ | HMSVM_VMCB_CLEAN_LBR \
+ | HMSVM_VMCB_CLEAN_AVIC)
+/** @} */
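+
+/* Illustrative usage of the clean bits: whenever software modifies a VMCB field it
+ * must clear the corresponding clean bit so the CPU reloads that state on the next
+ * VMRUN, e.g. (sketch; uNewTscOffset is a hypothetical value):
+ *     pVmcb->ctrl.u64TSCOffset      = uNewTscOffset;
+ *     pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ */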
+
+/** @name SVM transient.
+ *
+ * A state structure for holding miscellaneous information across AMD-V
+ * VMRUN/\#VMEXIT operation, restored after the transition.
+ *
+ * @{ */
+typedef struct SVMTRANSIENT
+{
+ /** The host's rflags/eflags. */
+ RTCCUINTREG fEFlags;
+#if HC_ARCH_BITS == 32
+ uint32_t u32Alignment0;
+#endif
+
+ /** The \#VMEXIT exit code (the EXITCODE field in the VMCB). */
+ uint64_t u64ExitCode;
+ /** The guest's TPR value used for TPR shadowing. */
+ uint8_t u8GuestTpr;
+ /** Alignment. */
+ uint8_t abAlignment0[7];
+
+ /** Pointer to the currently executing VMCB. */
+ PSVMVMCB pVmcb;
+ /** Whether we are currently executing a nested-guest. */
+ bool fIsNestedGuest;
+
+ /** Whether the guest debug state was active at the time of \#VMEXIT. */
+ bool fWasGuestDebugStateActive;
+ /** Whether the hyper debug state was active at the time of \#VMEXIT. */
+ bool fWasHyperDebugStateActive;
+ /** Whether the TSC offset mode needs to be updated. */
+ bool fUpdateTscOffsetting;
+ /** Whether the TSC_AUX MSR needs restoring on \#VMEXIT. */
+ bool fRestoreTscAuxMsr;
+ /** Whether the \#VMEXIT was caused by a page-fault during delivery of a
+ * contributory exception or a page-fault. */
+ bool fVectoringDoublePF;
+ /** Whether the \#VMEXIT was caused by a page-fault during delivery of an
+ * external interrupt or NMI. */
+ bool fVectoringPF;
+} SVMTRANSIENT, *PSVMTRANSIENT;
+AssertCompileMemberAlignment(SVMTRANSIENT, u64ExitCode, sizeof(uint64_t));
+AssertCompileMemberAlignment(SVMTRANSIENT, pVmcb, sizeof(uint64_t));
+/** @} */
+
+/**
+ * MSRPM (MSR permission bitmap) read permissions (for guest RDMSR).
+ */
+typedef enum SVMMSREXITREAD
+{
+ /** Reading this MSR causes a \#VMEXIT. */
+ SVMMSREXIT_INTERCEPT_READ = 0xb,
+ /** Reading this MSR does not cause a \#VMEXIT. */
+ SVMMSREXIT_PASSTHRU_READ
+} SVMMSREXITREAD;
+
+/**
+ * MSRPM (MSR permission bitmap) write permissions (for guest WRMSR).
+ */
+typedef enum SVMMSREXITWRITE
+{
+ /** Writing to this MSR causes a \#VMEXIT. */
+ SVMMSREXIT_INTERCEPT_WRITE = 0xd,
+ /** Writing to this MSR does not cause a \#VMEXIT. */
+ SVMMSREXIT_PASSTHRU_WRITE
+} SVMMSREXITWRITE;
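+
+/* For reference: the MSRPM uses two adjacent bits per covered MSR, the even bit
+ * intercepting RDMSR and the odd bit intercepting WRMSR (see hmR0SvmSetMsrPermission
+ * below); with MSR protection enabled, accesses to MSRs outside the covered ranges
+ * cause a #VMEXIT regardless of the bitmap. */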
+
+/**
+ * SVM \#VMEXIT handler.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM-transient structure.
+ */
+typedef int FNSVMEXITHANDLER(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient);
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static void hmR0SvmPendingEventToTrpmTrap(PVMCPU pVCpu);
+static void hmR0SvmLeave(PVMCPU pVCpu, bool fImportState);
+
+
+/** @name \#VMEXIT handlers.
+ * @{
+ */
+static FNSVMEXITHANDLER hmR0SvmExitIntr;
+static FNSVMEXITHANDLER hmR0SvmExitWbinvd;
+static FNSVMEXITHANDLER hmR0SvmExitInvd;
+static FNSVMEXITHANDLER hmR0SvmExitCpuid;
+static FNSVMEXITHANDLER hmR0SvmExitRdtsc;
+static FNSVMEXITHANDLER hmR0SvmExitRdtscp;
+static FNSVMEXITHANDLER hmR0SvmExitRdpmc;
+static FNSVMEXITHANDLER hmR0SvmExitInvlpg;
+static FNSVMEXITHANDLER hmR0SvmExitHlt;
+static FNSVMEXITHANDLER hmR0SvmExitMonitor;
+static FNSVMEXITHANDLER hmR0SvmExitMwait;
+static FNSVMEXITHANDLER hmR0SvmExitShutdown;
+static FNSVMEXITHANDLER hmR0SvmExitUnexpected;
+static FNSVMEXITHANDLER hmR0SvmExitReadCRx;
+static FNSVMEXITHANDLER hmR0SvmExitWriteCRx;
+static FNSVMEXITHANDLER hmR0SvmExitMsr;
+static FNSVMEXITHANDLER hmR0SvmExitReadDRx;
+static FNSVMEXITHANDLER hmR0SvmExitWriteDRx;
+static FNSVMEXITHANDLER hmR0SvmExitXsetbv;
+static FNSVMEXITHANDLER hmR0SvmExitIOInstr;
+static FNSVMEXITHANDLER hmR0SvmExitNestedPF;
+static FNSVMEXITHANDLER hmR0SvmExitVIntr;
+static FNSVMEXITHANDLER hmR0SvmExitTaskSwitch;
+static FNSVMEXITHANDLER hmR0SvmExitVmmCall;
+static FNSVMEXITHANDLER hmR0SvmExitPause;
+static FNSVMEXITHANDLER hmR0SvmExitFerrFreeze;
+static FNSVMEXITHANDLER hmR0SvmExitIret;
+static FNSVMEXITHANDLER hmR0SvmExitXcptPF;
+static FNSVMEXITHANDLER hmR0SvmExitXcptUD;
+static FNSVMEXITHANDLER hmR0SvmExitXcptMF;
+static FNSVMEXITHANDLER hmR0SvmExitXcptDB;
+static FNSVMEXITHANDLER hmR0SvmExitXcptAC;
+static FNSVMEXITHANDLER hmR0SvmExitXcptBP;
+static FNSVMEXITHANDLER hmR0SvmExitXcptGP;
+#if defined(HMSVM_ALWAYS_TRAP_ALL_XCPTS) || defined(VBOX_WITH_NESTED_HWVIRT_SVM)
+static FNSVMEXITHANDLER hmR0SvmExitXcptGeneric;
+#endif
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+static FNSVMEXITHANDLER hmR0SvmExitClgi;
+static FNSVMEXITHANDLER hmR0SvmExitStgi;
+static FNSVMEXITHANDLER hmR0SvmExitVmload;
+static FNSVMEXITHANDLER hmR0SvmExitVmsave;
+static FNSVMEXITHANDLER hmR0SvmExitInvlpga;
+static FNSVMEXITHANDLER hmR0SvmExitVmrun;
+static FNSVMEXITHANDLER hmR0SvmNestedExitXcptDB;
+static FNSVMEXITHANDLER hmR0SvmNestedExitXcptBP;
+#endif
+/** @} */
+
+static int hmR0SvmHandleExit(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+static int hmR0SvmHandleExitNested(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient);
+#endif
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** Ring-0 memory object for the IO bitmap. */
+static RTR0MEMOBJ g_hMemObjIOBitmap = NIL_RTR0MEMOBJ;
+/** Physical address of the IO bitmap. */
+static RTHCPHYS g_HCPhysIOBitmap;
+/** Pointer to the IO bitmap. */
+static R0PTRTYPE(void *) g_pvIOBitmap;
+
+#ifdef VBOX_STRICT
+# define HMSVM_LOG_RBP_RSP RT_BIT_32(0)
+# define HMSVM_LOG_CR_REGS RT_BIT_32(1)
+# define HMSVM_LOG_CS RT_BIT_32(2)
+# define HMSVM_LOG_SS RT_BIT_32(3)
+# define HMSVM_LOG_FS RT_BIT_32(4)
+# define HMSVM_LOG_GS RT_BIT_32(5)
+# define HMSVM_LOG_LBR RT_BIT_32(6)
+# define HMSVM_LOG_ALL ( HMSVM_LOG_RBP_RSP \
+ | HMSVM_LOG_CR_REGS \
+ | HMSVM_LOG_CS \
+ | HMSVM_LOG_SS \
+ | HMSVM_LOG_FS \
+ | HMSVM_LOG_GS \
+ | HMSVM_LOG_LBR)
+
+/**
+ * Dumps virtual CPU state and additional info to the logger for diagnostics.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ * @param pszPrefix Log prefix.
+ * @param fFlags Log flags, see HMSVM_LOG_XXX.
+ * @param uVerbose The verbosity level, currently unused.
+ */
+static void hmR0SvmLogState(PVMCPU pVCpu, PCSVMVMCB pVmcb, const char *pszPrefix, uint32_t fFlags, uint8_t uVerbose)
+{
+ RT_NOREF2(pVCpu, uVerbose);
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
+ Log4(("%s: cs:rip=%04x:%RX64 efl=%#RX64\n", pszPrefix, pCtx->cs.Sel, pCtx->rip, pCtx->rflags.u));
+
+ if (fFlags & HMSVM_LOG_RBP_RSP)
+ {
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_RBP);
+ Log4(("%s: rsp=%#RX64 rbp=%#RX64\n", pszPrefix, pCtx->rsp, pCtx->rbp));
+ }
+
+ if (fFlags & HMSVM_LOG_CR_REGS)
+ {
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR3 | CPUMCTX_EXTRN_CR4);
+ Log4(("%s: cr0=%#RX64 cr3=%#RX64 cr4=%#RX64\n", pszPrefix, pCtx->cr0, pCtx->cr3, pCtx->cr4));
+ }
+
+ if (fFlags & HMSVM_LOG_CS)
+ {
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CS);
+ Log4(("%s: cs={%04x base=%016RX64 limit=%08x flags=%08x}\n", pszPrefix, pCtx->cs.Sel, pCtx->cs.u64Base,
+ pCtx->cs.u32Limit, pCtx->cs.Attr.u));
+ }
+ if (fFlags & HMSVM_LOG_SS)
+ {
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SS);
+ Log4(("%s: ss={%04x base=%016RX64 limit=%08x flags=%08x}\n", pszPrefix, pCtx->ss.Sel, pCtx->ss.u64Base,
+ pCtx->ss.u32Limit, pCtx->ss.Attr.u));
+ }
+ if (fFlags & HMSVM_LOG_FS)
+ {
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_FS);
+ Log4(("%s: fs={%04x base=%016RX64 limit=%08x flags=%08x}\n", pszPrefix, pCtx->fs.Sel, pCtx->fs.u64Base,
+ pCtx->fs.u32Limit, pCtx->fs.Attr.u));
+ }
+ if (fFlags & HMSVM_LOG_GS)
+ {
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_GS);
+ Log4(("%s: gs={%04x base=%016RX64 limit=%08x flags=%08x}\n", pszPrefix, pCtx->gs.Sel, pCtx->gs.u64Base,
+ pCtx->gs.u32Limit, pCtx->gs.Attr.u));
+ }
+
+ PCSVMVMCBSTATESAVE pVmcbGuest = &pVmcb->guest;
+ if (fFlags & HMSVM_LOG_LBR)
+ {
+ Log4(("%s: br_from=%#RX64 br_to=%#RX64 lastxcpt_from=%#RX64 lastxcpt_to=%#RX64\n", pszPrefix, pVmcbGuest->u64BR_FROM,
+ pVmcbGuest->u64BR_TO, pVmcbGuest->u64LASTEXCPFROM, pVmcbGuest->u64LASTEXCPTO));
+ }
+ NOREF(pszPrefix); NOREF(pVmcbGuest); NOREF(pCtx);
+}
+#endif /* VBOX_STRICT */
+
+
+/**
+ * Sets up and activates AMD-V on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVM The cross context VM structure. Can be
+ * NULL after a resume!
+ * @param pvCpuPage Pointer to the global CPU page.
+ * @param HCPhysCpuPage Physical address of the global CPU page.
+ * @param fEnabledByHost Whether the host OS has already initialized AMD-V.
+ * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs (currently
+ * unused).
+ */
+VMMR0DECL(int) SVMR0EnableCpu(PHMPHYSCPU pHostCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
+ PCSUPHWVIRTMSRS pHwvirtMsrs)
+{
+ Assert(!fEnabledByHost);
+ Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
+ Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
+ Assert(pvCpuPage); NOREF(pvCpuPage);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ RT_NOREF2(fEnabledByHost, pHwvirtMsrs);
+
+ /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with EFER. */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ /*
+ * We must turn on AMD-V and setup the host state physical address, as those MSRs are per CPU.
+ */
+ uint64_t u64HostEfer = ASMRdMsr(MSR_K6_EFER);
+ if (u64HostEfer & MSR_K6_EFER_SVME)
+ {
+ /* If the VBOX_HWVIRTEX_IGNORE_SVM_IN_USE is active, then we blindly use AMD-V. */
+ if ( pVM
+ && pVM->hm.s.svm.fIgnoreInUseError)
+ pHostCpu->fIgnoreAMDVInUseError = true;
+
+ if (!pHostCpu->fIgnoreAMDVInUseError)
+ {
+ ASMSetFlags(fEFlags);
+ return VERR_SVM_IN_USE;
+ }
+ }
+
+ /* Turn on AMD-V in the EFER MSR. */
+ ASMWrMsr(MSR_K6_EFER, u64HostEfer | MSR_K6_EFER_SVME);
+
+ /* Write the physical page address where the CPU will store the host state while executing the VM. */
+ ASMWrMsr(MSR_K8_VM_HSAVE_PA, HCPhysCpuPage);
+
+ /* Restore interrupts. */
+ ASMSetFlags(fEFlags);
+
+ /*
+ * Theoretically, other hypervisors may have used ASIDs, so ideally we should flush all
+ * non-zero ASIDs when enabling SVM. AMD doesn't have an SVM instruction to flush all
+ * ASIDs (flushing is done upon VMRUN). Therefore, flag that we need to flush the TLB
+ * entirely before executing any guest code.
+ */
+ pHostCpu->fFlushAsidBeforeUse = true;
+
+ /*
+ * Ensure each VCPU scheduled on this CPU gets a new ASID on resume. See @bugref{6255}.
+ */
+ ++pHostCpu->cTlbFlushes;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Deactivates AMD-V on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pvCpuPage Pointer to the global CPU page.
+ * @param HCPhysCpuPage Physical address of the global CPU page.
+ */
+VMMR0DECL(int) SVMR0DisableCpu(void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ AssertReturn( HCPhysCpuPage
+ && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
+ AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
+
+ /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with EFER. */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ /* Turn off AMD-V in the EFER MSR. */
+ uint64_t u64HostEfer = ASMRdMsr(MSR_K6_EFER);
+ ASMWrMsr(MSR_K6_EFER, u64HostEfer & ~MSR_K6_EFER_SVME);
+
+ /* Invalidate host state physical address. */
+ ASMWrMsr(MSR_K8_VM_HSAVE_PA, 0);
+
+ /* Restore interrupts. */
+ ASMSetFlags(fEFlags);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does global AMD-V initialization (called during module initialization).
+ *
+ * @returns VBox status code.
+ */
+VMMR0DECL(int) SVMR0GlobalInit(void)
+{
+ /*
+ * Allocate 12 KB (3 pages) for the IO bitmap. Since this is non-optional and we always
+ * intercept all IO accesses, it's done once globally here instead of per-VM.
+ */
+ Assert(g_hMemObjIOBitmap == NIL_RTR0MEMOBJ);
+ int rc = RTR0MemObjAllocCont(&g_hMemObjIOBitmap, SVM_IOPM_PAGES << X86_PAGE_4K_SHIFT, false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ g_pvIOBitmap = RTR0MemObjAddress(g_hMemObjIOBitmap);
+ g_HCPhysIOBitmap = RTR0MemObjGetPagePhysAddr(g_hMemObjIOBitmap, 0 /* iPage */);
+
+ /* Set all bits to intercept all IO accesses. */
+ ASMMemFill32(g_pvIOBitmap, SVM_IOPM_PAGES << X86_PAGE_4K_SHIFT, UINT32_C(0xffffffff));
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does global AMD-V termination (called during module termination).
+ */
+VMMR0DECL(void) SVMR0GlobalTerm(void)
+{
+ if (g_hMemObjIOBitmap != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(g_hMemObjIOBitmap, true /* fFreeMappings */);
+ g_pvIOBitmap = NULL;
+ g_HCPhysIOBitmap = 0;
+ g_hMemObjIOBitmap = NIL_RTR0MEMOBJ;
+ }
+}
+
+
+/**
+ * Frees any allocated per-VCPU structures for a VM.
+ *
+ * @param pVM The cross context VM structure.
+ */
+DECLINLINE(void) hmR0SvmFreeStructs(PVM pVM)
+{
+ for (uint32_t i = 0; i < pVM->cCpus; i++)
+ {
+ PVMCPU pVCpu = &pVM->aCpus[i];
+ AssertPtr(pVCpu);
+
+ if (pVCpu->hm.s.svm.hMemObjVmcbHost != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjVmcbHost, false);
+ pVCpu->hm.s.svm.HCPhysVmcbHost = 0;
+ pVCpu->hm.s.svm.hMemObjVmcbHost = NIL_RTR0MEMOBJ;
+ }
+
+ if (pVCpu->hm.s.svm.hMemObjVmcb != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjVmcb, false);
+ pVCpu->hm.s.svm.pVmcb = NULL;
+ pVCpu->hm.s.svm.HCPhysVmcb = 0;
+ pVCpu->hm.s.svm.hMemObjVmcb = NIL_RTR0MEMOBJ;
+ }
+
+ if (pVCpu->hm.s.svm.hMemObjMsrBitmap != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjMsrBitmap, false);
+ pVCpu->hm.s.svm.pvMsrBitmap = NULL;
+ pVCpu->hm.s.svm.HCPhysMsrBitmap = 0;
+ pVCpu->hm.s.svm.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
+ }
+ }
+}
+
+
+/**
+ * Does per-VM AMD-V initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) SVMR0InitVM(PVM pVM)
+{
+ int rc = VERR_INTERNAL_ERROR_5;
+
+ /*
+ * Check for an AMD CPU erratum which requires us to flush the TLB before every world-switch.
+ */
+ uint32_t u32Family;
+ uint32_t u32Model;
+ uint32_t u32Stepping;
+ if (HMIsSubjectToSvmErratum170(&u32Family, &u32Model, &u32Stepping))
+ {
+ Log4Func(("AMD cpu with erratum 170 family %#x model %#x stepping %#x\n", u32Family, u32Model, u32Stepping));
+ pVM->hm.s.svm.fAlwaysFlushTLB = true;
+ }
+
+ /*
+ * Initialize the R0 memory objects up-front so we can properly cleanup on allocation failures.
+ */
+ for (VMCPUID i = 0; i < pVM->cCpus; i++)
+ {
+ PVMCPU pVCpu = &pVM->aCpus[i];
+ pVCpu->hm.s.svm.hMemObjVmcbHost = NIL_RTR0MEMOBJ;
+ pVCpu->hm.s.svm.hMemObjVmcb = NIL_RTR0MEMOBJ;
+ pVCpu->hm.s.svm.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
+ }
+
+ for (VMCPUID i = 0; i < pVM->cCpus; i++)
+ {
+ PVMCPU pVCpu = &pVM->aCpus[i];
+
+ /*
+ * Allocate one page for the host-context VM control block (VMCB). This is used for additional host-state (such as
+ * FS, GS, Kernel GS Base, etc.) apart from the host-state save area specified in MSR_K8_VM_HSAVE_PA.
+ */
+ rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjVmcbHost, SVM_VMCB_PAGES << PAGE_SHIFT, false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ goto failure_cleanup;
+
+ void *pvVmcbHost = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVmcbHost);
+ pVCpu->hm.s.svm.HCPhysVmcbHost = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjVmcbHost, 0 /* iPage */);
+ Assert(pVCpu->hm.s.svm.HCPhysVmcbHost < _4G);
+ ASMMemZeroPage(pvVmcbHost);
+
+ /*
+ * Allocate one page for the guest-state VMCB.
+ */
+ rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjVmcb, SVM_VMCB_PAGES << PAGE_SHIFT, false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ goto failure_cleanup;
+
+ pVCpu->hm.s.svm.pVmcb = (PSVMVMCB)RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVmcb);
+ pVCpu->hm.s.svm.HCPhysVmcb = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjVmcb, 0 /* iPage */);
+ Assert(pVCpu->hm.s.svm.HCPhysVmcb < _4G);
+ ASMMemZeroPage(pVCpu->hm.s.svm.pVmcb);
+
+ /*
+ * Allocate two pages (8 KB) for the MSR permission bitmap. There doesn't seem to be a way to convince
+ * SVM to not require one.
+ */
+ rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjMsrBitmap, SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT,
+ false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ goto failure_cleanup;
+
+ pVCpu->hm.s.svm.pvMsrBitmap = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjMsrBitmap);
+ pVCpu->hm.s.svm.HCPhysMsrBitmap = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjMsrBitmap, 0 /* iPage */);
+ /* Set all bits to intercept all MSR accesses (changed later on). */
+ ASMMemFill32(pVCpu->hm.s.svm.pvMsrBitmap, SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT, UINT32_C(0xffffffff));
+ }
+
+ return VINF_SUCCESS;
+
+failure_cleanup:
+ hmR0SvmFreeStructs(pVM);
+ return rc;
+}
+
+
+/**
+ * Does per-VM AMD-V termination.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) SVMR0TermVM(PVM pVM)
+{
+ hmR0SvmFreeStructs(pVM);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Returns whether the VMCB Clean Bits feature is supported.
+ *
+ * @return @c true if supported, @c false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(bool) hmR0SvmSupportsVmcbCleanBits(PVMCPU pVCpu)
+{
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ {
+ return (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_VMCB_CLEAN)
+ && pVM->cpum.ro.GuestFeatures.fSvmVmcbClean;
+ }
+#endif
+ return RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_VMCB_CLEAN);
+}
+
+
+/**
+ * Returns whether the decode assists feature is supported.
+ *
+ * @return @c true if supported, @c false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(bool) hmR0SvmSupportsDecodeAssists(PVMCPU pVCpu)
+{
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ {
+ return (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_DECODE_ASSISTS)
+ && pVM->cpum.ro.GuestFeatures.fSvmDecodeAssists;
+ }
+#endif
+ return RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_DECODE_ASSISTS);
+}
+
+
+/**
+ * Returns whether the NRIP_SAVE feature is supported.
+ *
+ * @return @c true if supported, @c false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(bool) hmR0SvmSupportsNextRipSave(PVMCPU pVCpu)
+{
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ {
+ return (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE)
+ && pVM->cpum.ro.GuestFeatures.fSvmNextRipSave;
+ }
+#endif
+ return RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_NRIP_SAVE);
+}
+
+
+/**
+ * Sets the permission bits for the specified MSR in the MSRPM bitmap.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pbMsrBitmap Pointer to the MSR bitmap.
+ * @param idMsr The MSR for which the permissions are being set.
+ * @param enmRead MSR read permissions.
+ * @param enmWrite MSR write permissions.
+ *
+ * @remarks This function does -not- clear the VMCB clean bits for MSRPM. The
+ * caller needs to take care of this.
+ */
+static void hmR0SvmSetMsrPermission(PVMCPU pVCpu, uint8_t *pbMsrBitmap, uint32_t idMsr, SVMMSREXITREAD enmRead,
+ SVMMSREXITWRITE enmWrite)
+{
+ bool const fInNestedGuestMode = CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx);
+ uint16_t offMsrpm;
+ uint8_t uMsrpmBit;
+ int rc = HMGetSvmMsrpmOffsetAndBit(idMsr, &offMsrpm, &uMsrpmBit);
+ AssertRC(rc);
+
+ Assert(uMsrpmBit == 0 || uMsrpmBit == 2 || uMsrpmBit == 4 || uMsrpmBit == 6);
+ Assert(offMsrpm < SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT);
+
+ pbMsrBitmap += offMsrpm;
+ if (enmRead == SVMMSREXIT_INTERCEPT_READ)
+ *pbMsrBitmap |= RT_BIT(uMsrpmBit);
+ else
+ {
+ if (!fInNestedGuestMode)
+ *pbMsrBitmap &= ~RT_BIT(uMsrpmBit);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ else
+ {
+ /* Only clear the bit if the nested-guest is also not intercepting the MSR read.*/
+ uint8_t const *pbNstGstMsrBitmap = (uint8_t *)pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pvMsrBitmap);
+ pbNstGstMsrBitmap += offMsrpm;
+ if (!(*pbNstGstMsrBitmap & RT_BIT(uMsrpmBit)))
+ *pbMsrBitmap &= ~RT_BIT(uMsrpmBit);
+ else
+ Assert(*pbMsrBitmap & RT_BIT(uMsrpmBit));
+ }
+#endif
+ }
+
+ if (enmWrite == SVMMSREXIT_INTERCEPT_WRITE)
+ *pbMsrBitmap |= RT_BIT(uMsrpmBit + 1);
+ else
+ {
+ if (!fInNestedGuestMode)
+ *pbMsrBitmap &= ~RT_BIT(uMsrpmBit + 1);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ else
+ {
+ /* Only clear the bit if the nested-guest is also not intercepting the MSR write.*/
+ uint8_t const *pbNstGstMsrBitmap = (uint8_t *)pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pvMsrBitmap);
+ pbNstGstMsrBitmap += offMsrpm;
+ if (!(*pbNstGstMsrBitmap & RT_BIT(uMsrpmBit + 1)))
+ *pbMsrBitmap &= ~RT_BIT(uMsrpmBit + 1);
+ else
+ Assert(*pbMsrBitmap & RT_BIT(uMsrpmBit + 1));
+ }
+#endif
+ }
+}
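+
+/* Illustrative call (assumed here, mirroring the passthrough setup in SVMR0SetupVM):
+ *     hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_TSC_AUX,
+ *                             SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ * followed by clearing HMSVM_VMCB_CLEAN_IOPM_MSRPM as noted in the remarks above. */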
+
+
+/**
+ * Sets up AMD-V for the specified VM.
+ * This function is only called once per-VM during initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) SVMR0SetupVM(PVM pVM)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ AssertReturn(pVM, VERR_INVALID_PARAMETER);
+ Assert(pVM->hm.s.svm.fSupported);
+
+ bool const fPauseFilter = RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER);
+ bool const fPauseFilterThreshold = RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_PAUSE_FILTER_THRESHOLD);
+ bool const fUsePauseFilter = fPauseFilter && pVM->hm.s.svm.cPauseFilter;
+
+ bool const fLbrVirt = RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_LBR_VIRT);
+ bool const fUseLbrVirt = fLbrVirt; /** @todo CFGM, IEM implementation etc. */
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ bool const fVirtVmsaveVmload = RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_VIRT_VMSAVE_VMLOAD);
+ bool const fUseVirtVmsaveVmload = fVirtVmsaveVmload && pVM->hm.s.svm.fVirtVmsaveVmload && pVM->hm.s.fNestedPaging;
+
+ bool const fVGif = RT_BOOL(pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_VGIF);
+ bool const fUseVGif = fVGif && pVM->hm.s.svm.fVGif;
+#endif
+
+ PVMCPU pVCpu = &pVM->aCpus[0];
+ PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+ AssertMsgReturn(pVmcb, ("Invalid pVmcb for vcpu[0]\n"), VERR_SVM_INVALID_PVMCB);
+ PSVMVMCBCTRL pVmcbCtrl = &pVmcb->ctrl;
+
+ /* Always trap #AC for reasons of security. */
+ pVmcbCtrl->u32InterceptXcpt |= RT_BIT_32(X86_XCPT_AC);
+
+ /* Always trap #DB for reasons of security. */
+ pVmcbCtrl->u32InterceptXcpt |= RT_BIT_32(X86_XCPT_DB);
+
+ /* Trap exceptions unconditionally (debug purposes). */
+#ifdef HMSVM_ALWAYS_TRAP_PF
+ pVmcbCtrl->u32InterceptXcpt |= RT_BIT(X86_XCPT_PF);
+#endif
+#ifdef HMSVM_ALWAYS_TRAP_ALL_XCPTS
+ /* If you add any exceptions here, make sure to update hmR0SvmHandleExit(). */
+ pVmcbCtrl->u32InterceptXcpt |= 0
+ | RT_BIT(X86_XCPT_BP)
+ | RT_BIT(X86_XCPT_DE)
+ | RT_BIT(X86_XCPT_NM)
+ | RT_BIT(X86_XCPT_UD)
+ | RT_BIT(X86_XCPT_NP)
+ | RT_BIT(X86_XCPT_SS)
+ | RT_BIT(X86_XCPT_GP)
+ | RT_BIT(X86_XCPT_PF)
+ | RT_BIT(X86_XCPT_MF)
+ ;
+#endif
+
+ /* Apply the exceptions intercepts needed by the GIM provider. */
+ if (pVCpu->hm.s.fGIMTrapXcptUD)
+ pVmcbCtrl->u32InterceptXcpt |= RT_BIT(X86_XCPT_UD);
+
+ /* The mesa 3d driver hack needs #GP. */
+ if (pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv)
+ pVmcbCtrl->u32InterceptXcpt |= RT_BIT(X86_XCPT_GP);
+
+ /* Set up unconditional intercepts and conditions. */
+ pVmcbCtrl->u64InterceptCtrl = HMSVM_MANDATORY_GUEST_CTRL_INTERCEPTS
+ | SVM_CTRL_INTERCEPT_VMMCALL;
+
+#ifdef HMSVM_ALWAYS_TRAP_TASK_SWITCH
+ pVmcbCtrl->u64InterceptCtrl |= SVM_CTRL_INTERCEPT_TASK_SWITCH;
+#endif
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ /* Virtualized VMSAVE/VMLOAD. */
+ pVmcbCtrl->LbrVirt.n.u1VirtVmsaveVmload = fUseVirtVmsaveVmload;
+ if (!fUseVirtVmsaveVmload)
+ {
+ pVmcbCtrl->u64InterceptCtrl |= SVM_CTRL_INTERCEPT_VMSAVE
+ | SVM_CTRL_INTERCEPT_VMLOAD;
+ }
+
+ /* Virtual GIF. */
+ pVmcbCtrl->IntCtrl.n.u1VGifEnable = fUseVGif;
+ if (!fUseVGif)
+ {
+ pVmcbCtrl->u64InterceptCtrl |= SVM_CTRL_INTERCEPT_CLGI
+ | SVM_CTRL_INTERCEPT_STGI;
+ }
+#endif
+
+ /* CR4 writes must always be intercepted for tracking PGM mode changes. */
+ pVmcbCtrl->u16InterceptWrCRx = RT_BIT(4);
+
+ /* Intercept all DRx reads and writes by default. Changed later on. */
+ pVmcbCtrl->u16InterceptRdDRx = 0xffff;
+ pVmcbCtrl->u16InterceptWrDRx = 0xffff;
+
+ /* Virtualize masking of INTR interrupts. (reads/writes from/to CR8 go to the V_TPR register) */
+ pVmcbCtrl->IntCtrl.n.u1VIntrMasking = 1;
+
+ /* Ignore the priority in the virtual TPR. This is necessary for delivering PIC style (ExtInt) interrupts
+ and we currently deliver both PIC and APIC interrupts alike; see hmR0SvmEvaluatePendingEvent(). */
+ pVmcbCtrl->IntCtrl.n.u1IgnoreTPR = 1;
+
+ /* Set the IO permission bitmap physical addresses. */
+ pVmcbCtrl->u64IOPMPhysAddr = g_HCPhysIOBitmap;
+
+ /* LBR virtualization. */
+ pVmcbCtrl->LbrVirt.n.u1LbrVirt = fUseLbrVirt;
+
+ /* ASID 0 is reserved for the host; start the guest with ASID 1. */
+ pVmcbCtrl->TLBCtrl.n.u32ASID = 1;
+
+ /* Setup Nested Paging. This doesn't change throughout the execution time of the VM. */
+ pVmcbCtrl->NestedPagingCtrl.n.u1NestedPaging = pVM->hm.s.fNestedPaging;
+
+ /* Without Nested Paging, we need additional intercepts. */
+ if (!pVM->hm.s.fNestedPaging)
+ {
+ /* CR3 reads/writes must be intercepted; our shadow values differ from the guest values. */
+ pVmcbCtrl->u16InterceptRdCRx |= RT_BIT(3);
+ pVmcbCtrl->u16InterceptWrCRx |= RT_BIT(3);
+
+ /* Intercept INVLPG and task switches (may change CR3, EFLAGS, LDT). */
+ pVmcbCtrl->u64InterceptCtrl |= SVM_CTRL_INTERCEPT_INVLPG
+ | SVM_CTRL_INTERCEPT_TASK_SWITCH;
+
+ /* Page faults must be intercepted to implement shadow paging. */
+ pVmcbCtrl->u32InterceptXcpt |= RT_BIT(X86_XCPT_PF);
+ }
+
+ /* Setup Pause Filter for guest pause-loop (spinlock) exiting. */
+ if (fUsePauseFilter)
+ {
+ Assert(pVM->hm.s.svm.cPauseFilter > 0);
+ pVmcbCtrl->u16PauseFilterCount = pVM->hm.s.svm.cPauseFilter;
+ if (fPauseFilterThreshold)
+ pVmcbCtrl->u16PauseFilterThreshold = pVM->hm.s.svm.cPauseFilterThresholdTicks;
+ pVmcbCtrl->u64InterceptCtrl |= SVM_CTRL_INTERCEPT_PAUSE;
+ }
+
+ /*
+ * Setup the MSR permission bitmap.
+ * The following MSRs are saved/restored automatically during the world-switch.
+ * Don't intercept guest read/write accesses to these MSRs.
+ */
+ uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap;
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_CSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K6_STAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_SF_MASK, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_FS_BASE, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_GS_BASE, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_KERNEL_GS_BASE, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_IA32_SYSENTER_CS, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_IA32_SYSENTER_ESP, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_IA32_SYSENTER_EIP, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ pVmcbCtrl->u64MSRPMPhysAddr = pVCpu->hm.s.svm.HCPhysMsrBitmap;
+
+ /* Initially all VMCB clean bits MBZ indicating that everything should be loaded from the VMCB in memory. */
+ Assert(pVmcbCtrl->u32VmcbCleanBits == 0);
+
+ for (VMCPUID i = 1; i < pVM->cCpus; i++)
+ {
+ PVMCPU pVCpuCur = &pVM->aCpus[i];
+ PSVMVMCB pVmcbCur = pVM->aCpus[i].hm.s.svm.pVmcb;
+ AssertMsgReturn(pVmcbCur, ("Invalid pVmcb for vcpu[%u]\n", i), VERR_SVM_INVALID_PVMCB);
+ PSVMVMCBCTRL pVmcbCtrlCur = &pVmcbCur->ctrl;
+
+ /* Copy the VMCB control area. */
+ memcpy(pVmcbCtrlCur, pVmcbCtrl, sizeof(*pVmcbCtrlCur));
+
+ /* Copy the MSR bitmap and setup the VCPU-specific host physical address. */
+ uint8_t *pbMsrBitmapCur = (uint8_t *)pVCpuCur->hm.s.svm.pvMsrBitmap;
+ memcpy(pbMsrBitmapCur, pbMsrBitmap, SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT);
+ pVmcbCtrlCur->u64MSRPMPhysAddr = pVCpuCur->hm.s.svm.HCPhysMsrBitmap;
+
+ /* Initially all VMCB clean bits MBZ indicating that everything should be loaded from the VMCB in memory. */
+ Assert(pVmcbCtrlCur->u32VmcbCleanBits == 0);
+
+ /* Verify our assumption that GIM providers trap #UD uniformly across VCPUs initially. */
+ Assert(pVCpuCur->hm.s.fGIMTrapXcptUD == pVCpu->hm.s.fGIMTrapXcptUD);
+ }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ LogRel(("HM: fUsePauseFilter=%RTbool fUseLbrVirt=%RTbool fUseVGif=%RTbool fUseVirtVmsaveVmload=%RTbool\n", fUsePauseFilter,
+ fUseLbrVirt, fUseVGif, fUseVirtVmsaveVmload));
+#else
+ LogRel(("HM: fUsePauseFilter=%RTbool fUseLbrVirt=%RTbool\n", fUsePauseFilter, fUseLbrVirt));
+#endif
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Gets a pointer to the currently active guest (or nested-guest) VMCB.
+ *
+ * @returns Pointer to the current context VMCB.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(PSVMVMCB) hmR0SvmGetCurrentVmcb(PVMCPU pVCpu)
+{
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ return pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pVmcb);
+#endif
+ return pVCpu->hm.s.svm.pVmcb;
+}
+
+
+/**
+ * Gets a pointer to the nested-guest VMCB cache.
+ *
+ * @returns Pointer to the nested-guest VMCB cache.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(PSVMNESTEDVMCBCACHE) hmR0SvmGetNestedVmcbCache(PVMCPU pVCpu)
+{
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ Assert(pVCpu->hm.s.svm.NstGstVmcbCache.fCacheValid);
+ return &pVCpu->hm.s.svm.NstGstVmcbCache;
+#else
+ RT_NOREF(pVCpu);
+ return NULL;
+#endif
+}
+
+
+/**
+ * Invalidates a guest page by guest virtual address.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param GCVirt Guest virtual address of the page to invalidate.
+ */
+VMMR0DECL(int) SVMR0InvalidatePage(PVMCPU pVCpu, RTGCPTR GCVirt)
+{
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.svm.fSupported);
+
+ bool const fFlushPending = pVCpu->CTX_SUFF(pVM)->hm.s.svm.fAlwaysFlushTLB || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
+
+ /* Skip it if a TLB flush is already pending. */
+ if (!fFlushPending)
+ {
+ Log4Func(("%#RGv\n", GCVirt));
+
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ AssertMsgReturn(pVmcb, ("Invalid pVmcb!\n"), VERR_SVM_INVALID_PVMCB);
+
+#if HC_ARCH_BITS == 32
+ /* If we get a flush in 64-bit guest mode, then force a full TLB flush. INVLPGA takes only 32-bit addresses. */
+ if (CPUMIsGuestInLongMode(pVCpu))
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
+ else
+#endif
+ {
+ SVMR0InvlpgA(GCVirt, pVmcb->ctrl.TLBCtrl.n.u32ASID);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
+ }
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Flushes the appropriate tagged-TLB entries.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ */
+static void hmR0SvmFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ /*
+ * Force a TLB flush for the first world switch if the current CPU differs from the one
+ * we ran on last. This can happen both for start & resume due to long jumps back to
+ * ring-3.
+ *
+ * We also force a TLB flush whenever executing a nested-guest VCPU, as there is no
+ * correlation between it and the physical CPU.
+ *
+ * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while
+ * flushing the TLB, so we cannot reuse the ASIDs without flushing.
+ */
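+ /* Reminder: ASID 0 tags the host's translations and is never handed to a guest;
+ * each VCPU gets an ASID >= 1 so its TLB entries can be distinguished and, on
+ * CPUs with FLUSH_BY_ASID, flushed selectively (see below). */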
+ bool fNewAsid = false;
+ Assert(pHostCpu->idCpu != NIL_RTCPUID);
+ if ( pVCpu->hm.s.idLastCpu != pHostCpu->idCpu
+ || pVCpu->hm.s.cTlbFlushes != pHostCpu->cTlbFlushes
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ || CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx)
+#endif
+ )
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
+ pVCpu->hm.s.fForceTLBFlush = true;
+ fNewAsid = true;
+ }
+
+ /* Set TLB flush state as checked until we return from the world switch. */
+ ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true);
+
+ /* Check for explicit TLB flushes. */
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
+ {
+ pVCpu->hm.s.fForceTLBFlush = true;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
+ }
+
+ /*
+ * If the CPU is affected by AMD erratum 170, we need to flush the entire TLB for each
+ * world switch. Sad. This host CPU requirement takes precedence.
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if (pVM->hm.s.svm.fAlwaysFlushTLB)
+ {
+ pHostCpu->uCurrentAsid = 1;
+ pVCpu->hm.s.uCurrentAsid = 1;
+ pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+ pVCpu->hm.s.idLastCpu = pHostCpu->idCpu;
+ pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
+
+ /* Clear the VMCB Clean Bit for NP while flushing the TLB. See @bugref{7152}. */
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_NP;
+ }
+ else
+ {
+ pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_NOTHING;
+ if (pVCpu->hm.s.fForceTLBFlush)
+ {
+ /* Clear the VMCB Clean Bit for NP while flushing the TLB. See @bugref{7152}. */
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_NP;
+
+ if (fNewAsid)
+ {
+ ++pHostCpu->uCurrentAsid;
+
+ bool fHitASIDLimit = false;
+ if (pHostCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
+ {
+ pHostCpu->uCurrentAsid = 1; /* Wraparound at 1; host uses 0 */
+ pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new ASID. */
+ fHitASIDLimit = true;
+ }
+
+ if ( fHitASIDLimit
+ || pHostCpu->fFlushAsidBeforeUse)
+ {
+ pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
+ pHostCpu->fFlushAsidBeforeUse = false;
+ }
+
+ pVCpu->hm.s.uCurrentAsid = pHostCpu->uCurrentAsid;
+ pVCpu->hm.s.idLastCpu = pHostCpu->idCpu;
+ pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+ }
+ else
+ {
+ if (pVM->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
+ pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT;
+ else
+ pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
+ }
+
+ pVCpu->hm.s.fForceTLBFlush = false;
+ }
+ }
+
+ /* Update VMCB with the ASID. */
+ if (pVmcb->ctrl.TLBCtrl.n.u32ASID != pVCpu->hm.s.uCurrentAsid)
+ {
+ pVmcb->ctrl.TLBCtrl.n.u32ASID = pVCpu->hm.s.uCurrentAsid;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_ASID;
+ }
+
+ AssertMsg(pVCpu->hm.s.idLastCpu == pHostCpu->idCpu,
+ ("vcpu idLastCpu=%u hostcpu idCpu=%u\n", pVCpu->hm.s.idLastCpu, pHostCpu->idCpu));
+ AssertMsg(pVCpu->hm.s.cTlbFlushes == pHostCpu->cTlbFlushes,
+ ("Flush count mismatch for cpu %u (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pHostCpu->cTlbFlushes));
+ AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
+ ("cpu%d uCurrentAsid = %x\n", pHostCpu->idCpu, pHostCpu->uCurrentAsid));
+ AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
+ ("cpu%d VM uCurrentAsid = %x\n", pHostCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
+
+#ifdef VBOX_WITH_STATISTICS
+ if (pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_NOTHING)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
+ else if ( pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_SINGLE_CONTEXT
+ || pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_SINGLE_CONTEXT_RETAIN_GLOBALS)
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
+ }
+ else
+ {
+ Assert(pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_ENTIRE);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushEntire);
+ }
+#endif
+}
+
+
+/** @name 64-bit guest on 32-bit host OS helper functions.
+ *
+ * The host CPU is still 64-bit capable but the host OS is running in 32-bit
+ * mode (code segment, paging). These wrappers/helpers perform the necessary
+ * bits for the 32->64 switcher.
+ *
+ * @{ */
+#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
+/**
+ * Prepares for and executes VMRUN (64-bit guests on a 32-bit host).
+ *
+ * @returns VBox status code.
+ * @param HCPhysVmcbHost Physical address of host VMCB.
+ * @param HCPhysVmcb Physical address of the VMCB.
+ * @param pCtx Pointer to the guest-CPU context.
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLASM(int) SVMR0VMSwitcherRun64(RTHCPHYS HCPhysVmcbHost, RTHCPHYS HCPhysVmcb, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu)
+{
+ RT_NOREF2(pVM, pCtx);
+ uint32_t aParam[8];
+ aParam[0] = RT_LO_U32(HCPhysVmcbHost); /* Param 1: HCPhysVmcbHost - Lo. */
+ aParam[1] = RT_HI_U32(HCPhysVmcbHost); /* Param 1: HCPhysVmcbHost - Hi. */
+ aParam[2] = RT_LO_U32(HCPhysVmcb); /* Param 2: HCPhysVmcb - Lo. */
+ aParam[3] = RT_HI_U32(HCPhysVmcb); /* Param 2: HCPhysVmcb - Hi. */
+ aParam[4] = VM_RC_ADDR(pVM, pVM);
+ aParam[5] = 0;
+ aParam[6] = VM_RC_ADDR(pVM, pVCpu);
+ aParam[7] = 0;
+
+ return SVMR0Execute64BitsHandler(pVCpu, HM64ON32OP_SVMRCVMRun64, RT_ELEMENTS(aParam), &aParam[0]);
+}
+
+
+/**
+ * Executes the specified VMRUN handler in 64-bit mode.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param enmOp The operation to perform.
+ * @param cParams Number of parameters.
+ * @param paParam Array of 32-bit parameters.
+ */
+VMMR0DECL(int) SVMR0Execute64BitsHandler(PVMCPU pVCpu, HM64ON32OP enmOp, uint32_t cParams, uint32_t *paParam)
+{
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ AssertReturn(pVM->hm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
+ Assert(enmOp > HM64ON32OP_INVALID && enmOp < HM64ON32OP_END);
+
+ /* Disable interrupts. */
+ RTHCUINTREG const fEFlags = ASMIntDisableFlags();
+
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+ RTCPUID idHostCpu = RTMpCpuId();
+ CPUMR0SetLApic(pVCpu, idHostCpu);
+#endif
+
+ CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
+ CPUMSetHyperEIP(pVCpu, enmOp);
+ for (int i = (int)cParams - 1; i >= 0; i--)
+ CPUMPushHyper(pVCpu, paParam[i]);
+
+ /* Call the switcher. */
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z);
+ int rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_UOFFSETOF_DYN(VM, aCpus[pVCpu->idCpu].cpum) - RT_UOFFSETOF(VM, cpum));
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z);
+
+ /* Restore interrupts. */
+ ASMSetFlags(fEFlags);
+ return rc;
+}
+
+#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) */
+/** @} */
+
+
+/**
+ * Sets an exception intercept in the specified VMCB.
+ *
+ * @param pVmcb Pointer to the VM control block.
+ * @param uXcpt The exception (X86_XCPT_*).
+ */
+DECLINLINE(void) hmR0SvmSetXcptIntercept(PSVMVMCB pVmcb, uint8_t uXcpt)
+{
+ if (!(pVmcb->ctrl.u32InterceptXcpt & RT_BIT(uXcpt)))
+ {
+ pVmcb->ctrl.u32InterceptXcpt |= RT_BIT(uXcpt);
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+}
+
+
+/**
+ * Clears an exception intercept in the specified VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ * @param uXcpt The exception (X86_XCPT_*).
+ *
+ * @remarks This takes into account if we're executing a nested-guest and only
+ * removes the exception intercept if both the guest -and- nested-guest
+ * are not intercepting it.
+ */
+DECLINLINE(void) hmR0SvmClearXcptIntercept(PVMCPU pVCpu, PSVMVMCB pVmcb, uint8_t uXcpt)
+{
+ Assert(uXcpt != X86_XCPT_DB);
+ Assert(uXcpt != X86_XCPT_AC);
+ Assert(uXcpt != X86_XCPT_GP);
+#ifndef HMSVM_ALWAYS_TRAP_ALL_XCPTS
+ if (pVmcb->ctrl.u32InterceptXcpt & RT_BIT(uXcpt))
+ {
+ bool fRemove = true;
+# ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ /* Only remove the intercept if the nested-guest is also not intercepting it! */
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ {
+ PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = hmR0SvmGetNestedVmcbCache(pVCpu);
+ fRemove = !(pVmcbNstGstCache->u32InterceptXcpt & RT_BIT(uXcpt));
+ }
+# else
+ RT_NOREF(pVCpu);
+# endif
+ if (fRemove)
+ {
+ pVmcb->ctrl.u32InterceptXcpt &= ~RT_BIT(uXcpt);
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+ }
+#else
+ RT_NOREF3(pVCpu, pVmcb, uXcpt);
+#endif
+}
+
+
+/**
+ * Sets a control intercept in the specified VMCB.
+ *
+ * @param pVmcb Pointer to the VM control block.
+ * @param fCtrlIntercept The control intercept (SVM_CTRL_INTERCEPT_*).
+ */
+DECLINLINE(void) hmR0SvmSetCtrlIntercept(PSVMVMCB pVmcb, uint64_t fCtrlIntercept)
+{
+ if (!(pVmcb->ctrl.u64InterceptCtrl & fCtrlIntercept))
+ {
+ pVmcb->ctrl.u64InterceptCtrl |= fCtrlIntercept;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+}
+
+
+/**
+ * Clears a control intercept in the specified VMCB.
+ *
+ * @returns @c true if the intercept is still set, @c false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ * @param fCtrlIntercept The control intercept (SVM_CTRL_INTERCEPT_*).
+ *
+ * @remarks This takes into account if we're executing a nested-guest and only
+ * removes the control intercept if both the guest -and- nested-guest
+ * are not intercepting it.
+ */
+static bool hmR0SvmClearCtrlIntercept(PVMCPU pVCpu, PSVMVMCB pVmcb, uint64_t fCtrlIntercept)
+{
+ if (pVmcb->ctrl.u64InterceptCtrl & fCtrlIntercept)
+ {
+ bool fRemove = true;
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ /* Only remove the control intercept if the nested-guest is also not intercepting it! */
+ if (CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ {
+ PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = hmR0SvmGetNestedVmcbCache(pVCpu);
+ fRemove = !(pVmcbNstGstCache->u64InterceptCtrl & fCtrlIntercept);
+ }
+#else
+ RT_NOREF(pVCpu);
+#endif
+ if (fRemove)
+ {
+ pVmcb->ctrl.u64InterceptCtrl &= ~fCtrlIntercept;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+ }
+
+ return RT_BOOL(pVmcb->ctrl.u64InterceptCtrl & fCtrlIntercept);
+}
+
+
+/**
+ * Exports the guest (or nested-guest) CR0 into the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks This assumes we always pre-load the guest FPU.
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmExportGuestCR0(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint64_t const uGuestCr0 = pCtx->cr0;
+ uint64_t uShadowCr0 = uGuestCr0;
+
+ /* Always enable caching. */
+ uShadowCr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+
+ /* When Nested Paging is not available use shadow page tables and intercept #PFs (latter done in SVMR0SetupVM()). */
+ if (!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging)
+ {
+ uShadowCr0 |= X86_CR0_PG /* Use shadow page tables. */
+ | X86_CR0_WP; /* Guest CPL 0 writes to its read-only pages should cause a #PF #VMEXIT. */
+ }
+
+ /*
+ * Use the #MF style of legacy-FPU error reporting for now. Although AMD-V has MSRs that
+ * let us isolate the host from it, IEM/REM still needs work to emulate it properly,
+ * see @bugref{7243#c103}.
+ */
+ if (!(uGuestCr0 & X86_CR0_NE))
+ {
+ uShadowCr0 |= X86_CR0_NE;
+ hmR0SvmSetXcptIntercept(pVmcb, X86_XCPT_MF);
+ }
+ else
+ hmR0SvmClearXcptIntercept(pVCpu, pVmcb, X86_XCPT_MF);
+
+ /*
+ * If the shadow and guest CR0 are identical we can avoid intercepting CR0 reads.
+ *
+ * CR0 writes still need interception as PGM requires tracking paging mode changes,
+ * see @bugref{6944}.
+ *
+ * We also don't ever want to honor weird things like cache disable from the guest.
+ * However, we can avoid intercepting changes to the TS & MP bits by clearing the CR0
+ * write intercept below and keeping SVM_CTRL_INTERCEPT_CR0_SEL_WRITE instead.
+ */
+ if (uShadowCr0 == uGuestCr0)
+ {
+ if (!CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ {
+ pVmcb->ctrl.u16InterceptRdCRx &= ~RT_BIT(0);
+ pVmcb->ctrl.u16InterceptWrCRx &= ~RT_BIT(0);
+ Assert(pVmcb->ctrl.u64InterceptCtrl & SVM_CTRL_INTERCEPT_CR0_SEL_WRITE);
+ }
+ else
+ {
+ /* If the nested-hypervisor intercepts CR0 reads/writes, we need to continue intercepting them. */
+ PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = hmR0SvmGetNestedVmcbCache(pVCpu);
+ pVmcb->ctrl.u16InterceptRdCRx = (pVmcb->ctrl.u16InterceptRdCRx & ~RT_BIT(0))
+ | (pVmcbNstGstCache->u16InterceptRdCRx & RT_BIT(0));
+ pVmcb->ctrl.u16InterceptWrCRx = (pVmcb->ctrl.u16InterceptWrCRx & ~RT_BIT(0))
+ | (pVmcbNstGstCache->u16InterceptWrCRx & RT_BIT(0));
+ }
+ }
+ else
+ {
+ pVmcb->ctrl.u16InterceptRdCRx |= RT_BIT(0);
+ pVmcb->ctrl.u16InterceptWrCRx |= RT_BIT(0);
+ }
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+
+ Assert(!RT_HI_U32(uShadowCr0));
+ if (pVmcb->guest.u64CR0 != uShadowCr0)
+ {
+ pVmcb->guest.u64CR0 = uShadowCr0;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
+ }
+}
+
+
+/**
+ * Exports the guest (or nested-guest) CR3 into the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmExportGuestCR3(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (pVM->hm.s.fNestedPaging)
+ {
+ PGMMODE enmShwPagingMode;
+#if HC_ARCH_BITS == 32
+ if (CPUMIsGuestInLongModeEx(pCtx))
+ enmShwPagingMode = PGMMODE_AMD64_NX;
+ else
+#endif
+ enmShwPagingMode = PGMGetHostMode(pVM);
+
+ pVmcb->ctrl.u64NestedPagingCR3 = PGMGetNestedCR3(pVCpu, enmShwPagingMode);
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_NP;
+ pVmcb->guest.u64CR3 = pCtx->cr3;
+ Assert(pVmcb->ctrl.u64NestedPagingCR3);
+ }
+ else
+ pVmcb->guest.u64CR3 = PGMGetHyperCR3(pVCpu);
+
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
+}
+
+
+/**
+ * Exports the guest (or nested-guest) CR4 into the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0SvmExportGuestCR4(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint64_t uShadowCr4 = pCtx->cr4;
+ if (!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging)
+ {
+ switch (pVCpu->hm.s.enmShadowMode)
+ {
+ case PGMMODE_REAL:
+ case PGMMODE_PROTECTED: /* Protected mode, no paging. */
+ return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
+
+ case PGMMODE_32_BIT: /* 32-bit paging. */
+ uShadowCr4 &= ~X86_CR4_PAE;
+ break;
+
+ case PGMMODE_PAE: /* PAE paging. */
+ case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
+ /** Must use PAE paging as we could use physical memory > 4 GB */
+ uShadowCr4 |= X86_CR4_PAE;
+ break;
+
+ case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
+ case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
+#ifdef VBOX_ENABLE_64_BITS_GUESTS
+ break;
+#else
+ return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
+#endif
+
+ default: /* shut up gcc */
+ return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
+ }
+ }
+
+ /* Whether to save/load/restore XCR0 during world switch depends on CR4.OSXSAVE and host+guest XCR0. */
+ pVCpu->hm.s.fLoadSaveGuestXcr0 = (pCtx->cr4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0();
+
+ /* Avoid intercepting CR4 reads if the guest and shadow CR4 values are identical. */
+ if (uShadowCr4 == pCtx->cr4)
+ {
+ if (!CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ pVmcb->ctrl.u16InterceptRdCRx &= ~RT_BIT(4);
+ else
+ {
+ /* If the nested-hypervisor intercepts CR4 reads, we need to continue intercepting them. */
+ PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = hmR0SvmGetNestedVmcbCache(pVCpu);
+ pVmcb->ctrl.u16InterceptRdCRx = (pVmcb->ctrl.u16InterceptRdCRx & ~RT_BIT(4))
+ | (pVmcbNstGstCache->u16InterceptRdCRx & RT_BIT(4));
+ }
+ }
+ else
+ pVmcb->ctrl.u16InterceptRdCRx |= RT_BIT(4);
+
+ /* CR4 writes are always intercepted (both guest, nested-guest) for tracking PGM mode changes. */
+ Assert(pVmcb->ctrl.u16InterceptWrCRx & RT_BIT(4));
+
+ /* Update the VMCB with the shadow CR4 and clear the appropriate VMCB clean bits. */
+ Assert(!RT_HI_U32(uShadowCr4));
+ pVmcb->guest.u64CR4 = uShadowCr4;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~(HMSVM_VMCB_CLEAN_CRX_EFER | HMSVM_VMCB_CLEAN_INTERCEPTS);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest (or nested-guest) control registers into the VMCB.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0SvmExportGuestControlRegs(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_CR_MASK)
+ {
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_CR0)
+ hmR0SvmExportGuestCR0(pVCpu, pVmcb);
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_CR2)
+ {
+ pVmcb->guest.u64CR2 = pVCpu->cpum.GstCtx.cr2;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CR2;
+ }
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_CR3)
+ hmR0SvmExportGuestCR3(pVCpu, pVmcb);
+
+ /* CR4 re-loading is ASSUMED to be done every time we get in from ring-3! (XCR0) */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_CR4)
+ {
+ int rc = hmR0SvmExportGuestCR4(pVCpu, pVmcb);
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_CR_MASK;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest (or nested-guest) segment registers into the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmExportGuestSegmentRegs(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+
+ /* Guest segment registers. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_SREG_MASK)
+ {
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_CS)
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, CS, cs);
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_SS)
+ {
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, SS, ss);
+ pVmcb->guest.u8CPL = pCtx->ss.Attr.n.u2Dpl;
+ }
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DS)
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, DS, ds);
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_ES)
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, ES, es);
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_FS)
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, FS, fs);
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_GS)
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, GS, gs);
+
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_SEG;
+ }
+
+ /* Guest TR. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_TR)
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, TR, tr);
+
+ /* Guest LDTR. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_LDTR)
+ HMSVM_SEG_REG_COPY_TO_VMCB(pCtx, &pVmcb->guest, LDTR, ldtr);
+
+ /* Guest GDTR. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_GDTR)
+ {
+ pVmcb->guest.GDTR.u32Limit = pCtx->gdtr.cbGdt;
+ pVmcb->guest.GDTR.u64Base = pCtx->gdtr.pGdt;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DT;
+ }
+
+ /* Guest IDTR. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_IDTR)
+ {
+ pVmcb->guest.IDTR.u32Limit = pCtx->idtr.cbIdt;
+ pVmcb->guest.IDTR.u64Base = pCtx->idtr.pIdt;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DT;
+ }
+
+ pVCpu->hm.s.fCtxChanged &= ~( HM_CHANGED_GUEST_SREG_MASK
+ | HM_CHANGED_GUEST_TABLE_MASK);
+}
+
+
+/**
+ * Exports the guest (or nested-guest) MSRs into the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmExportGuestMsrs(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+
+ /* Guest Sysenter MSRs. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
+ {
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
+ pVmcb->guest.u64SysEnterCS = pCtx->SysEnter.cs;
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
+ pVmcb->guest.u64SysEnterEIP = pCtx->SysEnter.eip;
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
+ pVmcb->guest.u64SysEnterESP = pCtx->SysEnter.esp;
+ }
+
+ /*
+ * Guest EFER MSR.
+ * AMD-V requires guest EFER.SVME to be set. Weird.
+ * See AMD spec. 15.5.1 "Basic Operation" | "Canonicalization and Consistency Checks".
+ */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_EFER_MSR)
+ {
+ pVmcb->guest.u64EFER = pCtx->msrEFER | MSR_K6_EFER_SVME;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
+ }
+
+ /* If the guest isn't in 64-bit mode, clear the MSR_K6_EFER_LME bit, otherwise SVM expects amd64 shadow paging. */
+ if ( !CPUMIsGuestInLongModeEx(pCtx)
+ && (pCtx->msrEFER & MSR_K6_EFER_LME))
+ {
+ pVmcb->guest.u64EFER &= ~MSR_K6_EFER_LME;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
+ }
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_SYSCALL_MSRS)
+ {
+ pVmcb->guest.u64STAR = pCtx->msrSTAR;
+ pVmcb->guest.u64LSTAR = pCtx->msrLSTAR;
+ pVmcb->guest.u64CSTAR = pCtx->msrCSTAR;
+ pVmcb->guest.u64SFMASK = pCtx->msrSFMASK;
+ }
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_KERNEL_GS_BASE)
+ pVmcb->guest.u64KernelGSBase = pCtx->msrKERNELGSBASE;
+
+ pVCpu->hm.s.fCtxChanged &= ~( HM_CHANGED_GUEST_SYSENTER_MSR_MASK
+ | HM_CHANGED_GUEST_EFER_MSR
+ | HM_CHANGED_GUEST_SYSCALL_MSRS
+ | HM_CHANGED_GUEST_KERNEL_GS_BASE);
+
+ /*
+ * Set up the PAT MSR (applicable for Nested Paging only).
+ *
+ * While guests can modify and see the modified values through the shadow values,
+ * we shall not honor any guest modifications of this MSR to ensure caching is always
+ * enabled, similar to how we clear CR0.CD and NW bits.
+ *
+ * For nested-guests this needs to always be set as well, see @bugref{7243#c109}.
+ */
+ pVmcb->guest.u64PAT = MSR_IA32_CR_PAT_INIT_VAL;
+
+ /* Enable the last branch record bit if LBR virtualization is enabled. */
+ if (pVmcb->ctrl.LbrVirt.n.u1LbrVirt)
+ pVmcb->guest.u64DBGCTL = MSR_IA32_DEBUGCTL_LBR;
+}
+
+
+/**
+ * Exports the guest (or nested-guest) debug state into the VMCB and programs
+ * the necessary intercepts accordingly.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ * @remarks Requires EFLAGS to be up-to-date in the VMCB!
+ */
+static void hmR0SvmExportSharedDebugState(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+
+ /*
+ * Anyone single stepping on the host side? If so, we'll have to use the
+ * trap flag in the guest EFLAGS since AMD-V doesn't have a trap flag on
+ * the VMM level like the VT-x implementation does.
+ */
+ bool fInterceptMovDRx = false;
+ bool const fStepping = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
+ if (fStepping)
+ {
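+ /* Single stepping is emulated by setting TF in the guest EFLAGS; fClearTrapFlag reminds us to strip it out again when the guest state is synced back. */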
+ pVCpu->hm.s.fClearTrapFlag = true;
+ pVmcb->guest.u64RFlags |= X86_EFL_TF;
+ fInterceptMovDRx = true; /* Need clean DR6, no guest mess. */
+ }
+
+ if ( fStepping
+ || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
+ {
+ /*
+ * Use the combined guest and host DRx values found in the hypervisor
+ * register set because the debugger has breakpoints active or someone
+ * is single stepping on the host side.
+ *
+ * Note! DBGF expects a clean DR6 state before executing guest code.
+ */
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ if ( CPUMIsGuestInLongModeEx(pCtx)
+ && !CPUMIsHyperDebugStateActivePending(pVCpu))
+ {
+ CPUMR0LoadHyperDebugState(pVCpu, false /* include DR6 */);
+ Assert(!CPUMIsGuestDebugStateActivePending(pVCpu));
+ Assert(CPUMIsHyperDebugStateActivePending(pVCpu));
+ }
+ else
+#endif
+ if (!CPUMIsHyperDebugStateActive(pVCpu))
+ {
+ CPUMR0LoadHyperDebugState(pVCpu, false /* include DR6 */);
+ Assert(!CPUMIsGuestDebugStateActive(pVCpu));
+ Assert(CPUMIsHyperDebugStateActive(pVCpu));
+ }
+
+ /* Update DR6 & DR7. (The other DRx values are handled by CPUM one way or the other.) */
+ if ( pVmcb->guest.u64DR6 != X86_DR6_INIT_VAL
+ || pVmcb->guest.u64DR7 != CPUMGetHyperDR7(pVCpu))
+ {
+ pVmcb->guest.u64DR7 = CPUMGetHyperDR7(pVCpu);
+ pVmcb->guest.u64DR6 = X86_DR6_INIT_VAL;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX;
+ }
+
+ /** @todo If we cared, we could optimize to allow the guest to read registers
+ * with the same values. */
+ fInterceptMovDRx = true;
+ pVCpu->hm.s.fUsingHyperDR7 = true;
+ Log5(("hmR0SvmExportSharedDebugState: Loaded hyper DRx\n"));
+ }
+ else
+ {
+ /*
+ * Update DR6, DR7 with the guest values if necessary.
+ */
+ if ( pVmcb->guest.u64DR7 != pCtx->dr[7]
+ || pVmcb->guest.u64DR6 != pCtx->dr[6])
+ {
+ pVmcb->guest.u64DR7 = pCtx->dr[7];
+ pVmcb->guest.u64DR6 = pCtx->dr[6];
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX;
+ }
+ pVCpu->hm.s.fUsingHyperDR7 = false;
+
+ /*
+ * If the guest has enabled debug registers, we need to load them prior to
+ * executing guest code so they'll trigger at the right time.
+ */
+ if (pCtx->dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD)) /** @todo Why GD? */
+ {
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ if ( CPUMIsGuestInLongModeEx(pCtx)
+ && !CPUMIsGuestDebugStateActivePending(pVCpu))
+ {
+ CPUMR0LoadGuestDebugState(pVCpu, false /* include DR6 */);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
+ Assert(!CPUMIsHyperDebugStateActivePending(pVCpu));
+ Assert(CPUMIsGuestDebugStateActivePending(pVCpu));
+ }
+ else
+#endif
+ if (!CPUMIsGuestDebugStateActive(pVCpu))
+ {
+ CPUMR0LoadGuestDebugState(pVCpu, false /* include DR6 */);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
+ Assert(!CPUMIsHyperDebugStateActive(pVCpu));
+ Assert(CPUMIsGuestDebugStateActive(pVCpu));
+ }
+ Log5(("hmR0SvmExportSharedDebugState: Loaded guest DRx\n"));
+ }
+ /*
+ * If no debugging is enabled, we'll lazily load DR0-3. We don't need to
+ * intercept #DB as DR6 is updated in the VMCB.
+ *
+ * Note! If we cared and dared, we could skip intercepting \#DB here.
+ * However, \#DB shouldn't be performance critical, so we'll play safe
+ * and keep the code similar to the VT-x code and always intercept it.
+ */
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ else if ( !CPUMIsGuestDebugStateActivePending(pVCpu)
+ && !CPUMIsGuestDebugStateActive(pVCpu))
+#else
+ else if (!CPUMIsGuestDebugStateActive(pVCpu))
+#endif
+ {
+ fInterceptMovDRx = true;
+ }
+ }
+
+ Assert(pVmcb->ctrl.u32InterceptXcpt & RT_BIT_32(X86_XCPT_DB));
+ if (fInterceptMovDRx)
+ {
+ if ( pVmcb->ctrl.u16InterceptRdDRx != 0xffff
+ || pVmcb->ctrl.u16InterceptWrDRx != 0xffff)
+ {
+ pVmcb->ctrl.u16InterceptRdDRx = 0xffff;
+ pVmcb->ctrl.u16InterceptWrDRx = 0xffff;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+ }
+ else
+ {
+ if ( pVmcb->ctrl.u16InterceptRdDRx
+ || pVmcb->ctrl.u16InterceptWrDRx)
+ {
+ pVmcb->ctrl.u16InterceptRdDRx = 0;
+ pVmcb->ctrl.u16InterceptWrDRx = 0;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+ }
+ Log4Func(("DR6=%#RX64 DR7=%#RX64\n", pCtx->dr[6], pCtx->dr[7]));
+}
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+/**
+ * Exports the nested-guest hardware virtualization state into the nested-guest
+ * VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcbNstGst Pointer to the nested-guest VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmExportGuestHwvirtStateNested(PVMCPU pVCpu, PSVMVMCB pVmcbNstGst)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_HWVIRT)
+ {
+ /*
+ * Ensure the nested-guest pause-filter counters don't exceed the outer guest values, especially
+ * since SVM doesn't have a preemption timer.
+ *
+ * We do this here rather than in hmR0SvmSetupVmcbNested() as we may have been executing the
+ * nested-guest in IEM (incl. PAUSE instructions, which update the pause-filter counters) and
+ * may continue execution in SVM R0 without a nested-guest #VMEXIT in between.
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PSVMVMCBCTRL pVmcbNstGstCtrl = &pVmcbNstGst->ctrl;
+ uint16_t const uGuestPauseFilterCount = pVM->hm.s.svm.cPauseFilter;
+ uint16_t const uGuestPauseFilterThreshold = pVM->hm.s.svm.cPauseFilterThresholdTicks;
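+ /* If the nested hypervisor intercepts PAUSE itself, clamp its filter values to our configured limits; otherwise use our values directly. */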
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_PAUSE))
+ {
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ pVmcbNstGstCtrl->u16PauseFilterCount = RT_MIN(pCtx->hwvirt.svm.cPauseFilter, uGuestPauseFilterCount);
+ pVmcbNstGstCtrl->u16PauseFilterThreshold = RT_MIN(pCtx->hwvirt.svm.cPauseFilterThreshold, uGuestPauseFilterThreshold);
+ pVmcbNstGstCtrl->u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+ else
+ {
+ pVmcbNstGstCtrl->u16PauseFilterCount = uGuestPauseFilterCount;
+ pVmcbNstGstCtrl->u16PauseFilterThreshold = uGuestPauseFilterThreshold;
+ }
+
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_HWVIRT;
+ }
+}
+#endif
+
+/**
+ * Exports the guest APIC TPR state into the VMCB.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ */
+static int hmR0SvmExportGuestApicTpr(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_APIC_TPR)
+ {
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if ( PDMHasApic(pVM)
+ && APICIsEnabled(pVCpu))
+ {
+ bool fPendingIntr;
+ uint8_t u8Tpr;
+ int rc = APICGetTpr(pVCpu, &u8Tpr, &fPendingIntr, NULL /* pu8PendingIrq */);
+ AssertRCReturn(rc, rc);
+
+ /* Assume that we need to trap all TPR accesses and thus need not check on
+ every #VMEXIT if we should update the TPR. */
+ Assert(pVmcb->ctrl.IntCtrl.n.u1VIntrMasking);
+ pVCpu->hm.s.svm.fSyncVTpr = false;
+
+ if (!pVM->hm.s.fTPRPatchingActive)
+ {
+ /* Bits 3-0 of the VTPR field correspond to bits 7-4 of the TPR (which is the Task-Priority Class). */
+ pVmcb->ctrl.IntCtrl.n.u8VTPR = (u8Tpr >> 4);
+
+ /* If there are interrupts pending, intercept CR8 writes to evaluate ASAP if we
+ can deliver the interrupt to the guest. */
+ if (fPendingIntr)
+ pVmcb->ctrl.u16InterceptWrCRx |= RT_BIT(8);
+ else
+ {
+ pVmcb->ctrl.u16InterceptWrCRx &= ~RT_BIT(8);
+ pVCpu->hm.s.svm.fSyncVTpr = true;
+ }
+
+ pVmcb->ctrl.u32VmcbCleanBits &= ~(HMSVM_VMCB_CLEAN_INTERCEPTS | HMSVM_VMCB_CLEAN_INT_CTRL);
+ }
+ else
+ {
+ /* 32-bit guests use the LSTAR MSR for patching guest code that touches the TPR. */
+ pVmcb->guest.u64LSTAR = u8Tpr;
+ uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap;
+
+ /* If there are interrupts pending, intercept LSTAR writes, otherwise don't intercept reads or writes. */
+ if (fPendingIntr)
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_INTERCEPT_WRITE);
+ else
+ {
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ pVCpu->hm.s.svm.fSyncVTpr = true;
+ }
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_IOPM_MSRPM;
+ }
+ }
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_APIC_TPR);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up the exception intercepts required for guest (or nested-guest)
+ * execution in the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmExportGuestXcptIntercepts(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* If we modify intercepts from here, please check & adjust hmR0SvmMergeVmcbCtrlsNested() if required. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_SVM_GUEST_XCPT_INTERCEPTS)
+ {
+ /* Trap #UD for GIM provider (e.g. for hypercalls). */
+ if (pVCpu->hm.s.fGIMTrapXcptUD)
+ hmR0SvmSetXcptIntercept(pVmcb, X86_XCPT_UD);
+ else
+ hmR0SvmClearXcptIntercept(pVCpu, pVmcb, X86_XCPT_UD);
+
+ /* Trap #BP for INT3 debug breakpoints set by the VM debugger. */
+ if (pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
+ hmR0SvmSetXcptIntercept(pVmcb, X86_XCPT_BP);
+ else
+ hmR0SvmClearXcptIntercept(pVCpu, pVmcb, X86_XCPT_BP);
+
+ /* The remaining intercepts are handled elsewhere, e.g. in hmR0SvmExportGuestCR0(). */
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_SVM_GUEST_XCPT_INTERCEPTS;
+ }
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+/**
+ * Merges guest and nested-guest intercepts for executing the nested-guest using
+ * hardware-assisted SVM.
+ *
+ * This merges the guest and nested-guest intercepts in a way that if the outer
+ * guest intercept is set we need to intercept it in the nested-guest as
+ * well.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0SvmMergeVmcbCtrlsNested(PVMCPU pVCpu)
+{
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+ PSVMVMCB pVmcbNstGst = pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pVmcb);
+ PSVMVMCBCTRL pVmcbNstGstCtrl = &pVmcbNstGst->ctrl;
+
+ /* Merge the guest's CR intercepts into the nested-guest VMCB. */
+ pVmcbNstGstCtrl->u16InterceptRdCRx |= pVmcb->ctrl.u16InterceptRdCRx;
+ pVmcbNstGstCtrl->u16InterceptWrCRx |= pVmcb->ctrl.u16InterceptWrCRx;
+
+ /* Always intercept CR4 writes for tracking PGM mode changes. */
+ pVmcbNstGstCtrl->u16InterceptWrCRx |= RT_BIT(4);
+
+ /* Without nested paging, intercept CR3 reads and writes as we load shadow page tables. */
+ if (!pVM->hm.s.fNestedPaging)
+ {
+ pVmcbNstGstCtrl->u16InterceptRdCRx |= RT_BIT(3);
+ pVmcbNstGstCtrl->u16InterceptWrCRx |= RT_BIT(3);
+ }
+
+ /** @todo Figure out debugging with nested-guests, till then just intercept
+ * all DR[0-15] accesses. */
+ pVmcbNstGstCtrl->u16InterceptRdDRx |= 0xffff;
+ pVmcbNstGstCtrl->u16InterceptWrDRx |= 0xffff;
+
+ /*
+ * Merge the guest's exception intercepts into the nested-guest VMCB.
+ *
+ * - #UD: Exclude these as the outer guest's GIM hypercalls are not applicable
+ * while executing the nested-guest.
+ *
+ * - #BP: Exclude breakpoints set by the VM debugger for the outer guest. This can
+ * be tweaked later depending on how we wish to implement breakpoints.
+ *
+ * - #GP: Exclude these as it's the inner VMM's problem to get vmsvga 3d drivers
+ * loaded into their guests, not ours.
+ *
+ * Warning!! This ASSUMES we only intercept \#UD for hypercall purposes and \#BP
+ * for VM debugger breakpoints, see hmR0SvmExportGuestXcptIntercepts().
+ */
+#ifndef HMSVM_ALWAYS_TRAP_ALL_XCPTS
+ pVmcbNstGstCtrl->u32InterceptXcpt |= pVmcb->ctrl.u32InterceptXcpt
+ & ~( RT_BIT(X86_XCPT_UD)
+ | RT_BIT(X86_XCPT_BP)
+ | (pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv ? RT_BIT(X86_XCPT_GP) : 0));
+#else
+ pVmcbNstGstCtrl->u32InterceptXcpt |= pVmcb->ctrl.u32InterceptXcpt;
+#endif
+
+ /*
+ * Adjust intercepts while executing the nested-guest that differ from the
+ * outer guest intercepts.
+ *
+ * - VINTR: Exclude the outer guest intercept as we don't need to cause VINTR #VMEXITs
+ * that belong to the nested-guest to the outer guest.
+ *
+ * - VMMCALL: Exclude the outer guest intercept as when it's also not intercepted by
+ * the nested-guest, the physical CPU raises a \#UD exception as expected.
+ */
+ pVmcbNstGstCtrl->u64InterceptCtrl |= (pVmcb->ctrl.u64InterceptCtrl & ~( SVM_CTRL_INTERCEPT_VINTR
+ | SVM_CTRL_INTERCEPT_VMMCALL))
+ | HMSVM_MANDATORY_GUEST_CTRL_INTERCEPTS;
+
+ Assert( (pVmcbNstGstCtrl->u64InterceptCtrl & HMSVM_MANDATORY_GUEST_CTRL_INTERCEPTS)
+ == HMSVM_MANDATORY_GUEST_CTRL_INTERCEPTS);
+
+ /* Finally, update the VMCB clean bits. */
+ pVmcbNstGstCtrl->u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+}
+#endif
+
+
+/**
+ * Selects the appropriate function to run guest code.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0SvmSelectVMRunHandler(PVMCPU pVCpu)
+{
+ if (CPUMIsGuestInLongMode(pVCpu))
+ {
+#ifndef VBOX_ENABLE_64_BITS_GUESTS
+ return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
+#endif
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests); /* Guaranteed by hmR3InitFinalizeR0(). */
+#if HC_ARCH_BITS == 32
+ /* 32-bit host. We need to switch to 64-bit before running the 64-bit guest. */
+ pVCpu->hm.s.svm.pfnVMRun = SVMR0VMSwitcherRun64;
+#else
+ /* 64-bit host or hybrid host. */
+ pVCpu->hm.s.svm.pfnVMRun = SVMR0VMRun64;
+#endif
+ }
+ else
+ {
+ /* Guest is not in long mode, use the 32-bit handler. */
+ pVCpu->hm.s.svm.pfnVMRun = SVMR0VMRun;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Enters the AMD-V session.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0DECL(int) SVMR0Enter(PVMCPU pVCpu)
+{
+ AssertPtr(pVCpu);
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.svm.fSupported);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ LogFlowFunc(("pVCpu=%p\n", pVCpu));
+ Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE))
+ == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE));
+
+ pVCpu->hm.s.fLeaveDone = false;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Thread-context callback for AMD-V.
+ *
+ * @param enmEvent The thread-context event.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fGlobalInit Whether global VT-x/AMD-V init. is used.
+ * @thread EMT(pVCpu)
+ */
+VMMR0DECL(void) SVMR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit)
+{
+ NOREF(fGlobalInit);
+
+ switch (enmEvent)
+ {
+ case RTTHREADCTXEVENT_OUT:
+ {
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(VMMR0ThreadCtxHookIsEnabled(pVCpu));
+ VMCPU_ASSERT_EMT(pVCpu);
+
+ /* No longjmps (log-flush, locks) in this fragile context. */
+ VMMRZCallRing3Disable(pVCpu);
+
+ if (!pVCpu->hm.s.fLeaveDone)
+ {
+ hmR0SvmLeave(pVCpu, false /* fImportState */);
+ pVCpu->hm.s.fLeaveDone = true;
+ }
+
+ /* Leave HM context, takes care of local init (term). */
+ int rc = HMR0LeaveCpu(pVCpu);
+ AssertRC(rc); NOREF(rc);
+
+ /* Restore longjmp state. */
+ VMMRZCallRing3Enable(pVCpu);
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
+ break;
+ }
+
+ case RTTHREADCTXEVENT_IN:
+ {
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(VMMR0ThreadCtxHookIsEnabled(pVCpu));
+ VMCPU_ASSERT_EMT(pVCpu);
+
+ /* No longjmps (log-flush, locks) in this fragile context. */
+ VMMRZCallRing3Disable(pVCpu);
+
+ /*
+ * Initialize the bare minimum state required for HM. This takes care of
+ * initializing AMD-V if necessary (onlined CPUs, local init etc.)
+ */
+ int rc = hmR0EnterCpu(pVCpu);
+ AssertRC(rc); NOREF(rc);
+ Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE))
+ == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE));
+
+ pVCpu->hm.s.fLeaveDone = false;
+
+ /* Restore longjmp state. */
+ VMMRZCallRing3Enable(pVCpu);
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
+
+/**
+ * Saves the host state.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+VMMR0DECL(int) SVMR0ExportHostState(PVMCPU pVCpu)
+{
+ NOREF(pVCpu);
+
+ /* Nothing to do here. AMD-V does this for us automatically during the world-switch. */
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_HOST_CONTEXT);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest state from the guest-CPU context into the VMCB.
+ *
+ * The CPU state will be loaded from these fields on every successful VM-entry.
+ * Also sets up the appropriate VMRUN function to execute guest code based on
+ * the guest CPU mode.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0SvmExportGuestState(PVMCPU pVCpu)
+{
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
+
+ PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+
+ Assert(pVmcb);
+ HMSVM_ASSERT_NOT_IN_NESTED_GUEST(pCtx);
+
+ pVmcb->guest.u64RIP = pCtx->rip;
+ pVmcb->guest.u64RSP = pCtx->rsp;
+ pVmcb->guest.u64RFlags = pCtx->eflags.u32;
+ pVmcb->guest.u64RAX = pCtx->rax;
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (pVmcb->ctrl.IntCtrl.n.u1VGifEnable)
+ {
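+ /* With virtual GIF the VMCB carries the guest's global interrupt flag, so keep the VMCB copy in sync with CPUM's. */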
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.svm.u32Features & X86_CPUID_SVM_FEATURE_EDX_VGIF); /* Hardware supports it. */
+ Assert(HMIsSvmVGifActive(pVCpu->CTX_SUFF(pVM))); /* VM has configured it. */
+ pVmcb->ctrl.IntCtrl.n.u1VGif = CPUMGetGuestGif(pCtx);
+ }
+#endif
+
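+ /* Disable interrupts so the fCtxChanged updates below are atomic wrt the thread-context (preemption) hooks. */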
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ int rc = hmR0SvmExportGuestControlRegs(pVCpu, pVmcb);
+ AssertRCReturnStmt(rc, ASMSetFlags(fEFlags), rc);
+
+ hmR0SvmExportGuestSegmentRegs(pVCpu, pVmcb);
+ hmR0SvmExportGuestMsrs(pVCpu, pVmcb);
+ hmR0SvmExportGuestXcptIntercepts(pVCpu, pVmcb);
+
+ ASMSetFlags(fEFlags);
+
+ /* hmR0SvmExportGuestApicTpr() must be called -after- hmR0SvmExportGuestMsrs() as
+ otherwise we would overwrite the LSTAR MSR that we use for TPR patching. */
+ hmR0SvmExportGuestApicTpr(pVCpu, pVmcb);
+
+ rc = hmR0SvmSelectVMRunHandler(pVCpu);
+ AssertRCReturn(rc, rc);
+
+ /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( HM_CHANGED_GUEST_RIP
+ | HM_CHANGED_GUEST_RFLAGS
+ | HM_CHANGED_GUEST_GPRS_MASK
+ | HM_CHANGED_GUEST_X87
+ | HM_CHANGED_GUEST_SSE_AVX
+ | HM_CHANGED_GUEST_OTHER_XSAVE
+ | HM_CHANGED_GUEST_XCRx
+ | HM_CHANGED_GUEST_TSC_AUX
+ | HM_CHANGED_GUEST_OTHER_MSRS
+ | HM_CHANGED_GUEST_HWVIRT
+ | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_SVM_GUEST_XCPT_INTERCEPTS)));
+
+#ifdef VBOX_STRICT
+ /*
+ * All of the guest-CPU state and SVM keeper bits should be exported here by now,
+ * except for the host-context and/or shared host-guest context bits.
+ */
+ uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
+ RT_UNTRUSTED_NONVOLATILE_COPY_FENCE();
+ AssertMsg(!(fCtxChanged & (HM_CHANGED_ALL_GUEST & ~HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE)),
+ ("fCtxChanged=%#RX64\n", fCtxChanged));
+
+ /*
+ * If we need to log state that isn't always imported, we'll need to import them here.
+ * See hmR0SvmPostRunGuest() for which part of the state is imported unconditionally.
+ */
+ hmR0SvmLogState(pVCpu, pVmcb, "hmR0SvmExportGuestState", 0 /* fFlags */, 0 /* uVerbose */);
+#endif
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
+ return VINF_SUCCESS;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+/**
+ * Merges the guest and nested-guest MSR permission bitmap.
+ *
+ * If the guest is intercepting an MSR we need to intercept it regardless of
+ * whether the nested-guest is intercepting it or not.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jmp zone!!!
+ */
+DECLINLINE(void) hmR0SvmMergeMsrpmNested(PHMPHYSCPU pHostCpu, PVMCPU pVCpu)
+{
+ uint64_t const *pu64GstMsrpm = (uint64_t const *)pVCpu->hm.s.svm.pvMsrBitmap;
+ uint64_t const *pu64NstGstMsrpm = (uint64_t const *)pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pvMsrBitmap);
+ uint64_t *pu64DstMsrpm = (uint64_t *)pHostCpu->n.svm.pvNstGstMsrpm;
+
+ /* MSRPM bytes from offset 0x1800 are reserved, so we stop merging there. */
+ uint32_t const offRsvdQwords = 0x1800 >> 3;
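+ /* A set bit in the MSRPM causes a #VMEXIT for the corresponding MSR access, so OR-ing the bitmaps yields the union of both intercept sets. */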
+ for (uint32_t i = 0; i < offRsvdQwords; i++)
+ pu64DstMsrpm[i] = pu64NstGstMsrpm[i] | pu64GstMsrpm[i];
+}
+
+
+/**
+ * Caches the nested-guest VMCB fields before we modify them for execution using
+ * hardware-assisted SVM.
+ *
+ * @returns true if the VMCB was previously already cached, false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @sa HMNotifySvmNstGstVmexit.
+ */
+static bool hmR0SvmCacheVmcbNested(PVMCPU pVCpu)
+{
+ /*
+ * Cache the nested-guest programmed VMCB fields if we have not cached it yet.
+ * Otherwise we risk re-caching the values we may have modified, see @bugref{7243#c44}.
+ *
+ * Nested-paging CR3 is not saved back into the VMCB on #VMEXIT, hence no need to
+ * cache and restore it, see AMD spec. 15.25.4 "Nested Paging and VMRUN/#VMEXIT".
+ */
+ PSVMNESTEDVMCBCACHE pVmcbNstGstCache = &pVCpu->hm.s.svm.NstGstVmcbCache;
+ bool const fWasCached = pVmcbNstGstCache->fCacheValid;
+ if (!fWasCached)
+ {
+ PCSVMVMCB pVmcbNstGst = pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pVmcb);
+ PCSVMVMCBCTRL pVmcbNstGstCtrl = &pVmcbNstGst->ctrl;
+ pVmcbNstGstCache->u16InterceptRdCRx = pVmcbNstGstCtrl->u16InterceptRdCRx;
+ pVmcbNstGstCache->u16InterceptWrCRx = pVmcbNstGstCtrl->u16InterceptWrCRx;
+ pVmcbNstGstCache->u16InterceptRdDRx = pVmcbNstGstCtrl->u16InterceptRdDRx;
+ pVmcbNstGstCache->u16InterceptWrDRx = pVmcbNstGstCtrl->u16InterceptWrDRx;
+ pVmcbNstGstCache->u16PauseFilterThreshold = pVmcbNstGstCtrl->u16PauseFilterThreshold;
+ pVmcbNstGstCache->u16PauseFilterCount = pVmcbNstGstCtrl->u16PauseFilterCount;
+ pVmcbNstGstCache->u32InterceptXcpt = pVmcbNstGstCtrl->u32InterceptXcpt;
+ pVmcbNstGstCache->u64InterceptCtrl = pVmcbNstGstCtrl->u64InterceptCtrl;
+ pVmcbNstGstCache->u64TSCOffset = pVmcbNstGstCtrl->u64TSCOffset;
+ pVmcbNstGstCache->fVIntrMasking = pVmcbNstGstCtrl->IntCtrl.n.u1VIntrMasking;
+ pVmcbNstGstCache->fNestedPaging = pVmcbNstGstCtrl->NestedPagingCtrl.n.u1NestedPaging;
+ pVmcbNstGstCache->fLbrVirt = pVmcbNstGstCtrl->LbrVirt.n.u1LbrVirt;
+ pVmcbNstGstCache->fCacheValid = true;
+ Log4Func(("Cached VMCB fields\n"));
+ }
+
+ return fWasCached;
+}
+
+
+/**
+ * Sets up the nested-guest VMCB for execution using hardware-assisted SVM.
+ *
+ * This is done the first time we enter nested-guest execution using SVM R0
+ * until the nested-guest \#VMEXIT (not to be confused with physical CPU
+ * \#VMEXITs which may or may not cause a corresponding nested-guest \#VMEXIT).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0SvmSetupVmcbNested(PVMCPU pVCpu)
+{
+ PSVMVMCB pVmcbNstGst = pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pVmcb);
+ PSVMVMCBCTRL pVmcbNstGstCtrl = &pVmcbNstGst->ctrl;
+
+ /*
+ * First cache the nested-guest VMCB fields we may potentially modify.
+ */
+ bool const fVmcbCached = hmR0SvmCacheVmcbNested(pVCpu);
+ if (!fVmcbCached)
+ {
+ /*
+ * The IOPM of the nested-guest can be ignored because the guest always
+ * intercepts all IO port accesses. Thus, we'll swap to the guest IOPM rather
+ * than the nested-guest IOPM and swap the field back on the #VMEXIT.
+ */
+ pVmcbNstGstCtrl->u64IOPMPhysAddr = g_HCPhysIOBitmap;
+
+ /*
+ * Use the same nested-paging as the outer guest. We can't dynamically switch off
+ * nested-paging suddenly while executing a VM (see assertion at the end of
+ * Trap0eHandler() in PGMAllBth.h).
+ */
+ pVmcbNstGstCtrl->NestedPagingCtrl.n.u1NestedPaging = pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging;
+
+ /* Always enable V_INTR_MASKING as we do not want to allow access to the physical APIC TPR. */
+ pVmcbNstGstCtrl->IntCtrl.n.u1VIntrMasking = 1;
+
+ /*
+ * Turn off TPR syncing on #VMEXIT for nested-guests as CR8 intercepts are subject
+ * to the nested-guest intercepts and we always run with V_INTR_MASKING.
+ */
+ pVCpu->hm.s.svm.fSyncVTpr = false;
+
+#ifdef DEBUG_ramshankar
+ /* For debugging purposes - copy the LBR info. from outer guest VMCB. */
+ pVmcbNstGstCtrl->LbrVirt.n.u1LbrVirt = pVCpu->hm.s.svm.pVmcb->ctrl.LbrVirt.n.u1LbrVirt; /* Use the outer guest VMCB; there is no local pVmcb in this function. */
+#endif
+
+ /*
+ * If we don't expose Virtualized-VMSAVE/VMLOAD feature to the outer guest, we
+ * need to intercept VMSAVE/VMLOAD instructions executed by the nested-guest.
+ */
+ if (!pVCpu->CTX_SUFF(pVM)->cpum.ro.GuestFeatures.fSvmVirtVmsaveVmload)
+ pVmcbNstGstCtrl->u64InterceptCtrl |= SVM_CTRL_INTERCEPT_VMSAVE
+ | SVM_CTRL_INTERCEPT_VMLOAD;
+
+ /*
+ * If we don't expose Virtual GIF feature to the outer guest, we need to intercept
+ * CLGI/STGI instructions executed by the nested-guest.
+ */
+ if (!pVCpu->CTX_SUFF(pVM)->cpum.ro.GuestFeatures.fSvmVGif)
+ pVmcbNstGstCtrl->u64InterceptCtrl |= SVM_CTRL_INTERCEPT_CLGI
+ | SVM_CTRL_INTERCEPT_STGI;
+
+ /* Merge the guest and nested-guest intercepts. */
+ hmR0SvmMergeVmcbCtrlsNested(pVCpu);
+
+ /* Update the VMCB clean bits. */
+ pVmcbNstGstCtrl->u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+ }
+ else
+ {
+ Assert(!pVCpu->hm.s.svm.fSyncVTpr);
+ Assert(pVmcbNstGstCtrl->u64IOPMPhysAddr == g_HCPhysIOBitmap);
+ Assert(RT_BOOL(pVmcbNstGstCtrl->NestedPagingCtrl.n.u1NestedPaging) == pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
+ }
+}
+
+
+/**
+ * Exports the nested-guest state into the VMCB.
+ *
+ * We need to export the entire state as we could be continuing nested-guest
+ * execution at any point (not just immediately after VMRUN) and thus the VMCB
+ * can be out-of-sync with the nested-guest state if it was executed in IEM.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0SvmExportGuestStateNested(PVMCPU pVCpu)
+{
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
+
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PSVMVMCB pVmcbNstGst = pCtx->hwvirt.svm.CTX_SUFF(pVmcb);
+ Assert(pVmcbNstGst);
+
+ hmR0SvmSetupVmcbNested(pVCpu);
+
+ pVmcbNstGst->guest.u64RIP = pCtx->rip;
+ pVmcbNstGst->guest.u64RSP = pCtx->rsp;
+ pVmcbNstGst->guest.u64RFlags = pCtx->eflags.u32;
+ pVmcbNstGst->guest.u64RAX = pCtx->rax;
+
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ int rc = hmR0SvmExportGuestControlRegs(pVCpu, pVmcbNstGst);
+ AssertRCReturnStmt(rc, ASMSetFlags(fEFlags), rc);
+
+ hmR0SvmExportGuestSegmentRegs(pVCpu, pVmcbNstGst);
+ hmR0SvmExportGuestMsrs(pVCpu, pVmcbNstGst);
+ hmR0SvmExportGuestHwvirtStateNested(pVCpu, pVmcbNstGst);
+
+ ASMSetFlags(fEFlags);
+
+ /* Nested VGIF not supported yet. */
+ Assert(!pVmcbNstGst->ctrl.IntCtrl.n.u1VGifEnable);
+
+ rc = hmR0SvmSelectVMRunHandler(pVCpu);
+ AssertRCReturn(rc, rc);
+
+ /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( HM_CHANGED_GUEST_RIP
+ | HM_CHANGED_GUEST_RFLAGS
+ | HM_CHANGED_GUEST_GPRS_MASK
+ | HM_CHANGED_GUEST_APIC_TPR
+ | HM_CHANGED_GUEST_X87
+ | HM_CHANGED_GUEST_SSE_AVX
+ | HM_CHANGED_GUEST_OTHER_XSAVE
+ | HM_CHANGED_GUEST_XCRx
+ | HM_CHANGED_GUEST_TSC_AUX
+ | HM_CHANGED_GUEST_OTHER_MSRS
+ | HM_CHANGED_SVM_GUEST_XCPT_INTERCEPTS
+ | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_SVM_MASK)));
+
+#ifdef VBOX_STRICT
+ /*
+ * All of the guest-CPU state and SVM keeper bits should be exported here by now, except
+ * for the host-context and/or shared host-guest context bits.
+ */
+ uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
+ RT_UNTRUSTED_NONVOLATILE_COPY_FENCE();
+ AssertMsg(!(fCtxChanged & (HM_CHANGED_ALL_GUEST & ~HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE)),
+ ("fCtxChanged=%#RX64\n", fCtxChanged));
+
+ /*
+ * If we need to log state that isn't always imported, we'll need to import them here.
+ * See hmR0SvmPostRunGuest() for which part of the state is imported unconditionally.
+ */
+ hmR0SvmLogState(pVCpu, pVmcbNstGst, "hmR0SvmExportGuestStateNested", 0 /* fFlags */, 0 /* uVerbose */);
+#endif
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
+ return rc;
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT_SVM */
+
+
+/**
+ * Exports the state shared between the host and guest (or nested-guest) into
+ * the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmExportSharedState(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
+ {
+ /** @todo Figure out stepping with nested-guest. */
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (!CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ hmR0SvmExportSharedDebugState(pVCpu, pVmcb);
+ else
+ {
+ pVmcb->guest.u64DR6 = pCtx->dr[6];
+ pVmcb->guest.u64DR7 = pCtx->dr[7];
+ }
+ }
+
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
+ AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE),
+ ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
+}
+
+
+/**
+ * Worker for SVMR0ImportStateOnDemand.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
+ */
+static void hmR0SvmImportGuestState(PVMCPU pVCpu, uint64_t fWhat)
+{
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatImportGuestState, x);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ PCSVMVMCBSTATESAVE pVmcbGuest = &pVmcb->guest;
+ PCSVMVMCBCTRL pVmcbCtrl = &pVmcb->ctrl;
+
+ Log4Func(("fExtrn=%#RX64 fWhat=%#RX64\n", pCtx->fExtrn, fWhat));
+
+ /*
+ * We disable interrupts to make the updating of the state and in particular
+ * the fExtrn modification atomic wrt to preemption hooks.
+ */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
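+ /* Only state still marked external (i.e. whose current copy lives in the VMCB rather than in CPUMCTX) needs importing. */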
+ fWhat &= pCtx->fExtrn;
+ if (fWhat)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (fWhat & CPUMCTX_EXTRN_HWVIRT)
+ {
+ if (pVmcbCtrl->IntCtrl.n.u1VGifEnable)
+ {
+ Assert(!CPUMIsGuestInSvmNestedHwVirtMode(pCtx)); /* We don't yet support passing VGIF feature to the guest. */
+ Assert(HMIsSvmVGifActive(pVCpu->CTX_SUFF(pVM))); /* VM has configured it. */
+ CPUMSetGuestGif(pCtx, pVmcbCtrl->IntCtrl.n.u1VGif);
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_HM_SVM_HWVIRT_VIRQ)
+ {
+ if ( !pVmcbCtrl->IntCtrl.n.u1VIrqPending
+ && VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST);
+ }
+#endif
+
+ if (fWhat & CPUMCTX_EXTRN_HM_SVM_INT_SHADOW)
+ {
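+ /* The interrupt shadow bit is set when the guest executed STI, MOV SS or POP SS; mirror it using the inhibit-interrupts force-flag and the RIP it applies to. */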
+ if (pVmcbCtrl->IntShadow.n.u1IntShadow)
+ EMSetInhibitInterruptsPC(pVCpu, pVmcbGuest->u64RIP);
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_RIP)
+ pCtx->rip = pVmcbGuest->u64RIP;
+
+ if (fWhat & CPUMCTX_EXTRN_RFLAGS)
+ pCtx->eflags.u32 = pVmcbGuest->u64RFlags;
+
+ if (fWhat & CPUMCTX_EXTRN_RSP)
+ pCtx->rsp = pVmcbGuest->u64RSP;
+
+ if (fWhat & CPUMCTX_EXTRN_RAX)
+ pCtx->rax = pVmcbGuest->u64RAX;
+
+ if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CS)
+ {
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, CS, cs);
+ /* Correct the CS granularity bit. Haven't seen it being wrong in any other register (yet). */
+ /** @todo SELM might need to be fixed as it too should not care about the
+ * granularity bit. See @bugref{6785}. */
+ if ( !pCtx->cs.Attr.n.u1Granularity
+ && pCtx->cs.Attr.n.u1Present
+ && pCtx->cs.u32Limit > UINT32_C(0xfffff))
+ {
+ Assert((pCtx->cs.u32Limit & 0xfff) == 0xfff);
+ pCtx->cs.Attr.n.u1Granularity = 1;
+ }
+ HMSVM_ASSERT_SEG_GRANULARITY(pCtx, cs);
+ }
+ if (fWhat & CPUMCTX_EXTRN_SS)
+ {
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, SS, ss);
+ HMSVM_ASSERT_SEG_GRANULARITY(pCtx, ss);
+ /*
+ * Sync the hidden SS DPL field. AMD CPUs have a separate CPL field in the
+ * VMCB and use that, so it's possible that the SS DPL isn't updated by AMD-V
+ * when the CPL changes during guest execution. Observed on some
+ * AMD Fusion CPUs with 64-bit guests.
+ *
+ * See AMD spec. 15.5.1 "Basic operation".
+ */
+ Assert(!(pVmcbGuest->u8CPL & ~0x3));
+ uint8_t const uCpl = pVmcbGuest->u8CPL;
+ if (pCtx->ss.Attr.n.u2Dpl != uCpl)
+ pCtx->ss.Attr.n.u2Dpl = uCpl & 0x3;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DS)
+ {
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, DS, ds);
+ HMSVM_ASSERT_SEG_GRANULARITY(pCtx, ds);
+ }
+ if (fWhat & CPUMCTX_EXTRN_ES)
+ {
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, ES, es);
+ HMSVM_ASSERT_SEG_GRANULARITY(pCtx, es);
+ }
+ if (fWhat & CPUMCTX_EXTRN_FS)
+ {
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, FS, fs);
+ HMSVM_ASSERT_SEG_GRANULARITY(pCtx, fs);
+ }
+ if (fWhat & CPUMCTX_EXTRN_GS)
+ {
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, GS, gs);
+ HMSVM_ASSERT_SEG_GRANULARITY(pCtx, gs);
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_TR)
+ {
+ /*
+ * Fix up the TR attributes so they're compatible with Intel. Important when saved-states
+ * are used between Intel and AMD, see @bugref{6208#c39}.
+ * ASSUME that it's normally correct and that we're in 32-bit or 64-bit mode.
+ */
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, TR, tr);
+ if (pCtx->tr.Attr.n.u4Type != X86_SEL_TYPE_SYS_386_TSS_BUSY)
+ {
+ if ( pCtx->tr.Attr.n.u4Type == X86_SEL_TYPE_SYS_386_TSS_AVAIL
+ || CPUMIsGuestInLongModeEx(pCtx))
+ pCtx->tr.Attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
+ else if (pCtx->tr.Attr.n.u4Type == X86_SEL_TYPE_SYS_286_TSS_AVAIL)
+ pCtx->tr.Attr.n.u4Type = X86_SEL_TYPE_SYS_286_TSS_BUSY;
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_LDTR)
+ HMSVM_SEG_REG_COPY_FROM_VMCB(pCtx, pVmcbGuest, LDTR, ldtr);
+
+ if (fWhat & CPUMCTX_EXTRN_GDTR)
+ {
+ pCtx->gdtr.cbGdt = pVmcbGuest->GDTR.u32Limit;
+ pCtx->gdtr.pGdt = pVmcbGuest->GDTR.u64Base;
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_IDTR)
+ {
+ pCtx->idtr.cbIdt = pVmcbGuest->IDTR.u32Limit;
+ pCtx->idtr.pIdt = pVmcbGuest->IDTR.u64Base;
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
+ {
+ pCtx->msrSTAR = pVmcbGuest->u64STAR;
+ pCtx->msrLSTAR = pVmcbGuest->u64LSTAR;
+ pCtx->msrCSTAR = pVmcbGuest->u64CSTAR;
+ pCtx->msrSFMASK = pVmcbGuest->u64SFMASK;
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
+ {
+ pCtx->SysEnter.cs = pVmcbGuest->u64SysEnterCS;
+ pCtx->SysEnter.eip = pVmcbGuest->u64SysEnterEIP;
+ pCtx->SysEnter.esp = pVmcbGuest->u64SysEnterESP;
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
+ pCtx->msrKERNELGSBASE = pVmcbGuest->u64KernelGSBase;
+
+ if (fWhat & CPUMCTX_EXTRN_DR_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_DR6)
+ {
+ if (!pVCpu->hm.s.fUsingHyperDR7)
+ pCtx->dr[6] = pVmcbGuest->u64DR6;
+ else
+ CPUMSetHyperDR6(pVCpu, pVmcbGuest->u64DR6);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_DR7)
+ {
+ if (!pVCpu->hm.s.fUsingHyperDR7)
+ pCtx->dr[7] = pVmcbGuest->u64DR7;
+ else
+ Assert(pVmcbGuest->u64DR7 == CPUMGetHyperDR7(pVCpu));
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_CR_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CR0)
+ {
+ /* We intercept changes to all CR0 bits except maybe TS & MP bits. */
+ uint64_t const uCr0 = (pCtx->cr0 & ~(X86_CR0_TS | X86_CR0_MP))
+ | (pVmcbGuest->u64CR0 & (X86_CR0_TS | X86_CR0_MP));
+ VMMRZCallRing3Disable(pVCpu); /* Calls into PGM which has Log statements. */
+ CPUMSetGuestCR0(pVCpu, uCr0);
+ VMMRZCallRing3Enable(pVCpu);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_CR2)
+ pCtx->cr2 = pVmcbGuest->u64CR2;
+
+ if (fWhat & CPUMCTX_EXTRN_CR3)
+ {
+ if ( pVmcbCtrl->NestedPagingCtrl.n.u1NestedPaging
+ && pCtx->cr3 != pVmcbGuest->u64CR3)
+ {
+ CPUMSetGuestCR3(pVCpu, pVmcbGuest->u64CR3);
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
+ }
+ }
+
+ /* Changes to CR4 are always intercepted. */
+ }
+
+ /* Update fExtrn. */
+ pCtx->fExtrn &= ~fWhat;
+
+ /* If everything has been imported, clear the HM keeper bit. */
+ if (!(pCtx->fExtrn & HMSVM_CPUMCTX_EXTRN_ALL))
+ {
+ pCtx->fExtrn &= ~CPUMCTX_EXTRN_KEEPER_HM;
+ Assert(!pCtx->fExtrn);
+ }
+ }
+ else
+ Assert(!pCtx->fExtrn || (pCtx->fExtrn & HMSVM_CPUMCTX_EXTRN_ALL));
+
+ ASMSetFlags(fEFlags);
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatImportGuestState, x);
+
+ /*
+ * Honor any pending CR3 updates.
+ *
+ * Consider this scenario: #VMEXIT -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp
+ * -> hmR0SvmCallRing3Callback() -> VMMRZCallRing3Disable() -> hmR0SvmImportGuestState()
+ * -> Sets VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp -> continue with #VMEXIT
+ * handling -> hmR0SvmImportGuestState() and here we are.
+ *
+ * The reason for such complicated handling is because VM-exits that call into PGM expect
+ * CR3 to be up-to-date and thus any CR3-saves -before- the VM-exit (longjmp) would've
+ * postponed the CR3 update via the force-flag and cleared CR3 from fExtrn. Any SVM R0
+ * VM-exit handler that requests CR3 to be saved will end up here and we call PGMUpdateCR3().
+ *
+ * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again,
+ * nor does it process force-flags like regular exits to ring-3 do, so we cover for it here.
+ */
+ if ( VMMRZCallRing3IsEnabled(pVCpu)
+ && VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
+ {
+ Assert(pCtx->cr3 == pVmcbGuest->u64CR3);
+ PGMUpdateCR3(pVCpu, pCtx->cr3);
+ }
+}
+
+
+/**
+ * Saves the guest (or nested-guest) state from the VMCB into the guest-CPU
+ * context.
+ *
+ * Currently there is no residual state left in the CPU that is not updated in the
+ * VMCB.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
+ */
+VMMR0DECL(int) SVMR0ImportStateOnDemand(PVMCPU pVCpu, uint64_t fWhat)
+{
+ hmR0SvmImportGuestState(pVCpu, fWhat);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does the necessary state syncing before returning to ring-3 for any reason
+ * (longjmp, preemption, voluntary exits to ring-3) from AMD-V.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fImportState Whether to import the guest state from the VMCB back
+ * to the guest-CPU context.
+ *
+ * @remarks No-long-jmp zone!!!
+ */
+static void hmR0SvmLeave(PVMCPU pVCpu, bool fImportState)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(VMMR0IsLogFlushDisabled(pVCpu));
+
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, make sure to check whether hmR0SvmCallRing3Callback() needs to be updated too.
+ */
+
+ /* Save the guest state if necessary. */
+ if (fImportState)
+ hmR0SvmImportGuestState(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+
+ /* Restore host FPU state if necessary and resync on next R0 reentry. */
+ CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
+ Assert(!CPUMIsGuestFPUStateActive(pVCpu));
+
+ /*
+ * Restore host debug registers if necessary and resync on next R0 reentry.
+ */
+#ifdef VBOX_STRICT
+ if (CPUMIsHyperDebugStateActive(pVCpu))
+ {
+ PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb; /** @todo nested-guest. */
+ Assert(pVmcb->ctrl.u16InterceptRdDRx == 0xffff);
+ Assert(pVmcb->ctrl.u16InterceptWrDRx == 0xffff);
+ }
+#endif
+ CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, false /* save DR6 */);
+ Assert(!CPUMIsHyperDebugStateActive(pVCpu));
+ Assert(!CPUMIsGuestDebugStateActive(pVCpu));
+
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
+
+ VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
+}
+
+
+/**
+ * Leaves the AMD-V session.
+ *
+ * Only used while returning to ring-3 either due to longjump or exits to
+ * ring-3.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int hmR0SvmLeaveSession(PVMCPU pVCpu)
+{
+ HM_DISABLE_PREEMPT(pVCpu);
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
+ and done this from the SVMR0ThreadCtxCallback(). */
+ if (!pVCpu->hm.s.fLeaveDone)
+ {
+ hmR0SvmLeave(pVCpu, true /* fImportState */);
+ pVCpu->hm.s.fLeaveDone = true;
+ }
+
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, make sure to check whether hmR0SvmCallRing3Callback() needs to be updated too.
+ */
+
+ /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
+ /* Deregister hook now that we've left HM context before re-enabling preemption. */
+ VMMR0ThreadCtxHookDisable(pVCpu);
+
+ /* Leave HM context. This takes care of local init (term). */
+ int rc = HMR0LeaveCpu(pVCpu);
+
+ HM_RESTORE_PREEMPT();
+ return rc;
+}
+
+
+/**
+ * Does the necessary state syncing before doing a longjmp to ring-3.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jmp zone!!!
+ */
+static int hmR0SvmLongJmpToRing3(PVMCPU pVCpu)
+{
+ return hmR0SvmLeaveSession(pVCpu);
+}
+
+
+/**
+ * VMMRZCallRing3() callback wrapper which saves the guest state (or restores
+ * any remaining host state) before we longjump to ring-3 and possibly get
+ * preempted.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param enmOperation The operation causing the ring-3 longjump.
+ * @param pvUser The user argument, NULL (currently unused).
+ */
+static DECLCALLBACK(int) hmR0SvmCallRing3Callback(PVMCPU pVCpu, VMMCALLRING3 enmOperation, void *pvUser)
+{
+ RT_NOREF_PV(pvUser);
+
+ if (enmOperation == VMMCALLRING3_VM_R0_ASSERTION)
+ {
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, make sure to check whether hmR0SvmLeave() and hmR0SvmLeaveSession() need
+ * to be updated too. This is a stripped down version which gets out ASAP trying to not trigger any assertion.
+ */
+ VMMRZCallRing3RemoveNotification(pVCpu);
+ VMMRZCallRing3Disable(pVCpu);
+ HM_DISABLE_PREEMPT(pVCpu);
+
+ /* Import the entire guest state. */
+ hmR0SvmImportGuestState(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+
+ /* Restore host FPU state if necessary and resync on next R0 reentry. */
+ CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
+
+ /* Restore host debug registers if necessary and resync on next R0 reentry. */
+ CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, false /* save DR6 */);
+
+ /* Deregister the hook now that we've left HM context before re-enabling preemption. */
+ /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
+ VMMR0ThreadCtxHookDisable(pVCpu);
+
+ /* Leave HM context. This takes care of local init (term). */
+ HMR0LeaveCpu(pVCpu);
+
+ HM_RESTORE_PREEMPT();
+ return VINF_SUCCESS;
+ }
+
+ Assert(pVCpu);
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+ HMSVM_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ VMMRZCallRing3Disable(pVCpu);
+ Assert(VMMR0IsLogFlushDisabled(pVCpu));
+
+ Log4Func(("Calling hmR0SvmLongJmpToRing3\n"));
+ int rc = hmR0SvmLongJmpToRing3(pVCpu);
+ AssertRCReturn(rc, rc);
+
+ VMMRZCallRing3Enable(pVCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Take necessary actions before going back to ring-3.
+ *
+ * An action requires us to go back to ring-3. This function does the necessary
+ * steps before we can safely return to ring-3. This is not the same as longjmps
+ * to ring-3; this is voluntary.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param rcExit The reason for exiting to ring-3. Can be
+ * VINF_VMM_UNKNOWN_RING3_CALL.
+ */
+static int hmR0SvmExitToRing3(PVMCPU pVCpu, int rcExit)
+{
+ Assert(pVCpu);
+ HMSVM_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ /* Please, no longjumps here (any logging shouldn't flush jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
+ VMMRZCallRing3Disable(pVCpu);
+ Log4Func(("rcExit=%d LocalFF=%#RX64 GlobalFF=%#RX32\n", rcExit, (uint64_t)pVCpu->fLocalForcedActions,
+ pVCpu->CTX_SUFF(pVM)->fGlobalForcedActions));
+
+ /* We need to do this only while truly exiting the "inner loop" back to ring-3 and -not- for any longjmp to ring3. */
+ if (pVCpu->hm.s.Event.fPending)
+ {
+ hmR0SvmPendingEventToTrpmTrap(pVCpu);
+ Assert(!pVCpu->hm.s.Event.fPending);
+ }
+
+ /* Sync. the necessary state for going back to ring-3. */
+ hmR0SvmLeaveSession(pVCpu);
+ STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
+
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
+ CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
+ | CPUM_CHANGED_LDTR
+ | CPUM_CHANGED_GDTR
+ | CPUM_CHANGED_IDTR
+ | CPUM_CHANGED_TR
+ | CPUM_CHANGED_HIDDEN_SEL_REGS);
+ if ( pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging
+ && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
+ {
+ CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
+ }
+
+ /* Update the exit-to-ring 3 reason. */
+ pVCpu->hm.s.rcLastExitToR3 = rcExit;
+
+ /* On our way back from ring-3, reload the guest-CPU state if it may change while in ring-3. */
+ if ( rcExit != VINF_EM_RAW_INTERRUPT
+ || CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ {
+ Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMSVM_CPUMCTX_EXTRN_ALL));
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
+
+ /* We do -not- want any longjmp notifications after this! We must return to ring-3 ASAP. */
+ VMMRZCallRing3RemoveNotification(pVCpu);
+ VMMRZCallRing3Enable(pVCpu);
+
+ /*
+ * If we're emulating an instruction, we shouldn't have any TRPM traps pending
+ * and if we're injecting an event we should have a TRPM trap pending.
+ */
+ AssertReturnStmt(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu),
+ pVCpu->hm.s.u32HMError = rcExit,
+ VERR_SVM_IPE_5);
+ AssertReturnStmt(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu),
+ pVCpu->hm.s.u32HMError = rcExit,
+ VERR_SVM_IPE_4);
+
+ return rcExit;
+}
+
+
+/**
+ * Updates the use of TSC offsetting mode for the CPU and adjusts the necessary
+ * intercepts.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmUpdateTscOffsetting(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ /*
+ * Avoid intercepting RDTSC/RDTSCP if we determined the host TSC (++) is stable
+ * and, in the case of a nested-guest, if the nested-guest VMCB specifies it is not
+ * intercepting RDTSC/RDTSCP either.
+ */
+ bool fParavirtTsc;
+ uint64_t uTscOffset;
+ bool const fCanUseRealTsc = TMCpuTickCanUseRealTSC(pVCpu->CTX_SUFF(pVM), pVCpu, &uTscOffset, &fParavirtTsc);
+
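+ /* If the real TSC cannot be used, intercept RDTSC/RDTSCP and emulate them; otherwise clear the intercepts (they may remain set on behalf of a nested-guest). */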
+ bool fIntercept;
+ if (fCanUseRealTsc)
+ fIntercept = hmR0SvmClearCtrlIntercept(pVCpu, pVmcb, SVM_CTRL_INTERCEPT_RDTSC | SVM_CTRL_INTERCEPT_RDTSCP);
+ else
+ {
+ hmR0SvmSetCtrlIntercept(pVmcb, SVM_CTRL_INTERCEPT_RDTSC | SVM_CTRL_INTERCEPT_RDTSCP);
+ fIntercept = true;
+ }
+
+ if (!fIntercept)
+ {
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ /* Apply the nested-guest VMCB's TSC offset over the guest TSC offset. */
+ if (CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ uTscOffset = HMApplySvmNstGstTscOffset(pVCpu, uTscOffset);
+#endif
+
+ /* Update the TSC offset in the VMCB and the relevant clean bits. */
+ pVmcb->ctrl.u64TSCOffset = uTscOffset;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
+ }
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
+
+ /* Currently neither Hyper-V nor KVM need to update their paravirt. TSC
+ information before every VM-entry, hence we have nothing to do here at the moment. */
+ if (fParavirtTsc)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
+}
+
+
+/**
+ * Sets an event as a pending event to be injected into the guest.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pEvent Pointer to the SVM event.
+ * @param GCPtrFaultAddress The fault-address (CR2) in case it's a
+ * page-fault.
+ *
+ * @remarks Statistics counter assumes this is a guest event being reflected to
+ * the guest i.e. 'StatInjectPendingReflect' is incremented always.
+ */
+DECLINLINE(void) hmR0SvmSetPendingEvent(PVMCPU pVCpu, PSVMEVENT pEvent, RTGCUINTPTR GCPtrFaultAddress)
+{
+ Assert(!pVCpu->hm.s.Event.fPending);
+ Assert(pEvent->n.u1Valid);
+
+ pVCpu->hm.s.Event.u64IntInfo = pEvent->u;
+ pVCpu->hm.s.Event.fPending = true;
+ pVCpu->hm.s.Event.GCPtrFaultAddress = GCPtrFaultAddress;
+
+ Log4Func(("u=%#RX64 u8Vector=%#x Type=%#x ErrorCodeValid=%RTbool ErrorCode=%#RX32\n", pEvent->u, pEvent->n.u8Vector,
+ (uint8_t)pEvent->n.u3Type, !!pEvent->n.u1ErrorCodeValid, pEvent->n.u32ErrorCode));
+}
+
+
+/**
+ * Sets an invalid-opcode (\#UD) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0SvmSetPendingXcptUD(PVMCPU pVCpu)
+{
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_UD;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Sets a debug (\#DB) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0SvmSetPendingXcptDB(PVMCPU pVCpu)
+{
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_DB;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Sets a page fault (\#PF) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param u32ErrCode The error-code for the page-fault.
+ * @param uFaultAddress The page fault address (CR2).
+ *
+ * @remarks This updates the guest CR2 with @a uFaultAddress!
+ */
+DECLINLINE(void) hmR0SvmSetPendingXcptPF(PVMCPU pVCpu, uint32_t u32ErrCode, RTGCUINTPTR uFaultAddress)
+{
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_PF;
+ Event.n.u1ErrorCodeValid = 1;
+ Event.n.u32ErrorCode = u32ErrCode;
+
+ /* Update CR2 of the guest. */
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR2);
+ if (pVCpu->cpum.GstCtx.cr2 != uFaultAddress)
+ {
+ pVCpu->cpum.GstCtx.cr2 = uFaultAddress;
+ /* The VMCB clean bit for CR2 will be updated while re-loading the guest state. */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR2);
+ }
+
+ hmR0SvmSetPendingEvent(pVCpu, &Event, uFaultAddress);
+}
+
+
+/**
+ * Sets a math-fault (\#MF) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0SvmSetPendingXcptMF(PVMCPU pVCpu)
+{
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_MF;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Sets a double fault (\#DF) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0SvmSetPendingXcptDF(PVMCPU pVCpu)
+{
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_DF;
+ Event.n.u1ErrorCodeValid = 1;
+ Event.n.u32ErrorCode = 0;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Injects an event into the guest upon VMRUN by updating the relevant field
+ * in the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the guest VM control block.
+ * @param pEvent Pointer to the event.
+ *
+ * @remarks No-long-jump zone!!!
+ * @remarks Requires CR0!
+ */
+DECLINLINE(void) hmR0SvmInjectEventVmcb(PVMCPU pVCpu, PSVMVMCB pVmcb, PSVMEVENT pEvent)
+{
+ Assert(!pVmcb->ctrl.EventInject.n.u1Valid);
+ pVmcb->ctrl.EventInject.u = pEvent->u;
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[pEvent->n.u8Vector & MASK_INJECT_IRQ_STAT]);
+ RT_NOREF(pVCpu);
+
+ Log4Func(("u=%#RX64 u8Vector=%#x Type=%#x ErrorCodeValid=%RTbool ErrorCode=%#RX32\n", pEvent->u, pEvent->n.u8Vector,
+ (uint8_t)pEvent->n.u3Type, !!pEvent->n.u1ErrorCodeValid, pEvent->n.u32ErrorCode));
+}
+
+
+
+/**
+ * Converts any TRPM trap into a pending HM event. This is typically used when
+ * entering from ring-3 (not longjmp returns).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0SvmTrpmTrapToPendingEvent(PVMCPU pVCpu)
+{
+ Assert(TRPMHasTrap(pVCpu));
+ Assert(!pVCpu->hm.s.Event.fPending);
+
+ uint8_t uVector;
+ TRPMEVENT enmTrpmEvent;
+ RTGCUINT uErrCode;
+ RTGCUINTPTR GCPtrFaultAddress;
+ uint8_t cbInstr;
+
+ int rc = TRPMQueryTrapAll(pVCpu, &uVector, &enmTrpmEvent, &uErrCode, &GCPtrFaultAddress, &cbInstr);
+ AssertRC(rc);
+
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u8Vector = uVector;
+
+ /* Refer AMD spec. 15.20 "Event Injection" for the format. */
+ if (enmTrpmEvent == TRPM_TRAP)
+ {
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ switch (uVector)
+ {
+ case X86_XCPT_NMI:
+ {
+ Event.n.u3Type = SVM_EVENT_NMI;
+ break;
+ }
+
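+ /* These exception vectors deliver an error code, so forward it in the injected event. */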
+ case X86_XCPT_PF:
+ case X86_XCPT_DF:
+ case X86_XCPT_TS:
+ case X86_XCPT_NP:
+ case X86_XCPT_SS:
+ case X86_XCPT_GP:
+ case X86_XCPT_AC:
+ {
+ Event.n.u1ErrorCodeValid = 1;
+ Event.n.u32ErrorCode = uErrCode;
+ break;
+ }
+ }
+ }
+ else if (enmTrpmEvent == TRPM_HARDWARE_INT)
+ Event.n.u3Type = SVM_EVENT_EXTERNAL_IRQ;
+ else if (enmTrpmEvent == TRPM_SOFTWARE_INT)
+ Event.n.u3Type = SVM_EVENT_SOFTWARE_INT;
+ else
+ AssertMsgFailed(("Invalid TRPM event type %d\n", enmTrpmEvent));
+
+ rc = TRPMResetTrap(pVCpu);
+ AssertRC(rc);
+
+ Log4(("TRPM->HM event: u=%#RX64 u8Vector=%#x uErrorCodeValid=%RTbool uErrorCode=%#RX32\n", Event.u, Event.n.u8Vector,
+ !!Event.n.u1ErrorCodeValid, Event.n.u32ErrorCode));
+
+ hmR0SvmSetPendingEvent(pVCpu, &Event, GCPtrFaultAddress);
+}
+
+
+/**
+ * Converts any pending SVM event into a TRPM trap. Typically used when leaving
+ * AMD-V to execute any instruction.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0SvmPendingEventToTrpmTrap(PVMCPU pVCpu)
+{
+ Assert(pVCpu->hm.s.Event.fPending);
+ Assert(TRPMQueryTrap(pVCpu, NULL /* pu8TrapNo */, NULL /* pEnmType */) == VERR_TRPM_NO_ACTIVE_TRAP);
+
+ SVMEVENT Event;
+ Event.u = pVCpu->hm.s.Event.u64IntInfo;
+
+ uint8_t uVector = Event.n.u8Vector;
+ uint8_t uVectorType = Event.n.u3Type;
+ TRPMEVENT enmTrapType = HMSvmEventToTrpmEventType(&Event);
+
+ Log4(("HM event->TRPM: uVector=%#x enmTrapType=%d\n", uVector, uVectorType));
+
+ int rc = TRPMAssertTrap(pVCpu, uVector, enmTrapType);
+ AssertRC(rc);
+
+ if (Event.n.u1ErrorCodeValid)
+ TRPMSetErrorCode(pVCpu, Event.n.u32ErrorCode);
+
+ if ( uVectorType == SVM_EVENT_EXCEPTION
+ && uVector == X86_XCPT_PF)
+ {
+ TRPMSetFaultAddress(pVCpu, pVCpu->hm.s.Event.GCPtrFaultAddress);
+ Assert(pVCpu->hm.s.Event.GCPtrFaultAddress == CPUMGetGuestCR2(pVCpu));
+ }
+ else if (uVectorType == SVM_EVENT_SOFTWARE_INT)
+ {
+ AssertMsg( uVectorType == SVM_EVENT_SOFTWARE_INT
+ || (uVector == X86_XCPT_BP || uVector == X86_XCPT_OF),
+ ("Invalid vector: uVector=%#x uVectorType=%#x\n", uVector, uVectorType));
+ TRPMSetInstrLength(pVCpu, pVCpu->hm.s.Event.cbInstr);
+ }
+ pVCpu->hm.s.Event.fPending = false;
+}
+
+
+/**
+ * Checks if the guest (or nested-guest) has an interrupt shadow active right
+ * now.
+ *
+ * @returns @c true if the interrupt shadow is active, @c false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ * @remarks Has side-effects on the VMCPU_FF_INHIBIT_INTERRUPTS force-flag.
+ */
+static bool hmR0SvmIsIntrShadowActive(PVMCPU pVCpu)
+{
+ /*
+ * Instructions like STI and MOV SS inhibit interrupts till the next instruction
+ * completes. Check if we should inhibit interrupts or clear any existing
+ * interrupt inhibition.
+ */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ {
+ if (pVCpu->cpum.GstCtx.rip != EMGetInhibitInterruptsPC(pVCpu))
+ {
+ /*
+ * We can clear the inhibit force flag as even if we go back to the recompiler
+ * without executing guest code in AMD-V, the flag's condition to be cleared is
+ * met and thus the cleared state is correct.
+ */
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+ return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+
+/**
+ * Sets the virtual interrupt intercept control in the VMCB.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ */
+static void hmR0SvmSetIntWindowExiting(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ /*
+ * When AVIC isn't supported, set up an interrupt window to cause a #VMEXIT when the guest
+ * is ready to accept interrupts. At #VMEXIT, we then get the interrupt from the APIC
+ * (updating ISR at the right time) and inject the interrupt.
+ *
+     * When AVIC is supported, we could make use of asynchronous delivery without
+     * a #VMEXIT and we would be passing the AVIC page to SVM.
+ *
+ * In AMD-V, an interrupt window is achieved using a combination of V_IRQ (an interrupt
+ * is pending), V_IGN_TPR (ignore TPR priorities) and the VINTR intercept all being set.
+ */
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ /*
+     * Currently we don't overlay interrupt windows and if there's any V_IRQ pending in the
+ * nested-guest VMCB, we avoid setting up any interrupt window on behalf of the outer
+ * guest.
+ */
+ /** @todo Does this mean we end up prioritizing virtual interrupt
+ * delivery/window over a physical interrupt (from the outer guest)
+     *        that might be pending? */
+ bool const fEnableIntWindow = !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST);
+ if (!fEnableIntWindow)
+ {
+ Assert(CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx));
+ Log4(("Nested-guest V_IRQ already pending\n"));
+ }
+#else
+ bool const fEnableIntWindow = true;
+ RT_NOREF(pVCpu);
+#endif
+ if (fEnableIntWindow)
+ {
+ Assert(pVmcb->ctrl.IntCtrl.n.u1IgnoreTPR);
+ pVmcb->ctrl.IntCtrl.n.u1VIrqPending = 1;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INT_CTRL;
+ hmR0SvmSetCtrlIntercept(pVmcb, SVM_CTRL_INTERCEPT_VINTR);
+ Log4(("Set VINTR intercept\n"));
+ }
+}
+
+
+/**
+ * Clears the virtual interrupt intercept control in the VMCB, as
+ * we have figured the guest is unable to process any interrupts
+ * at this point in time.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ */
+static void hmR0SvmClearIntWindowExiting(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ PSVMVMCBCTRL pVmcbCtrl = &pVmcb->ctrl;
+ if ( pVmcbCtrl->IntCtrl.n.u1VIrqPending
+ || (pVmcbCtrl->u64InterceptCtrl & SVM_CTRL_INTERCEPT_VINTR))
+ {
+ pVmcbCtrl->IntCtrl.n.u1VIrqPending = 0;
+ pVmcbCtrl->u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INT_CTRL;
+ hmR0SvmClearCtrlIntercept(pVCpu, pVmcb, SVM_CTRL_INTERCEPT_VINTR);
+ Log4(("Cleared VINTR intercept\n"));
+ }
+}
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+/**
+ * Evaluates the event to be delivered to the nested-guest and sets it as the
+ * pending event.
+ *
+ * @returns VBox strict status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static VBOXSTRICTRC hmR0SvmEvaluatePendingEventNested(PVMCPU pVCpu)
+{
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ HMSVM_ASSERT_IN_NESTED_GUEST(pCtx);
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_HWVIRT
+ | CPUMCTX_EXTRN_RFLAGS
+ | CPUMCTX_EXTRN_HM_SVM_INT_SHADOW
+ | CPUMCTX_EXTRN_HM_SVM_HWVIRT_VIRQ);
+
+ Assert(!pVCpu->hm.s.Event.fPending);
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ Assert(pVmcb);
+
+ bool const fGif = CPUMGetGuestGif(pCtx);
+ bool const fIntShadow = hmR0SvmIsIntrShadowActive(pVCpu);
+ bool const fBlockNmi = VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS);
+
+ Log4Func(("fGif=%RTbool fBlockNmi=%RTbool fIntShadow=%RTbool fIntPending=%RTbool fNmiPending=%RTbool\n",
+ fGif, fBlockNmi, fIntShadow, VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC),
+ VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI)));
+
+ /** @todo SMI. SMIs take priority over NMIs. */
+
+ /*
+ * Check if the guest can receive NMIs.
+ * Nested NMIs are not allowed, see AMD spec. 8.1.4 "Masking External Interrupts".
+ * NMIs take priority over maskable interrupts, see AMD spec. 8.5 "Priorities".
+ */
+ if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI)
+ && !fBlockNmi)
+ {
+ if ( fGif
+ && !fIntShadow)
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_NMI))
+ {
+ Log4(("Intercepting NMI -> #VMEXIT\n"));
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ return IEMExecSvmVmexit(pVCpu, SVM_EXIT_NMI, 0, 0);
+ }
+
+ Log4(("Setting NMI pending for injection\n"));
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u8Vector = X86_XCPT_NMI;
+ Event.n.u3Type = SVM_EVENT_NMI;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
+ }
+ else if (!fGif)
+ hmR0SvmSetCtrlIntercept(pVmcb, SVM_CTRL_INTERCEPT_STGI);
+ else
+ hmR0SvmSetIntWindowExiting(pVCpu, pVmcb);
+ }
+ /*
+ * Check if the nested-guest can receive external interrupts (generated by the guest's
+ * PIC/APIC).
+ *
+ * External intercepts, NMI, SMI etc. from the physical CPU are -always- intercepted
+ * when executing using hardware-assisted SVM, see HMSVM_MANDATORY_GUEST_CTRL_INTERCEPTS.
+ *
+ * External interrupts that are generated for the outer guest may be intercepted
+ * depending on how the nested-guest VMCB was programmed by guest software.
+ *
+ * Physical interrupts always take priority over virtual interrupts,
+ * see AMD spec. 15.21.4 "Injecting Virtual (INTR) Interrupts".
+ *
+     * We don't need to inject nested-guest virtual interrupts here; we can let the hardware
+     * do that work when we execute nested-guest code, especially since all the required
+     * information is in the VMCB, unlike physical interrupts where we need to fetch the
+     * interrupt from the virtual interrupt controller.
+ */
+ else if ( VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
+ && !pVCpu->hm.s.fSingleInstruction)
+ {
+ if ( fGif
+ && !fIntShadow
+ && CPUMIsGuestSvmPhysIntrEnabled(pVCpu, pCtx))
+ {
+ if (CPUMIsGuestSvmCtrlInterceptSet(pVCpu, pCtx, SVM_CTRL_INTERCEPT_INTR))
+ {
+ Log4(("Intercepting INTR -> #VMEXIT\n"));
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ return IEMExecSvmVmexit(pVCpu, SVM_EXIT_INTR, 0, 0);
+ }
+
+ uint8_t u8Interrupt;
+ int rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
+ if (RT_SUCCESS(rc))
+ {
+ Log4(("Setting external interrupt %#x pending for injection\n", u8Interrupt));
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u8Vector = u8Interrupt;
+ Event.n.u3Type = SVM_EVENT_EXTERNAL_IRQ;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ }
+ else if (rc == VERR_APIC_INTR_MASKED_BY_TPR)
+ {
+ /*
+ * AMD-V has no TPR thresholding feature. TPR and the force-flag will be
+ * updated eventually when the TPR is written by the guest.
+ */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchTprMaskedIrq);
+ }
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
+ }
+ else if (!fGif)
+ hmR0SvmSetCtrlIntercept(pVmcb, SVM_CTRL_INTERCEPT_STGI);
+ else
+ hmR0SvmSetIntWindowExiting(pVCpu, pVmcb);
+ }
+
+ return VINF_SUCCESS;
+}
+#endif
+
+/**
+ * Evaluates the event to be delivered to the guest and sets it as the pending
+ * event.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0SvmEvaluatePendingEvent(PVMCPU pVCpu)
+{
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ HMSVM_ASSERT_NOT_IN_NESTED_GUEST(pCtx);
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_HWVIRT
+ | CPUMCTX_EXTRN_RFLAGS
+ | CPUMCTX_EXTRN_HM_SVM_INT_SHADOW);
+
+ Assert(!pVCpu->hm.s.Event.fPending);
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ Assert(pVmcb);
+
+ bool const fGif = CPUMGetGuestGif(pCtx);
+ bool const fIntShadow = hmR0SvmIsIntrShadowActive(pVCpu);
+ bool const fBlockInt = !(pCtx->eflags.u32 & X86_EFL_IF);
+ bool const fBlockNmi = VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS);
+
+ Log4Func(("fGif=%RTbool fBlockNmi=%RTbool fBlockInt=%RTbool fIntShadow=%RTbool fIntPending=%RTbool NMI pending=%RTbool\n",
+ fGif, fBlockNmi, fBlockInt, fIntShadow,
+ VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC),
+ VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI)));
+
+ /** @todo SMI. SMIs take priority over NMIs. */
+
+ /*
+ * Check if the guest can receive NMIs.
+ * Nested NMIs are not allowed, see AMD spec. 8.1.4 "Masking External Interrupts".
+ * NMIs take priority over maskable interrupts, see AMD spec. 8.5 "Priorities".
+ */
+ if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI)
+ && !fBlockNmi)
+ {
+ if ( fGif
+ && !fIntShadow)
+ {
+ Log4(("Setting NMI pending for injection\n"));
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u8Vector = X86_XCPT_NMI;
+ Event.n.u3Type = SVM_EVENT_NMI;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
+ }
+ else if (!fGif)
+ hmR0SvmSetCtrlIntercept(pVmcb, SVM_CTRL_INTERCEPT_STGI);
+ else
+ hmR0SvmSetIntWindowExiting(pVCpu, pVmcb);
+ }
+ /*
+ * Check if the guest can receive external interrupts (PIC/APIC). Once PDMGetInterrupt()
+ * returns a valid interrupt we -must- deliver the interrupt. We can no longer re-request
+ * it from the APIC device.
+ */
+ else if ( VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
+ && !pVCpu->hm.s.fSingleInstruction)
+ {
+ if ( fGif
+ && !fBlockInt
+ && !fIntShadow)
+ {
+ uint8_t u8Interrupt;
+ int rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
+ if (RT_SUCCESS(rc))
+ {
+ Log4(("Setting external interrupt %#x pending for injection\n", u8Interrupt));
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u8Vector = u8Interrupt;
+ Event.n.u3Type = SVM_EVENT_EXTERNAL_IRQ;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ }
+ else if (rc == VERR_APIC_INTR_MASKED_BY_TPR)
+ {
+ /*
+ * AMD-V has no TPR thresholding feature. TPR and the force-flag will be
+ * updated eventually when the TPR is written by the guest.
+ */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchTprMaskedIrq);
+ }
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
+ }
+ else if (!fGif)
+ hmR0SvmSetCtrlIntercept(pVmcb, SVM_CTRL_INTERCEPT_STGI);
+ else
+ hmR0SvmSetIntWindowExiting(pVCpu, pVmcb);
+ }
+}
+
+
+/**
+ * Injects any pending events into the guest (or nested-guest).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ *
+ * @remarks Must only be called when we are guaranteed to enter
+ * hardware-assisted SVM execution and not return to ring-3
+ * prematurely.
+ */
+static void hmR0SvmInjectPendingEvent(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ Assert(!TRPMHasTrap(pVCpu));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ bool const fIntShadow = hmR0SvmIsIntrShadowActive(pVCpu);
+#ifdef VBOX_STRICT
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ bool const fGif = pCtx->hwvirt.fGif;
+ bool fAllowInt = fGif;
+ if (fGif)
+ {
+ /*
+ * For nested-guests we have no way to determine if we're injecting a physical or
+ * virtual interrupt at this point. Hence the partial verification below.
+ */
+ if (CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ fAllowInt = CPUMIsGuestSvmPhysIntrEnabled(pVCpu, pCtx) || CPUMIsGuestSvmVirtIntrEnabled(pVCpu, pCtx);
+ else
+ fAllowInt = RT_BOOL(pCtx->eflags.u32 & X86_EFL_IF);
+ }
+#endif
+
+ if (pVCpu->hm.s.Event.fPending)
+ {
+ SVMEVENT Event;
+ Event.u = pVCpu->hm.s.Event.u64IntInfo;
+ Assert(Event.n.u1Valid);
+
+ /*
+ * Validate event injection pre-conditions.
+ */
+ if (Event.n.u3Type == SVM_EVENT_EXTERNAL_IRQ)
+ {
+ Assert(fAllowInt);
+ Assert(!fIntShadow);
+ }
+ else if (Event.n.u3Type == SVM_EVENT_NMI)
+ {
+ Assert(fGif);
+ Assert(!fIntShadow);
+ }
+
+ /*
+ * Before injecting an NMI we must set VMCPU_FF_BLOCK_NMIS to prevent nested NMIs. We
+ * do this only when we are surely going to inject the NMI as otherwise if we return
+ * to ring-3 prematurely we could leave NMIs blocked indefinitely upon re-entry into
+ * SVM R0.
+ *
+     * With VT-x, this is handled by the guest interruptibility-state VMCS field: the CPU
+     * updates that field after actually delivering the NMI, and we read it on VM-exit to
+     * determine the state.
+ */
+ if ( Event.n.u3Type == SVM_EVENT_NMI
+ && Event.n.u8Vector == X86_XCPT_NMI
+ && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ {
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_BLOCK_NMIS);
+ }
+
+ /*
+ * Inject it (update VMCB for injection by the hardware).
+ */
+ Log4(("Injecting pending HM event\n"));
+ hmR0SvmInjectEventVmcb(pVCpu, pVmcb, &Event);
+ pVCpu->hm.s.Event.fPending = false;
+
+ if (Event.n.u3Type == SVM_EVENT_EXTERNAL_IRQ)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterrupt);
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectXcpt);
+ }
+ else
+ Assert(pVmcb->ctrl.EventInject.n.u1Valid == 0);
+
+ /*
+     * We could have injected an NMI through IEM and be continuing guest execution using
+     * hardware-assisted SVM, in which case we would not have any events pending (above)
+     * but would still need to intercept IRET in order to eventually clear NMI inhibition.
+ */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ hmR0SvmSetCtrlIntercept(pVmcb, SVM_CTRL_INTERCEPT_IRET);
+
+ /*
+ * Update the guest interrupt shadow in the guest (or nested-guest) VMCB.
+ *
+ * For nested-guests: We need to update it too for the scenario where IEM executes
+ * the nested-guest but execution later continues here with an interrupt shadow active.
+ */
+ pVmcb->ctrl.IntShadow.n.u1IntShadow = fIntShadow;
+}
+
+
+/**
+ * Reports world-switch error and dumps some useful debug info.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param rcVMRun The return code from VMRUN (or
+ * VERR_SVM_INVALID_GUEST_STATE for invalid
+ * guest-state).
+ */
+static void hmR0SvmReportWorldSwitchError(PVMCPU pVCpu, int rcVMRun)
+{
+ HMSVM_ASSERT_PREEMPT_SAFE(pVCpu);
+ HMSVM_ASSERT_NOT_IN_NESTED_GUEST(&pVCpu->cpum.GstCtx);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+
+ if (rcVMRun == VERR_SVM_INVALID_GUEST_STATE)
+ {
+#ifdef VBOX_STRICT
+ hmR0DumpRegs(pVCpu);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ Log4(("ctrl.u32VmcbCleanBits %#RX32\n", pVmcb->ctrl.u32VmcbCleanBits));
+ Log4(("ctrl.u16InterceptRdCRx %#x\n", pVmcb->ctrl.u16InterceptRdCRx));
+ Log4(("ctrl.u16InterceptWrCRx %#x\n", pVmcb->ctrl.u16InterceptWrCRx));
+ Log4(("ctrl.u16InterceptRdDRx %#x\n", pVmcb->ctrl.u16InterceptRdDRx));
+ Log4(("ctrl.u16InterceptWrDRx %#x\n", pVmcb->ctrl.u16InterceptWrDRx));
+ Log4(("ctrl.u32InterceptXcpt %#x\n", pVmcb->ctrl.u32InterceptXcpt));
+ Log4(("ctrl.u64InterceptCtrl %#RX64\n", pVmcb->ctrl.u64InterceptCtrl));
+ Log4(("ctrl.u64IOPMPhysAddr %#RX64\n", pVmcb->ctrl.u64IOPMPhysAddr));
+ Log4(("ctrl.u64MSRPMPhysAddr %#RX64\n", pVmcb->ctrl.u64MSRPMPhysAddr));
+ Log4(("ctrl.u64TSCOffset %#RX64\n", pVmcb->ctrl.u64TSCOffset));
+
+ Log4(("ctrl.TLBCtrl.u32ASID %#x\n", pVmcb->ctrl.TLBCtrl.n.u32ASID));
+ Log4(("ctrl.TLBCtrl.u8TLBFlush %#x\n", pVmcb->ctrl.TLBCtrl.n.u8TLBFlush));
+ Log4(("ctrl.TLBCtrl.u24Reserved %#x\n", pVmcb->ctrl.TLBCtrl.n.u24Reserved));
+
+ Log4(("ctrl.IntCtrl.u8VTPR %#x\n", pVmcb->ctrl.IntCtrl.n.u8VTPR));
+ Log4(("ctrl.IntCtrl.u1VIrqPending %#x\n", pVmcb->ctrl.IntCtrl.n.u1VIrqPending));
+ Log4(("ctrl.IntCtrl.u1VGif %#x\n", pVmcb->ctrl.IntCtrl.n.u1VGif));
+ Log4(("ctrl.IntCtrl.u6Reserved0 %#x\n", pVmcb->ctrl.IntCtrl.n.u6Reserved));
+ Log4(("ctrl.IntCtrl.u4VIntrPrio %#x\n", pVmcb->ctrl.IntCtrl.n.u4VIntrPrio));
+ Log4(("ctrl.IntCtrl.u1IgnoreTPR %#x\n", pVmcb->ctrl.IntCtrl.n.u1IgnoreTPR));
+ Log4(("ctrl.IntCtrl.u3Reserved %#x\n", pVmcb->ctrl.IntCtrl.n.u3Reserved));
+ Log4(("ctrl.IntCtrl.u1VIntrMasking %#x\n", pVmcb->ctrl.IntCtrl.n.u1VIntrMasking));
+ Log4(("ctrl.IntCtrl.u1VGifEnable %#x\n", pVmcb->ctrl.IntCtrl.n.u1VGifEnable));
+ Log4(("ctrl.IntCtrl.u5Reserved1 %#x\n", pVmcb->ctrl.IntCtrl.n.u5Reserved));
+ Log4(("ctrl.IntCtrl.u8VIntrVector %#x\n", pVmcb->ctrl.IntCtrl.n.u8VIntrVector));
+ Log4(("ctrl.IntCtrl.u24Reserved %#x\n", pVmcb->ctrl.IntCtrl.n.u24Reserved));
+
+ Log4(("ctrl.IntShadow.u1IntShadow %#x\n", pVmcb->ctrl.IntShadow.n.u1IntShadow));
+ Log4(("ctrl.IntShadow.u1GuestIntMask %#x\n", pVmcb->ctrl.IntShadow.n.u1GuestIntMask));
+ Log4(("ctrl.u64ExitCode %#RX64\n", pVmcb->ctrl.u64ExitCode));
+ Log4(("ctrl.u64ExitInfo1 %#RX64\n", pVmcb->ctrl.u64ExitInfo1));
+ Log4(("ctrl.u64ExitInfo2 %#RX64\n", pVmcb->ctrl.u64ExitInfo2));
+ Log4(("ctrl.ExitIntInfo.u8Vector %#x\n", pVmcb->ctrl.ExitIntInfo.n.u8Vector));
+ Log4(("ctrl.ExitIntInfo.u3Type %#x\n", pVmcb->ctrl.ExitIntInfo.n.u3Type));
+ Log4(("ctrl.ExitIntInfo.u1ErrorCodeValid %#x\n", pVmcb->ctrl.ExitIntInfo.n.u1ErrorCodeValid));
+ Log4(("ctrl.ExitIntInfo.u19Reserved %#x\n", pVmcb->ctrl.ExitIntInfo.n.u19Reserved));
+ Log4(("ctrl.ExitIntInfo.u1Valid %#x\n", pVmcb->ctrl.ExitIntInfo.n.u1Valid));
+ Log4(("ctrl.ExitIntInfo.u32ErrorCode %#x\n", pVmcb->ctrl.ExitIntInfo.n.u32ErrorCode));
+ Log4(("ctrl.NestedPagingCtrl.u1NestedPaging %#x\n", pVmcb->ctrl.NestedPagingCtrl.n.u1NestedPaging));
+ Log4(("ctrl.NestedPagingCtrl.u1Sev %#x\n", pVmcb->ctrl.NestedPagingCtrl.n.u1Sev));
+ Log4(("ctrl.NestedPagingCtrl.u1SevEs %#x\n", pVmcb->ctrl.NestedPagingCtrl.n.u1SevEs));
+ Log4(("ctrl.EventInject.u8Vector %#x\n", pVmcb->ctrl.EventInject.n.u8Vector));
+ Log4(("ctrl.EventInject.u3Type %#x\n", pVmcb->ctrl.EventInject.n.u3Type));
+ Log4(("ctrl.EventInject.u1ErrorCodeValid %#x\n", pVmcb->ctrl.EventInject.n.u1ErrorCodeValid));
+ Log4(("ctrl.EventInject.u19Reserved %#x\n", pVmcb->ctrl.EventInject.n.u19Reserved));
+ Log4(("ctrl.EventInject.u1Valid %#x\n", pVmcb->ctrl.EventInject.n.u1Valid));
+ Log4(("ctrl.EventInject.u32ErrorCode %#x\n", pVmcb->ctrl.EventInject.n.u32ErrorCode));
+
+ Log4(("ctrl.u64NestedPagingCR3 %#RX64\n", pVmcb->ctrl.u64NestedPagingCR3));
+
+ Log4(("ctrl.LbrVirt.u1LbrVirt %#x\n", pVmcb->ctrl.LbrVirt.n.u1LbrVirt));
+ Log4(("ctrl.LbrVirt.u1VirtVmsaveVmload %#x\n", pVmcb->ctrl.LbrVirt.n.u1VirtVmsaveVmload));
+
+ Log4(("guest.CS.u16Sel %RTsel\n", pVmcb->guest.CS.u16Sel));
+ Log4(("guest.CS.u16Attr %#x\n", pVmcb->guest.CS.u16Attr));
+ Log4(("guest.CS.u32Limit %#RX32\n", pVmcb->guest.CS.u32Limit));
+ Log4(("guest.CS.u64Base %#RX64\n", pVmcb->guest.CS.u64Base));
+ Log4(("guest.DS.u16Sel %#RTsel\n", pVmcb->guest.DS.u16Sel));
+ Log4(("guest.DS.u16Attr %#x\n", pVmcb->guest.DS.u16Attr));
+ Log4(("guest.DS.u32Limit %#RX32\n", pVmcb->guest.DS.u32Limit));
+ Log4(("guest.DS.u64Base %#RX64\n", pVmcb->guest.DS.u64Base));
+ Log4(("guest.ES.u16Sel %RTsel\n", pVmcb->guest.ES.u16Sel));
+ Log4(("guest.ES.u16Attr %#x\n", pVmcb->guest.ES.u16Attr));
+ Log4(("guest.ES.u32Limit %#RX32\n", pVmcb->guest.ES.u32Limit));
+ Log4(("guest.ES.u64Base %#RX64\n", pVmcb->guest.ES.u64Base));
+ Log4(("guest.FS.u16Sel %RTsel\n", pVmcb->guest.FS.u16Sel));
+ Log4(("guest.FS.u16Attr %#x\n", pVmcb->guest.FS.u16Attr));
+ Log4(("guest.FS.u32Limit %#RX32\n", pVmcb->guest.FS.u32Limit));
+ Log4(("guest.FS.u64Base %#RX64\n", pVmcb->guest.FS.u64Base));
+ Log4(("guest.GS.u16Sel %RTsel\n", pVmcb->guest.GS.u16Sel));
+ Log4(("guest.GS.u16Attr %#x\n", pVmcb->guest.GS.u16Attr));
+ Log4(("guest.GS.u32Limit %#RX32\n", pVmcb->guest.GS.u32Limit));
+ Log4(("guest.GS.u64Base %#RX64\n", pVmcb->guest.GS.u64Base));
+
+ Log4(("guest.GDTR.u32Limit %#RX32\n", pVmcb->guest.GDTR.u32Limit));
+ Log4(("guest.GDTR.u64Base %#RX64\n", pVmcb->guest.GDTR.u64Base));
+
+ Log4(("guest.LDTR.u16Sel %RTsel\n", pVmcb->guest.LDTR.u16Sel));
+ Log4(("guest.LDTR.u16Attr %#x\n", pVmcb->guest.LDTR.u16Attr));
+ Log4(("guest.LDTR.u32Limit %#RX32\n", pVmcb->guest.LDTR.u32Limit));
+ Log4(("guest.LDTR.u64Base %#RX64\n", pVmcb->guest.LDTR.u64Base));
+
+ Log4(("guest.IDTR.u32Limit %#RX32\n", pVmcb->guest.IDTR.u32Limit));
+ Log4(("guest.IDTR.u64Base %#RX64\n", pVmcb->guest.IDTR.u64Base));
+
+ Log4(("guest.TR.u16Sel %RTsel\n", pVmcb->guest.TR.u16Sel));
+ Log4(("guest.TR.u16Attr %#x\n", pVmcb->guest.TR.u16Attr));
+ Log4(("guest.TR.u32Limit %#RX32\n", pVmcb->guest.TR.u32Limit));
+ Log4(("guest.TR.u64Base %#RX64\n", pVmcb->guest.TR.u64Base));
+
+ Log4(("guest.u8CPL %#x\n", pVmcb->guest.u8CPL));
+ Log4(("guest.u64CR0 %#RX64\n", pVmcb->guest.u64CR0));
+ Log4(("guest.u64CR2 %#RX64\n", pVmcb->guest.u64CR2));
+ Log4(("guest.u64CR3 %#RX64\n", pVmcb->guest.u64CR3));
+ Log4(("guest.u64CR4 %#RX64\n", pVmcb->guest.u64CR4));
+ Log4(("guest.u64DR6 %#RX64\n", pVmcb->guest.u64DR6));
+ Log4(("guest.u64DR7 %#RX64\n", pVmcb->guest.u64DR7));
+
+ Log4(("guest.u64RIP %#RX64\n", pVmcb->guest.u64RIP));
+ Log4(("guest.u64RSP %#RX64\n", pVmcb->guest.u64RSP));
+ Log4(("guest.u64RAX %#RX64\n", pVmcb->guest.u64RAX));
+ Log4(("guest.u64RFlags %#RX64\n", pVmcb->guest.u64RFlags));
+
+ Log4(("guest.u64SysEnterCS %#RX64\n", pVmcb->guest.u64SysEnterCS));
+ Log4(("guest.u64SysEnterEIP %#RX64\n", pVmcb->guest.u64SysEnterEIP));
+ Log4(("guest.u64SysEnterESP %#RX64\n", pVmcb->guest.u64SysEnterESP));
+
+ Log4(("guest.u64EFER %#RX64\n", pVmcb->guest.u64EFER));
+ Log4(("guest.u64STAR %#RX64\n", pVmcb->guest.u64STAR));
+ Log4(("guest.u64LSTAR %#RX64\n", pVmcb->guest.u64LSTAR));
+ Log4(("guest.u64CSTAR %#RX64\n", pVmcb->guest.u64CSTAR));
+ Log4(("guest.u64SFMASK %#RX64\n", pVmcb->guest.u64SFMASK));
+ Log4(("guest.u64KernelGSBase %#RX64\n", pVmcb->guest.u64KernelGSBase));
+ Log4(("guest.u64PAT %#RX64\n", pVmcb->guest.u64PAT));
+ Log4(("guest.u64DBGCTL %#RX64\n", pVmcb->guest.u64DBGCTL));
+ Log4(("guest.u64BR_FROM %#RX64\n", pVmcb->guest.u64BR_FROM));
+ Log4(("guest.u64BR_TO %#RX64\n", pVmcb->guest.u64BR_TO));
+ Log4(("guest.u64LASTEXCPFROM %#RX64\n", pVmcb->guest.u64LASTEXCPFROM));
+ Log4(("guest.u64LASTEXCPTO %#RX64\n", pVmcb->guest.u64LASTEXCPTO));
+
+ NOREF(pVmcb);
+#endif /* VBOX_STRICT */
+ }
+ else
+ Log4Func(("rcVMRun=%d\n", rcVMRun));
+}
+
+
+/**
+ * Check per-VM and per-VCPU force flag actions that require us to go back to
+ * ring-3 for one reason or another.
+ *
+ * @returns VBox status code (information status code included).
+ * @retval VINF_SUCCESS if we don't have any actions that require going back to
+ * ring-3.
+ * @retval VINF_PGM_SYNC_CR3 if we have pending PGM CR3 sync.
+ * @retval VINF_EM_PENDING_REQUEST if we have pending requests (like hardware
+ * interrupts)
+ * @retval VINF_PGM_POOL_FLUSH_PENDING if PGM is doing a pool flush and requires
+ * all EMTs to be in ring-3.
+ * @retval VINF_EM_RAW_TO_R3 if there are pending DMA requests.
+ * @retval VINF_EM_NO_MEMORY PGM is out of memory, we need to return
+ * to the EM loop.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int hmR0SvmCheckForceFlags(PVMCPU pVCpu)
+{
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
+
+ /* Could happen as a result of longjump. */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
+ PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
+
+ /* Update pending interrupts into the APIC's IRR. */
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(pVCpu);
+
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if ( VM_FF_IS_ANY_SET(pVM, !pVCpu->hm.s.fSingleInstruction
+ ? VM_FF_HP_R0_PRE_HM_MASK : VM_FF_HP_R0_PRE_HM_STEP_MASK)
+ || VMCPU_FF_IS_ANY_SET(pVCpu, !pVCpu->hm.s.fSingleInstruction
+ ? VMCPU_FF_HP_R0_PRE_HM_MASK : VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) )
+ {
+        /* Pending PGM CR3 sync. */
+        if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
+ {
+ int rc = PGMSyncCR3(pVCpu, pVCpu->cpum.GstCtx.cr0, pVCpu->cpum.GstCtx.cr3, pVCpu->cpum.GstCtx.cr4,
+ VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
+ if (rc != VINF_SUCCESS)
+ {
+ Log4Func(("PGMSyncCR3 forcing us back to ring-3. rc=%d\n", rc));
+ return rc;
+ }
+ }
+
+ /* Pending HM-to-R3 operations (critsects, timers, EMT rendezvous etc.) */
+ /* -XXX- what was that about single stepping? */
+ if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HM_TO_R3_MASK)
+ || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
+ int rc = RT_LIKELY(!VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_RAW_TO_R3 : VINF_EM_NO_MEMORY;
+ Log4Func(("HM_TO_R3 forcing us back to ring-3. rc=%d\n", rc));
+ return rc;
+ }
+
+ /* Pending VM request packets, such as hardware interrupts. */
+ if ( VM_FF_IS_SET(pVM, VM_FF_REQUEST)
+ || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_REQUEST))
+ {
+ Log4Func(("Pending VM request forcing us back to ring-3\n"));
+ return VINF_EM_PENDING_REQUEST;
+ }
+
+ /* Pending PGM pool flushes. */
+ if (VM_FF_IS_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
+ {
+ Log4Func(("PGM pool flush pending forcing us back to ring-3\n"));
+ return VINF_PGM_POOL_FLUSH_PENDING;
+ }
+
+ /* Pending DMA requests. */
+ if (VM_FF_IS_SET(pVM, VM_FF_PDM_DMA))
+ {
+ Log4Func(("Pending DMA request forcing us back to ring-3\n"));
+ return VINF_EM_RAW_TO_R3;
+ }
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+/**
+ * Does the preparations before executing nested-guest code in AMD-V.
+ *
+ * @returns VBox status code (informational status codes included).
+ * @retval VINF_SUCCESS if we can proceed with running the guest.
+ * @retval VINF_* scheduling changes, we have to go back to ring-3.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM transient structure.
+ *
+ * @remarks Same caveats regarding longjumps as hmR0SvmPreRunGuest apply.
+ * @sa hmR0SvmPreRunGuest.
+ */
+static int hmR0SvmPreRunGuestNested(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ HMSVM_ASSERT_PREEMPT_SAFE(pVCpu);
+ HMSVM_ASSERT_IN_NESTED_GUEST(pCtx);
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ if (CPUMIsGuestInSvmNestedHwVirtMode(pCtx)) /* Redundant check to avoid unreachable code warning. */
+ {
+ Log2(("hmR0SvmPreRunGuest: Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
+ return VINF_EM_RESCHEDULE_REM;
+ }
+#endif
+
+ /* Check force flag actions that might require us to go back to ring-3. */
+ int rc = hmR0SvmCheckForceFlags(pVCpu);
+ if (rc != VINF_SUCCESS)
+ return rc;
+
+ if (TRPMHasTrap(pVCpu))
+ hmR0SvmTrpmTrapToPendingEvent(pVCpu);
+ else if (!pVCpu->hm.s.Event.fPending)
+ {
+ VBOXSTRICTRC rcStrict = hmR0SvmEvaluatePendingEventNested(pVCpu);
+ if ( rcStrict != VINF_SUCCESS
+ || !CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ return VBOXSTRICTRC_VAL(rcStrict);
+ }
+
+ HMSVM_ASSERT_IN_NESTED_GUEST(pCtx);
+
+ /*
+ * On the oldest AMD-V systems, we may not get enough information to reinject an NMI.
+ * Just do it in software, see @bugref{8411}.
+ * NB: If we could continue a task switch exit we wouldn't need to do this.
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if (RT_UNLIKELY( !pVM->hm.s.svm.u32Features
+ && pVCpu->hm.s.Event.fPending
+ && SVM_EVENT_GET_TYPE(pVCpu->hm.s.Event.u64IntInfo) == SVM_EVENT_NMI))
+ {
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+
+#ifdef HMSVM_SYNC_FULL_GUEST_STATE
+ Assert(!(pCtx->fExtrn & HMSVM_CPUMCTX_EXTRN_ALL));
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+#endif
+
+ /*
+ * Export the nested-guest state bits that are not shared with the host in any way as we
+ * can longjmp or get preempted in the midst of exporting some of the state.
+ */
+ rc = hmR0SvmExportGuestStateNested(pVCpu);
+ AssertRCReturn(rc, rc);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
+
+ /* Ensure we've cached (and hopefully modified) the VMCB for execution using hardware-assisted SVM. */
+ Assert(pVCpu->hm.s.svm.NstGstVmcbCache.fCacheValid);
+
+ /*
+ * No longjmps to ring-3 from this point on!!!
+ *
+ * Asserts() will still longjmp to ring-3 (but won't return), which is intentional,
+ * better than a kernel panic. This also disables flushing of the R0-logger instance.
+ */
+ VMMRZCallRing3Disable(pVCpu);
+
+ /*
+ * We disable interrupts so that we don't miss any interrupts that would flag preemption
+ * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
+     * preemption disabled for a while. Since this is purely to aid the
+     * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily re-enable and
+     * disable interrupts on NT.
+     *
+     * We need to check for force-flags that could've possibly been altered since we last
+ * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
+ * see @bugref{6398}).
+ *
+ * We also check a couple of other force-flags as a last opportunity to get the EMT back
+ * to ring-3 before executing guest code.
+ */
+ pSvmTransient->fEFlags = ASMIntDisableFlags();
+ if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
+ || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
+ {
+ ASMSetFlags(pSvmTransient->fEFlags);
+ VMMRZCallRing3Enable(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
+ return VINF_EM_RAW_TO_R3;
+ }
+ if (RTThreadPreemptIsPending(NIL_RTTHREAD))
+ {
+ ASMSetFlags(pSvmTransient->fEFlags);
+ VMMRZCallRing3Enable(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
+ return VINF_EM_RAW_INTERRUPT;
+ }
+ return VINF_SUCCESS;
+}
+#endif
+
+
+/**
+ * Does the preparations before executing guest code in AMD-V.
+ *
+ * This may cause longjmps to ring-3 and may even result in rescheduling to the
+ * recompiler. We must be cautious about committing guest-state information into
+ * the VMCB here on the assumption that we will assuredly execute the guest in
+ * AMD-V. If we fall back to the recompiler after updating the VMCB and
+ * clearing the common-state (TRPM/forceflags), we must undo those changes so
+ * that the recompiler can (and should) use them when it resumes guest
+ * execution. Otherwise such operations must be done when we can no longer
+ * exit to ring-3.
+ *
+ * @returns VBox status code (informational status codes included).
+ * @retval VINF_SUCCESS if we can proceed with running the guest.
+ * @retval VINF_* scheduling changes, we have to go back to ring-3.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM transient structure.
+ */
+static int hmR0SvmPreRunGuest(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_ASSERT_PREEMPT_SAFE(pVCpu);
+ HMSVM_ASSERT_NOT_IN_NESTED_GUEST(&pVCpu->cpum.GstCtx);
+
+ /* Check force flag actions that might require us to go back to ring-3. */
+ int rc = hmR0SvmCheckForceFlags(pVCpu);
+ if (rc != VINF_SUCCESS)
+ return rc;
+
+ if (TRPMHasTrap(pVCpu))
+ hmR0SvmTrpmTrapToPendingEvent(pVCpu);
+ else if (!pVCpu->hm.s.Event.fPending)
+ hmR0SvmEvaluatePendingEvent(pVCpu);
+
+ /*
+ * On the oldest AMD-V systems, we may not get enough information to reinject an NMI.
+ * Just do it in software, see @bugref{8411}.
+ * NB: If we could continue a task switch exit we wouldn't need to do this.
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
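+    /* Bits 10:8 of the raw event info hold the SVM event type; this mirrors the
+       SVM_EVENT_GET_TYPE check used in the nested-guest path above. */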
+ if (RT_UNLIKELY(pVCpu->hm.s.Event.fPending && (((pVCpu->hm.s.Event.u64IntInfo >> 8) & 7) == SVM_EVENT_NMI)))
+ if (RT_UNLIKELY(!pVM->hm.s.svm.u32Features))
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+
+#ifdef HMSVM_SYNC_FULL_GUEST_STATE
+ Assert(!(pVCpu->cpum.GstCtx->fExtrn & HMSVM_CPUMCTX_EXTRN_ALL));
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+#endif
+
+ /*
+ * Export the guest state bits that are not shared with the host in any way as we can
+ * longjmp or get preempted in the midst of exporting some of the state.
+ */
+ rc = hmR0SvmExportGuestState(pVCpu);
+ AssertRCReturn(rc, rc);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
+
+ /*
+ * If we're not intercepting TPR changes in the guest, save the guest TPR before the
+ * world-switch so we can update it on the way back if the guest changed the TPR.
+ */
+ if (pVCpu->hm.s.svm.fSyncVTpr)
+ {
+ PCSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+ if (pVM->hm.s.fTPRPatchingActive)
+ pSvmTransient->u8GuestTpr = pVmcb->guest.u64LSTAR;
+ else
+ pSvmTransient->u8GuestTpr = pVmcb->ctrl.IntCtrl.n.u8VTPR;
+ }
+
+ /*
+ * No longjmps to ring-3 from this point on!!!
+ *
+ * Asserts() will still longjmp to ring-3 (but won't return), which is intentional,
+ * better than a kernel panic. This also disables flushing of the R0-logger instance.
+ */
+ VMMRZCallRing3Disable(pVCpu);
+
+ /*
+ * We disable interrupts so that we don't miss any interrupts that would flag preemption
+ * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
+     * preemption disabled for a while. Since this is purely to aid the
+     * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily re-enable and
+     * disable interrupts on NT.
+     *
+     * We need to check for force-flags that could've possibly been altered since we last
+ * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
+ * see @bugref{6398}).
+ *
+ * We also check a couple of other force-flags as a last opportunity to get the EMT back
+ * to ring-3 before executing guest code.
+ */
+ pSvmTransient->fEFlags = ASMIntDisableFlags();
+ if ( VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
+ || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
+ {
+ ASMSetFlags(pSvmTransient->fEFlags);
+ VMMRZCallRing3Enable(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
+ return VINF_EM_RAW_TO_R3;
+ }
+ if (RTThreadPreemptIsPending(NIL_RTTHREAD))
+ {
+ ASMSetFlags(pSvmTransient->fEFlags);
+ VMMRZCallRing3Enable(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
+ return VINF_EM_RAW_INTERRUPT;
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Prepares to run guest (or nested-guest) code in AMD-V once we have committed
+ * to doing so.
+ *
+ * This means there is no backing out to ring-3 or anywhere else at this point.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM transient structure.
+ *
+ * @remarks Called with preemption disabled.
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0SvmPreRunGuestCommitted(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(VMMR0IsLogFlushDisabled(pVCpu));
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); /* Indicate the start of guest execution. */
+
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PSVMVMCB pVmcb = pSvmTransient->pVmcb;
+
+ hmR0SvmInjectPendingEvent(pVCpu, pVmcb);
+
+ if (!CPUMIsGuestFPUStateActive(pVCpu))
+ {
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
+ CPUMR0LoadGuestFPU(pVM, pVCpu); /* (Ignore rc, no need to set HM_CHANGED_HOST_CONTEXT for SVM.) */
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
+ }
+
+ /* Load the state shared between host and guest (FPU, debug). */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_SVM_HOST_GUEST_SHARED_STATE)
+ hmR0SvmExportSharedState(pVCpu, pVmcb);
+
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT; /* Preemption might set this, nothing to do on AMD-V. */
+ AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
+
+ PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
+ RTCPUID const idHostCpu = pHostCpu->idCpu;
+ bool const fMigratedHostCpu = idHostCpu != pVCpu->hm.s.idLastCpu;
+
+ /* Setup TSC offsetting. */
+ if ( pSvmTransient->fUpdateTscOffsetting
+ || fMigratedHostCpu)
+ {
+ hmR0SvmUpdateTscOffsetting(pVCpu, pVmcb);
+ pSvmTransient->fUpdateTscOffsetting = false;
+ }
+
+    /* If we're migrating CPUs, mark the VMCB Clean bits as dirty. */
+ if (fMigratedHostCpu)
+ pVmcb->ctrl.u32VmcbCleanBits = 0;
+
+ /* Store status of the shared guest-host state at the time of VMRUN. */
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx))
+ {
+ pSvmTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActivePending(pVCpu);
+ pSvmTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActivePending(pVCpu);
+ }
+ else
+#endif
+ {
+ pSvmTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
+ pSvmTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
+ }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ uint8_t *pbMsrBitmap;
+ if (!pSvmTransient->fIsNestedGuest)
+ pbMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap;
+ else
+ {
+ hmR0SvmMergeMsrpmNested(pHostCpu, pVCpu);
+
+ /* Update the nested-guest VMCB with the newly merged MSRPM (clean bits updated below). */
+ pVmcb->ctrl.u64MSRPMPhysAddr = pHostCpu->n.svm.HCPhysNstGstMsrpm;
+ pbMsrBitmap = (uint8_t *)pHostCpu->n.svm.pvNstGstMsrpm;
+ }
+#else
+ uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap;
+#endif
+
+ ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
+ /* Flush the appropriate tagged-TLB entries. */
+ hmR0SvmFlushTaggedTlb(pHostCpu, pVCpu, pVmcb);
+ Assert(pVCpu->hm.s.idLastCpu == idHostCpu);
+
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
+
+ TMNotifyStartOfExecution(pVCpu); /* Finally, notify TM to resume its clocks as we're about
+ to start executing. */
+
+ /*
+ * Save the current Host TSC_AUX and write the guest TSC_AUX to the host, so that RDTSCPs
+     * (that don't cause exits) read the guest MSR, see @bugref{3324}.
+ *
+ * This should be done -after- any RDTSCPs for obtaining the host timestamp (TM, STAM etc).
+ */
+ if ( pVM->cpum.ro.HostFeatures.fRdTscP
+ && !(pVmcb->ctrl.u64InterceptCtrl & SVM_CTRL_INTERCEPT_RDTSCP))
+ {
+ uint64_t const uGuestTscAux = CPUMGetGuestTscAux(pVCpu);
+ pVCpu->hm.s.u64HostTscAux = ASMRdMsr(MSR_K8_TSC_AUX);
+ if (uGuestTscAux != pVCpu->hm.s.u64HostTscAux)
+ ASMWrMsr(MSR_K8_TSC_AUX, uGuestTscAux);
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_TSC_AUX, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
+ pSvmTransient->fRestoreTscAuxMsr = true;
+ }
+ else
+ {
+ hmR0SvmSetMsrPermission(pVCpu, pbMsrBitmap, MSR_K8_TSC_AUX, SVMMSREXIT_INTERCEPT_READ, SVMMSREXIT_INTERCEPT_WRITE);
+ pSvmTransient->fRestoreTscAuxMsr = false;
+ }
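+    /* The TSC_AUX MSR permission was updated in either branch above, so mark the IOPM/MSRPM VMCB clean bit as dirty. */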
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_IOPM_MSRPM;
+
+ /*
+     * If VMCB Clean bits aren't supported by the CPU, or aren't exposed to the guest in the
+     * nested virtualization case, mark all state-bits as dirty indicating to the CPU to re-load
+ * from the VMCB.
+ */
+ bool const fSupportsVmcbCleanBits = hmR0SvmSupportsVmcbCleanBits(pVCpu);
+ if (!fSupportsVmcbCleanBits)
+ pVmcb->ctrl.u32VmcbCleanBits = 0;
+}
+
+
+/**
+ * Wrapper for running the guest (or nested-guest) code in AMD-V.
+ *
+ * @returns VBox strict status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param HCPhysVmcb The host physical address of the VMCB.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+DECLINLINE(int) hmR0SvmRunGuest(PVMCPU pVCpu, RTHCPHYS HCPhysVmcb)
+{
+ /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ pCtx->fExtrn |= HMSVM_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
+
+ /*
+ * 64-bit Windows uses XMM registers in the kernel as the Microsoft compiler expresses
+ * floating-point operations using SSE instructions. Some XMM registers (XMM6-XMM15) are
+     * callee-saved, hence the need for this XMM wrapper.
+     *
+     * Refer to MSDN "Configuring Programs for 64-bit/x64 Software Conventions / Register Usage".
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+#ifdef VBOX_WITH_KERNEL_USING_XMM
+ return hmR0SVMRunWrapXMM(pVCpu->hm.s.svm.HCPhysVmcbHost, HCPhysVmcb, pCtx, pVM, pVCpu, pVCpu->hm.s.svm.pfnVMRun);
+#else
+ return pVCpu->hm.s.svm.pfnVMRun(pVCpu->hm.s.svm.HCPhysVmcbHost, HCPhysVmcb, pCtx, pVM, pVCpu);
+#endif
+}
+
+
+/**
+ * Undoes the TSC offset applied for an SVM nested-guest and returns the TSC
+ * value for the guest.
+ *
+ * @returns The TSC offset after undoing any nested-guest TSC offset.
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param uTicks The nested-guest TSC.
+ *
+ * @note If you make any changes to this function, please check if
+ * hmR0SvmNstGstUndoTscOffset() needs adjusting.
+ *
+ * @sa HMApplySvmNstGstTscOffset().
+ */
+DECLINLINE(uint64_t) hmR0SvmNstGstUndoTscOffset(PVMCPU pVCpu, uint64_t uTicks)
+{
+ PCSVMNESTEDVMCBCACHE pVmcbNstGstCache = &pVCpu->hm.s.svm.NstGstVmcbCache;
+ Assert(pVmcbNstGstCache->fCacheValid);
+ return uTicks - pVmcbNstGstCache->u64TSCOffset;
+}
+
+
+/**
+ * Performs some essential restoration of state after running guest (or
+ * nested-guest) code in AMD-V.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM transient structure.
+ * @param rcVMRun Return code of VMRUN.
+ *
+ * @remarks Called with interrupts disabled.
+ * @remarks No-long-jump zone!!! This function will however re-enable longjmps
+ * unconditionally when it is safe to do so.
+ */
+static void hmR0SvmPostRunGuest(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient, int rcVMRun)
+{
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ uint64_t const uHostTsc = ASMReadTSC(); /* Read the TSC as soon as possible. */
+ ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
+ ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
+
+ PSVMVMCB pVmcb = pSvmTransient->pVmcb;
+ PSVMVMCBCTRL pVmcbCtrl = &pVmcb->ctrl;
+
+ /* TSC read must be done early for maximum accuracy. */
+ if (!(pVmcbCtrl->u64InterceptCtrl & SVM_CTRL_INTERCEPT_RDTSC))
+ {
+ if (!pSvmTransient->fIsNestedGuest)
+ TMCpuTickSetLastSeen(pVCpu, uHostTsc + pVmcbCtrl->u64TSCOffset);
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ else
+ {
+ /* The nested-guest VMCB TSC offset shall eventually be restored on #VMEXIT via HMNotifySvmNstGstVmexit(). */
+ uint64_t const uGstTsc = hmR0SvmNstGstUndoTscOffset(pVCpu, uHostTsc + pVmcbCtrl->u64TSCOffset);
+ TMCpuTickSetLastSeen(pVCpu, uGstTsc);
+ }
+#endif
+ }
+
+ if (pSvmTransient->fRestoreTscAuxMsr)
+ {
+ uint64_t u64GuestTscAuxMsr = ASMRdMsr(MSR_K8_TSC_AUX);
+ CPUMSetGuestTscAux(pVCpu, u64GuestTscAuxMsr);
+ if (u64GuestTscAuxMsr != pVCpu->hm.s.u64HostTscAux)
+ ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hm.s.u64HostTscAux);
+ }
+
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
+ TMNotifyEndOfExecution(pVCpu); /* Notify TM that the guest is no longer running. */
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
+
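+    /* Interrupts were disabled before VMRUN and must still be disabled here; they are re-enabled just below. */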
+ Assert(!(ASMGetFlags() & X86_EFL_IF));
+ ASMSetFlags(pSvmTransient->fEFlags); /* Enable interrupts. */
+ VMMRZCallRing3Enable(pVCpu); /* It is now safe to do longjmps to ring-3!!! */
+
+ /* If VMRUN failed, we can bail out early. This does -not- cover SVM_EXIT_INVALID. */
+ if (RT_UNLIKELY(rcVMRun != VINF_SUCCESS))
+ {
+ Log4Func(("VMRUN failure: rcVMRun=%Rrc\n", rcVMRun));
+ return;
+ }
+
+ pSvmTransient->u64ExitCode = pVmcbCtrl->u64ExitCode; /* Save the #VMEXIT reason. */
+ pVmcbCtrl->u32VmcbCleanBits = HMSVM_VMCB_CLEAN_ALL; /* Mark the VMCB-state cache as unmodified by VMM. */
+ pSvmTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
+ pSvmTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
+
+#ifdef HMSVM_SYNC_FULL_GUEST_STATE
+ hmR0SvmImportGuestState(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMSVM_CPUMCTX_EXTRN_ALL));
+#else
+ /*
+ * Always import the following:
+ *
+ * - RIP for exit optimizations and evaluating event injection on re-entry.
+ * - RFLAGS for evaluating event injection on VM re-entry and for exporting shared debug
+ * state on preemption.
+ * - Interrupt shadow, GIF for evaluating event injection on VM re-entry.
+ * - CS for exit optimizations.
+ * - RAX, RSP for simplifying assumptions on GPRs. All other GPRs are swapped by the
+ * assembly switcher code.
+ * - Shared state (only DR7 currently) for exporting shared debug state on preemption.
+ */
+ hmR0SvmImportGuestState(pVCpu, CPUMCTX_EXTRN_RIP
+ | CPUMCTX_EXTRN_RFLAGS
+ | CPUMCTX_EXTRN_RAX
+ | CPUMCTX_EXTRN_RSP
+ | CPUMCTX_EXTRN_CS
+ | CPUMCTX_EXTRN_HWVIRT
+ | CPUMCTX_EXTRN_HM_SVM_INT_SHADOW
+ | CPUMCTX_EXTRN_HM_SVM_HWVIRT_VIRQ
+ | HMSVM_CPUMCTX_SHARED_STATE);
+#endif
+
+ if ( pSvmTransient->u64ExitCode != SVM_EXIT_INVALID
+ && pVCpu->hm.s.svm.fSyncVTpr)
+ {
+ Assert(!pSvmTransient->fIsNestedGuest);
+ /* TPR patching (for 32-bit guests) uses LSTAR MSR for holding the TPR value, otherwise uses the VTPR. */
+ if ( pVCpu->CTX_SUFF(pVM)->hm.s.fTPRPatchingActive
+ && (pVmcb->guest.u64LSTAR & 0xff) != pSvmTransient->u8GuestTpr)
+ {
+ int rc = APICSetTpr(pVCpu, pVmcb->guest.u64LSTAR & 0xff);
+ AssertRC(rc);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ }
+ /* Sync TPR when we aren't intercepting CR8 writes. */
+ else if (pSvmTransient->u8GuestTpr != pVmcbCtrl->IntCtrl.n.u8VTPR)
+ {
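+            /* V_TPR caches the 4-bit CR8-style priority; shift it left by 4 to form the
+               8-bit APIC TPR format expected by APICSetTpr(). */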
+ int rc = APICSetTpr(pVCpu, pVmcbCtrl->IntCtrl.n.u8VTPR << 4);
+ AssertRC(rc);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ }
+ }
+
+#ifdef DEBUG_ramshankar
+ if (CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+ {
+ hmR0SvmImportGuestState(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ hmR0SvmLogState(pVCpu, pVmcb, pVCpu->cpum.GstCtx, "hmR0SvmPostRunGuestNested", HMSVM_LOG_ALL & ~HMSVM_LOG_LBR,
+ 0 /* uVerbose */);
+ }
+#endif
+
+ HMSVM_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_SVM, pSvmTransient->u64ExitCode & EMEXIT_F_TYPE_MASK),
+ pVCpu->cpum.GstCtx.cs.u64Base + pVCpu->cpum.GstCtx.rip, uHostTsc);
+}
+
+
+/**
+ * Runs the guest code using AMD-V.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pcLoops Pointer to the number of executed loops.
+ */
+static int hmR0SvmRunGuestCodeNormal(PVMCPU pVCpu, uint32_t *pcLoops)
+{
+ uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops;
+ Assert(pcLoops);
+ Assert(*pcLoops <= cMaxResumeLoops);
+
+ SVMTRANSIENT SvmTransient;
+ RT_ZERO(SvmTransient);
+ SvmTransient.fUpdateTscOffsetting = true;
+ SvmTransient.pVmcb = pVCpu->hm.s.svm.pVmcb;
+
+ int rc = VERR_INTERNAL_ERROR_5;
+ for (;;)
+ {
+ Assert(!HMR0SuspendPending());
+ HMSVM_ASSERT_CPU_SAFE(pVCpu);
+
+        /* Preparatory work for running guest code; this may force us to return to
+           ring-3. This bugger disables interrupts on VINF_SUCCESS! */
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+ rc = hmR0SvmPreRunGuest(pVCpu, &SvmTransient);
+ if (rc != VINF_SUCCESS)
+ break;
+
+ /*
+ * No longjmps to ring-3 from this point on!!!
+ *
+ * Asserts() will still longjmp to ring-3 (but won't return), which is intentional,
+ * better than a kernel panic. This also disables flushing of the R0-logger instance.
+ */
+ hmR0SvmPreRunGuestCommitted(pVCpu, &SvmTransient);
+ rc = hmR0SvmRunGuest(pVCpu, pVCpu->hm.s.svm.HCPhysVmcb);
+
+ /* Restore any residual host-state and save any bits shared between host and guest
+ into the guest-CPU state. Re-enables interrupts! */
+ hmR0SvmPostRunGuest(pVCpu, &SvmTransient, rc);
+
+ if (RT_UNLIKELY( rc != VINF_SUCCESS /* Check for VMRUN errors. */
+ || SvmTransient.u64ExitCode == SVM_EXIT_INVALID)) /* Check for invalid guest-state errors. */
+ {
+ if (rc == VINF_SUCCESS)
+ rc = VERR_SVM_INVALID_GUEST_STATE;
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
+ hmR0SvmReportWorldSwitchError(pVCpu, rc);
+ break;
+ }
+
+ /* Handle the #VMEXIT. */
+ HMSVM_EXITCODE_STAM_COUNTER_INC(SvmTransient.u64ExitCode);
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
+ VBOXVMM_R0_HMSVM_VMEXIT(pVCpu, &pVCpu->cpum.GstCtx, SvmTransient.u64ExitCode, pVCpu->hm.s.svm.pVmcb);
+ rc = hmR0SvmHandleExit(pVCpu, &SvmTransient);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
+ if (rc != VINF_SUCCESS)
+ break;
+ if (++(*pcLoops) >= cMaxResumeLoops)
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+ rc = VINF_EM_RAW_INTERRUPT;
+ break;
+ }
+ }
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+ return rc;
+}
+
+
+/**
+ * Runs the guest code using AMD-V in single step mode.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pcLoops Pointer to the number of executed loops.
+ */
+static int hmR0SvmRunGuestCodeStep(PVMCPU pVCpu, uint32_t *pcLoops)
+{
+ uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops;
+ Assert(pcLoops);
+ Assert(*pcLoops <= cMaxResumeLoops);
+
+ SVMTRANSIENT SvmTransient;
+ RT_ZERO(SvmTransient);
+ SvmTransient.fUpdateTscOffsetting = true;
+ SvmTransient.pVmcb = pVCpu->hm.s.svm.pVmcb;
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint16_t uCsStart = pCtx->cs.Sel;
+ uint64_t uRipStart = pCtx->rip;
+
+ int rc = VERR_INTERNAL_ERROR_5;
+ for (;;)
+ {
+ Assert(!HMR0SuspendPending());
+ AssertMsg(pVCpu->hm.s.idEnteredCpu == RTMpCpuId(),
+ ("Illegal migration! Entered on CPU %u Current %u cLoops=%u\n", (unsigned)pVCpu->hm.s.idEnteredCpu,
+ (unsigned)RTMpCpuId(), *pcLoops));
+
+        /* Preparatory work for running guest code; this may force us to return to
+           ring-3. This bugger disables interrupts on VINF_SUCCESS! */
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+ rc = hmR0SvmPreRunGuest(pVCpu, &SvmTransient);
+ if (rc != VINF_SUCCESS)
+ break;
+
+ /*
+ * No longjmps to ring-3 from this point on!!!
+ *
+ * Asserts() will still longjmp to ring-3 (but won't return), which is intentional,
+ * better than a kernel panic. This also disables flushing of the R0-logger instance.
+ */
+ VMMRZCallRing3Disable(pVCpu);
+ VMMRZCallRing3RemoveNotification(pVCpu);
+ hmR0SvmPreRunGuestCommitted(pVCpu, &SvmTransient);
+
+ rc = hmR0SvmRunGuest(pVCpu, pVCpu->hm.s.svm.HCPhysVmcb);
+
+ /* Restore any residual host-state and save any bits shared between host and guest
+ into the guest-CPU state. Re-enables interrupts! */
+ hmR0SvmPostRunGuest(pVCpu, &SvmTransient, rc);
+
+ if (RT_UNLIKELY( rc != VINF_SUCCESS /* Check for VMRUN errors. */
+ || SvmTransient.u64ExitCode == SVM_EXIT_INVALID)) /* Check for invalid guest-state errors. */
+ {
+ if (rc == VINF_SUCCESS)
+ rc = VERR_SVM_INVALID_GUEST_STATE;
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
+ hmR0SvmReportWorldSwitchError(pVCpu, rc);
+ return rc;
+ }
+
+ /* Handle the #VMEXIT. */
+ HMSVM_EXITCODE_STAM_COUNTER_INC(SvmTransient.u64ExitCode);
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
+ VBOXVMM_R0_HMSVM_VMEXIT(pVCpu, pCtx, SvmTransient.u64ExitCode, pVCpu->hm.s.svm.pVmcb);
+ rc = hmR0SvmHandleExit(pVCpu, &SvmTransient);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
+ if (rc != VINF_SUCCESS)
+ break;
+ if (++(*pcLoops) >= cMaxResumeLoops)
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+ rc = VINF_EM_RAW_INTERRUPT;
+ break;
+ }
+
+ /*
+ * Did the RIP change, if so, consider it a single step.
+ * Otherwise, make sure one of the TFs gets set.
+ */
+ if ( pCtx->rip != uRipStart
+ || pCtx->cs.Sel != uCsStart)
+ {
+ rc = VINF_EM_DBG_STEPPED;
+ break;
+ }
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_DR_MASK;
+ }
+
+ /*
+ * Clear the X86_EFL_TF if necessary.
+ */
+ if (pVCpu->hm.s.fClearTrapFlag)
+ {
+ pVCpu->hm.s.fClearTrapFlag = false;
+ pCtx->eflags.Bits.u1TF = 0;
+ }
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+ return rc;
+}
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+/**
+ * Runs the nested-guest code using AMD-V.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pcLoops Pointer to the number of executed loops. If we're switching
+ * from the guest-code execution loop to this nested-guest
+ *                      execution loop, pass the remainder value; else pass 0.
+ */
+static int hmR0SvmRunGuestCodeNested(PVMCPU pVCpu, uint32_t *pcLoops)
+{
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ HMSVM_ASSERT_IN_NESTED_GUEST(pCtx);
+ Assert(pcLoops);
+ Assert(*pcLoops <= pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops);
+
+ SVMTRANSIENT SvmTransient;
+ RT_ZERO(SvmTransient);
+ SvmTransient.fUpdateTscOffsetting = true;
+ SvmTransient.pVmcb = pCtx->hwvirt.svm.CTX_SUFF(pVmcb);
+ SvmTransient.fIsNestedGuest = true;
+
+ int rc = VERR_INTERNAL_ERROR_4;
+ for (;;)
+ {
+ Assert(!HMR0SuspendPending());
+ HMSVM_ASSERT_CPU_SAFE(pVCpu);
+
+        /* Preparatory work for running nested-guest code; this may force us to return to
+ ring-3. This bugger disables interrupts on VINF_SUCCESS! */
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+ rc = hmR0SvmPreRunGuestNested(pVCpu, &SvmTransient);
+ if ( rc != VINF_SUCCESS
+ || !CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ {
+ break;
+ }
+
+ /*
+ * No longjmps to ring-3 from this point on!!!
+ *
+ * Asserts() will still longjmp to ring-3 (but won't return), which is intentional,
+ * better than a kernel panic. This also disables flushing of the R0-logger instance.
+ */
+ hmR0SvmPreRunGuestCommitted(pVCpu, &SvmTransient);
+
+ rc = hmR0SvmRunGuest(pVCpu, pCtx->hwvirt.svm.HCPhysVmcb);
+
+ /* Restore any residual host-state and save any bits shared between host and guest
+ into the guest-CPU state. Re-enables interrupts! */
+ hmR0SvmPostRunGuest(pVCpu, &SvmTransient, rc);
+
+ if (RT_LIKELY( rc == VINF_SUCCESS
+ && SvmTransient.u64ExitCode != SVM_EXIT_INVALID))
+ { /* extremely likely */ }
+ else
+ {
+ /* VMRUN failed, shouldn't really happen, Guru. */
+ if (rc != VINF_SUCCESS)
+ break;
+
+ /* Invalid nested-guest state. Cause a #VMEXIT but assert on strict builds. */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ AssertMsgFailed(("Invalid nested-guest state. rc=%Rrc u64ExitCode=%#RX64\n", rc, SvmTransient.u64ExitCode));
+ rc = VBOXSTRICTRC_TODO(IEMExecSvmVmexit(pVCpu, SVM_EXIT_INVALID, 0, 0));
+ break;
+ }
+
+ /* Handle the #VMEXIT. */
+ HMSVM_NESTED_EXITCODE_STAM_COUNTER_INC(SvmTransient.u64ExitCode);
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
+ VBOXVMM_R0_HMSVM_VMEXIT(pVCpu, pCtx, SvmTransient.u64ExitCode, pCtx->hwvirt.svm.CTX_SUFF(pVmcb));
+ rc = hmR0SvmHandleExitNested(pVCpu, &SvmTransient);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
+ if ( rc != VINF_SUCCESS
+ || !CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ break;
+ if (++(*pcLoops) >= pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops)
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+ rc = VINF_EM_RAW_INTERRUPT;
+ break;
+ }
+
+ /** @todo handle single-stepping */
+ }
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+ return rc;
+}
+#endif
+
+
+/**
+ * Runs the guest code using AMD-V.
+ *
+ * @returns Strict VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0DECL(VBOXSTRICTRC) SVMR0RunGuestCode(PVMCPU pVCpu)
+{
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+ HMSVM_ASSERT_PREEMPT_SAFE(pVCpu);
+ VMMRZCallRing3SetNotification(pVCpu, hmR0SvmCallRing3Callback, NULL /* pvUser */);
+
+ uint32_t cLoops = 0;
+ int rc;
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ if (!CPUMIsGuestInSvmNestedHwVirtMode(&pVCpu->cpum.GstCtx))
+#endif
+ {
+ if (!pVCpu->hm.s.fSingleInstruction)
+ rc = hmR0SvmRunGuestCodeNormal(pVCpu, &cLoops);
+ else
+ rc = hmR0SvmRunGuestCodeStep(pVCpu, &cLoops);
+ }
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ else
+ {
+ rc = VINF_SVM_VMRUN;
+ }
+
+ /* Re-check the nested-guest condition here as we may be transitioning from the normal
+ execution loop into the nested-guest, hence this is not placed in the 'else' part above. */
+ if (rc == VINF_SVM_VMRUN)
+ {
+ rc = hmR0SvmRunGuestCodeNested(pVCpu, &cLoops);
+ if (rc == VINF_SVM_VMEXIT)
+ rc = VINF_SUCCESS;
+ }
+#endif
+
+ /* Fixup error codes. */
+ if (rc == VERR_EM_INTERPRETER)
+ rc = VINF_EM_RAW_EMULATE_INSTR;
+ else if (rc == VINF_EM_RESET)
+ rc = VINF_EM_TRIPLE_FAULT;
+
+ /* Prepare to return to ring-3. This will remove longjmp notifications. */
+ rc = hmR0SvmExitToRing3(pVCpu, rc);
+ Assert(!VMMRZCallRing3IsNotificationSet(pVCpu));
+ return rc;
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+/**
+ * Determines whether an IOIO intercept is active for the nested-guest or not.
+ *
+ * @returns @c true if the intercept is active, @c false otherwise.
+ * @param pvIoBitmap Pointer to the nested-guest IO bitmap.
+ * @param pIoExitInfo Pointer to the SVMIOIOEXITINFO.
+ */
+static bool hmR0SvmIsIoInterceptActive(void *pvIoBitmap, PSVMIOIOEXITINFO pIoExitInfo)
+{
+ const uint16_t u16Port = pIoExitInfo->n.u16Port;
+ const SVMIOIOTYPE enmIoType = (SVMIOIOTYPE)pIoExitInfo->n.u1Type;
+ const uint8_t cbReg = (pIoExitInfo->u >> SVM_IOIO_OP_SIZE_SHIFT) & 7;
+ const uint8_t cAddrSizeBits = ((pIoExitInfo->u >> SVM_IOIO_ADDR_SIZE_SHIFT) & 7) << 4;
+ const uint8_t iEffSeg = pIoExitInfo->n.u3Seg;
+ const bool fRep = pIoExitInfo->n.u1Rep;
+ const bool fStrIo = pIoExitInfo->n.u1Str;
+
+ return HMIsSvmIoInterceptActive(pvIoBitmap, u16Port, enmIoType, cbReg, cAddrSizeBits, iEffSeg, fRep, fStrIo,
+ NULL /* pIoExitInfo */);
+}
+
+
+/**
+ * Handles a nested-guest \#VMEXIT (for all EXITCODE values except
+ * SVM_EXIT_INVALID).
+ *
+ * @returns VBox status code (informational status codes included).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM transient structure.
+ */
+static int hmR0SvmHandleExitNested(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_ASSERT_IN_NESTED_GUEST(&pVCpu->cpum.GstCtx);
+ Assert(pSvmTransient->u64ExitCode != SVM_EXIT_INVALID);
+ Assert(pSvmTransient->u64ExitCode <= SVM_EXIT_MAX);
+
+ /*
+ * We import the complete state here because we use separate VMCBs for the guest and the
+ * nested-guest, and the guest's VMCB is used after the #VMEXIT. We can only save/restore
+ * the #VMEXIT specific state if we used the same VMCB for both guest and nested-guest.
+ */
+#define NST_GST_VMEXIT_CALL_RET(a_pVCpu, a_uExitCode, a_uExitInfo1, a_uExitInfo2) \
+ do { \
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL); \
+ return VBOXSTRICTRC_TODO(IEMExecSvmVmexit((a_pVCpu), (a_uExitCode), (a_uExitInfo1), (a_uExitInfo2))); \
+ } while (0)
+
+ /*
+ * For all the #VMEXITs here we primarily figure out if the #VMEXIT is expected by the
+ * nested-guest. If it isn't, it should be handled by the (outer) guest.
+ */
+ PSVMVMCB pVmcbNstGst = pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pVmcb);
+ PSVMVMCBCTRL pVmcbNstGstCtrl = &pVmcbNstGst->ctrl;
+ uint64_t const uExitCode = pVmcbNstGstCtrl->u64ExitCode;
+ uint64_t const uExitInfo1 = pVmcbNstGstCtrl->u64ExitInfo1;
+ uint64_t const uExitInfo2 = pVmcbNstGstCtrl->u64ExitInfo2;
+
+ Assert(uExitCode == pVmcbNstGstCtrl->u64ExitCode);
+ switch (uExitCode)
+ {
+ case SVM_EXIT_CPUID:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_CPUID))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitCpuid(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_RDTSC:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_RDTSC))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitRdtsc(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_RDTSCP:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_RDTSCP))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitRdtscp(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_MONITOR:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_MONITOR))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitMonitor(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_MWAIT:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_MWAIT))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitMwait(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_HLT:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_HLT))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitHlt(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_MSR:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_MSR_PROT))
+ {
+ uint32_t const idMsr = pVCpu->cpum.GstCtx.ecx;
+ uint16_t offMsrpm;
+ uint8_t uMsrpmBit;
+ int rc = HMGetSvmMsrpmOffsetAndBit(idMsr, &offMsrpm, &uMsrpmBit);
+ if (RT_SUCCESS(rc))
+ {
+ Assert(uMsrpmBit == 0 || uMsrpmBit == 2 || uMsrpmBit == 4 || uMsrpmBit == 6);
+ Assert(offMsrpm < SVM_MSRPM_PAGES << X86_PAGE_4K_SHIFT);
+
+ uint8_t const *pbMsrBitmap = (uint8_t const *)pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pvMsrBitmap);
+ pbMsrBitmap += offMsrpm;
+ bool const fInterceptRead = RT_BOOL(*pbMsrBitmap & RT_BIT(uMsrpmBit));
+ bool const fInterceptWrite = RT_BOOL(*pbMsrBitmap & RT_BIT(uMsrpmBit + 1));
+
+ if ( (fInterceptWrite && pVmcbNstGstCtrl->u64ExitInfo1 == SVM_EXIT1_MSR_WRITE)
+ || (fInterceptRead && pVmcbNstGstCtrl->u64ExitInfo1 == SVM_EXIT1_MSR_READ))
+ {
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ }
+ }
+ else
+ {
+ /*
+ * MSRs not covered by the MSRPM automatically cause an #VMEXIT.
+ * See AMD-V spec. "15.11 MSR Intercepts".
+ */
+ Assert(rc == VERR_OUT_OF_RANGE);
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ }
+ }
+ return hmR0SvmExitMsr(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_IOIO:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_IOIO_PROT))
+ {
+ void *pvIoBitmap = pVCpu->cpum.GstCtx.hwvirt.svm.CTX_SUFF(pvIoBitmap);
+ SVMIOIOEXITINFO IoExitInfo;
+ IoExitInfo.u = pVmcbNstGst->ctrl.u64ExitInfo1;
+ bool const fIntercept = hmR0SvmIsIoInterceptActive(pvIoBitmap, &IoExitInfo);
+ if (fIntercept)
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ }
+ return hmR0SvmExitIOInstr(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_XCPT_PF:
+ {
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if (pVM->hm.s.fNestedPaging)
+ {
+ uint32_t const u32ErrCode = pVmcbNstGstCtrl->u64ExitInfo1;
+ uint64_t const uFaultAddress = pVmcbNstGstCtrl->u64ExitInfo2;
+
+ /* If the nested-guest is intercepting #PFs, cause a #PF #VMEXIT. */
+ if (HMIsGuestSvmXcptInterceptSet(pVCpu, X86_XCPT_PF))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, u32ErrCode, uFaultAddress);
+
+ /* If the nested-guest is not intercepting #PFs, forward the #PF to the guest. */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR2);
+ hmR0SvmSetPendingXcptPF(pVCpu, u32ErrCode, uFaultAddress);
+ return VINF_SUCCESS;
+ }
+ return hmR0SvmExitXcptPF(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_XCPT_UD:
+ {
+ if (HMIsGuestSvmXcptInterceptSet(pVCpu, X86_XCPT_UD))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ hmR0SvmSetPendingXcptUD(pVCpu);
+ return VINF_SUCCESS;
+ }
+
+ case SVM_EXIT_XCPT_MF:
+ {
+ if (HMIsGuestSvmXcptInterceptSet(pVCpu, X86_XCPT_MF))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitXcptMF(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_XCPT_DB:
+ {
+ if (HMIsGuestSvmXcptInterceptSet(pVCpu, X86_XCPT_DB))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmNestedExitXcptDB(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_XCPT_AC:
+ {
+ if (HMIsGuestSvmXcptInterceptSet(pVCpu, X86_XCPT_AC))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitXcptAC(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_XCPT_BP:
+ {
+ if (HMIsGuestSvmXcptInterceptSet(pVCpu, X86_XCPT_BP))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmNestedExitXcptBP(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_READ_CR0:
+ case SVM_EXIT_READ_CR3:
+ case SVM_EXIT_READ_CR4:
+ {
+ uint8_t const uCr = uExitCode - SVM_EXIT_READ_CR0;
+ if (HMIsGuestSvmReadCRxInterceptSet(pVCpu, uCr))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitReadCRx(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_CR0_SEL_WRITE:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_CR0_SEL_WRITE))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitWriteCRx(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_WRITE_CR0:
+ case SVM_EXIT_WRITE_CR3:
+ case SVM_EXIT_WRITE_CR4:
+ case SVM_EXIT_WRITE_CR8: /* CR8 writes would go to the V_TPR rather than here, since we run with V_INTR_MASKING. */
+ {
+ uint8_t const uCr = uExitCode - SVM_EXIT_WRITE_CR0;
+ Log4Func(("Write CR%u: uExitInfo1=%#RX64 uExitInfo2=%#RX64\n", uCr, uExitInfo1, uExitInfo2));
+
+ if (HMIsGuestSvmWriteCRxInterceptSet(pVCpu, uCr))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitWriteCRx(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_PAUSE:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_PAUSE))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitPause(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_VINTR:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_VINTR))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitUnexpected(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_INTR:
+ case SVM_EXIT_NMI:
+ case SVM_EXIT_SMI:
+ case SVM_EXIT_XCPT_NMI: /* Should not occur, SVM_EXIT_NMI is used instead. */
+ {
+ /*
+             * We shouldn't direct physical interrupts, NMIs or SMIs to the nested-guest.
+             *
+             * Although we don't intercept SMIs, the nested-guest might. Therefore, we might
+             * get an SMI #VMEXIT here, so we simply ignore it rather than cause a corresponding
+             * nested-guest #VMEXIT.
+ *
+ * We shall import the complete state here as we may cause #VMEXITs from ring-3
+ * while trying to inject interrupts, see comment at the top of this function.
+ */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_ALL);
+ return hmR0SvmExitIntr(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_FERR_FREEZE:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_FERR_FREEZE))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitFerrFreeze(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_INVLPG:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_INVLPG))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitInvlpg(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_WBINVD:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_WBINVD))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitWbinvd(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_INVD:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_INVD))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitInvd(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_RDPMC:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_RDPMC))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitRdpmc(pVCpu, pSvmTransient);
+ }
+
+ default:
+ {
+ switch (uExitCode)
+ {
+ case SVM_EXIT_READ_DR0: case SVM_EXIT_READ_DR1: case SVM_EXIT_READ_DR2: case SVM_EXIT_READ_DR3:
+ case SVM_EXIT_READ_DR6: case SVM_EXIT_READ_DR7: case SVM_EXIT_READ_DR8: case SVM_EXIT_READ_DR9:
+ case SVM_EXIT_READ_DR10: case SVM_EXIT_READ_DR11: case SVM_EXIT_READ_DR12: case SVM_EXIT_READ_DR13:
+ case SVM_EXIT_READ_DR14: case SVM_EXIT_READ_DR15:
+ {
+ uint8_t const uDr = uExitCode - SVM_EXIT_READ_DR0;
+ if (HMIsGuestSvmReadDRxInterceptSet(pVCpu, uDr))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitReadDRx(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_WRITE_DR0: case SVM_EXIT_WRITE_DR1: case SVM_EXIT_WRITE_DR2: case SVM_EXIT_WRITE_DR3:
+ case SVM_EXIT_WRITE_DR6: case SVM_EXIT_WRITE_DR7: case SVM_EXIT_WRITE_DR8: case SVM_EXIT_WRITE_DR9:
+ case SVM_EXIT_WRITE_DR10: case SVM_EXIT_WRITE_DR11: case SVM_EXIT_WRITE_DR12: case SVM_EXIT_WRITE_DR13:
+ case SVM_EXIT_WRITE_DR14: case SVM_EXIT_WRITE_DR15:
+ {
+ uint8_t const uDr = uExitCode - SVM_EXIT_WRITE_DR0;
+ if (HMIsGuestSvmWriteDRxInterceptSet(pVCpu, uDr))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitWriteDRx(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_XCPT_DE:
+ /* SVM_EXIT_XCPT_DB: */ /* Handled above. */
+ /* SVM_EXIT_XCPT_NMI: */ /* Handled above. */
+ /* SVM_EXIT_XCPT_BP: */ /* Handled above. */
+ case SVM_EXIT_XCPT_OF:
+ case SVM_EXIT_XCPT_BR:
+ /* SVM_EXIT_XCPT_UD: */ /* Handled above. */
+ case SVM_EXIT_XCPT_NM:
+ case SVM_EXIT_XCPT_DF:
+ case SVM_EXIT_XCPT_CO_SEG_OVERRUN:
+ case SVM_EXIT_XCPT_TS:
+ case SVM_EXIT_XCPT_NP:
+ case SVM_EXIT_XCPT_SS:
+ case SVM_EXIT_XCPT_GP:
+ /* SVM_EXIT_XCPT_PF: */ /* Handled above. */
+ case SVM_EXIT_XCPT_15: /* Reserved. */
+ /* SVM_EXIT_XCPT_MF: */ /* Handled above. */
+ /* SVM_EXIT_XCPT_AC: */ /* Handled above. */
+ case SVM_EXIT_XCPT_MC:
+ case SVM_EXIT_XCPT_XF:
+ case SVM_EXIT_XCPT_20: case SVM_EXIT_XCPT_21: case SVM_EXIT_XCPT_22: case SVM_EXIT_XCPT_23:
+ case SVM_EXIT_XCPT_24: case SVM_EXIT_XCPT_25: case SVM_EXIT_XCPT_26: case SVM_EXIT_XCPT_27:
+ case SVM_EXIT_XCPT_28: case SVM_EXIT_XCPT_29: case SVM_EXIT_XCPT_30: case SVM_EXIT_XCPT_31:
+ {
+ uint8_t const uVector = uExitCode - SVM_EXIT_XCPT_0;
+ if (HMIsGuestSvmXcptInterceptSet(pVCpu, uVector))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitXcptGeneric(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_XSETBV:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_XSETBV))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitXsetbv(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_TASK_SWITCH:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_TASK_SWITCH))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitTaskSwitch(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_IRET:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_IRET))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitIret(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_SHUTDOWN:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_SHUTDOWN))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitShutdown(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_VMMCALL:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_VMMCALL))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitVmmCall(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_CLGI:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_CLGI))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitClgi(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_STGI:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_STGI))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitStgi(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_VMLOAD:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_VMLOAD))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitVmload(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_VMSAVE:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_VMSAVE))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitVmsave(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_INVLPGA:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_INVLPGA))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitInvlpga(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_VMRUN:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_VMRUN))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ return hmR0SvmExitVmrun(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_RSM:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_RSM))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ hmR0SvmSetPendingXcptUD(pVCpu);
+ return VINF_SUCCESS;
+ }
+
+ case SVM_EXIT_SKINIT:
+ {
+ if (HMIsGuestSvmCtrlInterceptSet(pVCpu, SVM_CTRL_INTERCEPT_SKINIT))
+ NST_GST_VMEXIT_CALL_RET(pVCpu, uExitCode, uExitInfo1, uExitInfo2);
+ hmR0SvmSetPendingXcptUD(pVCpu);
+ return VINF_SUCCESS;
+ }
+
+ case SVM_EXIT_NPF:
+ {
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
+ return hmR0SvmExitNestedPF(pVCpu, pSvmTransient);
+ }
+
+ case SVM_EXIT_INIT: /* We shouldn't get INIT signals while executing a nested-guest. */
+ return hmR0SvmExitUnexpected(pVCpu, pSvmTransient);
+
+ default:
+ {
+                    AssertMsgFailed(("hmR0SvmHandleExitNested: Unknown exit code %#RX64\n", pSvmTransient->u64ExitCode));
+ pVCpu->hm.s.u32HMError = pSvmTransient->u64ExitCode;
+ return VERR_SVM_UNKNOWN_EXIT;
+ }
+ }
+ }
+ }
+ /* not reached */
+
+#undef NST_GST_VMEXIT_CALL_RET
+}
+#endif
+
+
+/**
+ * Handles a guest \#VMEXIT (for all EXITCODE values except SVM_EXIT_INVALID).
+ *
+ * @returns VBox status code (informational status codes included).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM transient structure.
+ */
+static int hmR0SvmHandleExit(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ Assert(pSvmTransient->u64ExitCode != SVM_EXIT_INVALID);
+ Assert(pSvmTransient->u64ExitCode <= SVM_EXIT_MAX);
+
+#ifdef DEBUG_ramshankar
+# define VMEXIT_CALL_RET(a_fDbg, a_CallExpr) \
+ do { \
+ if ((a_fDbg) == 1) \
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL); \
+ int rc = a_CallExpr; \
+ if ((a_fDbg) == 1) \
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); \
+ return rc; \
+ } while (0)
+#else
+# define VMEXIT_CALL_RET(a_fDbg, a_CallExpr) return a_CallExpr
+#endif
+
+ /*
+ * The ordering of the case labels is based on most-frequently-occurring #VMEXITs
+ * for most guests under normal workloads (for some definition of "normal").
+ */
+ uint64_t const uExitCode = pSvmTransient->u64ExitCode;
+ switch (uExitCode)
+ {
+ case SVM_EXIT_NPF: VMEXIT_CALL_RET(0, hmR0SvmExitNestedPF(pVCpu, pSvmTransient));
+ case SVM_EXIT_IOIO: VMEXIT_CALL_RET(0, hmR0SvmExitIOInstr(pVCpu, pSvmTransient));
+ case SVM_EXIT_RDTSC: VMEXIT_CALL_RET(0, hmR0SvmExitRdtsc(pVCpu, pSvmTransient));
+ case SVM_EXIT_RDTSCP: VMEXIT_CALL_RET(0, hmR0SvmExitRdtscp(pVCpu, pSvmTransient));
+ case SVM_EXIT_CPUID: VMEXIT_CALL_RET(0, hmR0SvmExitCpuid(pVCpu, pSvmTransient));
+ case SVM_EXIT_XCPT_PF: VMEXIT_CALL_RET(0, hmR0SvmExitXcptPF(pVCpu, pSvmTransient));
+ case SVM_EXIT_MSR: VMEXIT_CALL_RET(0, hmR0SvmExitMsr(pVCpu, pSvmTransient));
+ case SVM_EXIT_MONITOR: VMEXIT_CALL_RET(0, hmR0SvmExitMonitor(pVCpu, pSvmTransient));
+ case SVM_EXIT_MWAIT: VMEXIT_CALL_RET(0, hmR0SvmExitMwait(pVCpu, pSvmTransient));
+ case SVM_EXIT_HLT: VMEXIT_CALL_RET(0, hmR0SvmExitHlt(pVCpu, pSvmTransient));
+
+ case SVM_EXIT_XCPT_NMI: /* Should not occur, SVM_EXIT_NMI is used instead. */
+ case SVM_EXIT_INTR:
+ case SVM_EXIT_NMI: VMEXIT_CALL_RET(0, hmR0SvmExitIntr(pVCpu, pSvmTransient));
+
+ case SVM_EXIT_READ_CR0:
+ case SVM_EXIT_READ_CR3:
+ case SVM_EXIT_READ_CR4: VMEXIT_CALL_RET(0, hmR0SvmExitReadCRx(pVCpu, pSvmTransient));
+
+ case SVM_EXIT_CR0_SEL_WRITE:
+ case SVM_EXIT_WRITE_CR0:
+ case SVM_EXIT_WRITE_CR3:
+ case SVM_EXIT_WRITE_CR4:
+ case SVM_EXIT_WRITE_CR8: VMEXIT_CALL_RET(0, hmR0SvmExitWriteCRx(pVCpu, pSvmTransient));
+
+ case SVM_EXIT_VINTR: VMEXIT_CALL_RET(0, hmR0SvmExitVIntr(pVCpu, pSvmTransient));
+ case SVM_EXIT_PAUSE: VMEXIT_CALL_RET(0, hmR0SvmExitPause(pVCpu, pSvmTransient));
+ case SVM_EXIT_VMMCALL: VMEXIT_CALL_RET(0, hmR0SvmExitVmmCall(pVCpu, pSvmTransient));
+ case SVM_EXIT_INVLPG: VMEXIT_CALL_RET(0, hmR0SvmExitInvlpg(pVCpu, pSvmTransient));
+ case SVM_EXIT_WBINVD: VMEXIT_CALL_RET(0, hmR0SvmExitWbinvd(pVCpu, pSvmTransient));
+ case SVM_EXIT_INVD: VMEXIT_CALL_RET(0, hmR0SvmExitInvd(pVCpu, pSvmTransient));
+ case SVM_EXIT_RDPMC: VMEXIT_CALL_RET(0, hmR0SvmExitRdpmc(pVCpu, pSvmTransient));
+ case SVM_EXIT_IRET: VMEXIT_CALL_RET(0, hmR0SvmExitIret(pVCpu, pSvmTransient));
+ case SVM_EXIT_XCPT_UD: VMEXIT_CALL_RET(0, hmR0SvmExitXcptUD(pVCpu, pSvmTransient));
+ case SVM_EXIT_XCPT_MF: VMEXIT_CALL_RET(0, hmR0SvmExitXcptMF(pVCpu, pSvmTransient));
+ case SVM_EXIT_XCPT_DB: VMEXIT_CALL_RET(0, hmR0SvmExitXcptDB(pVCpu, pSvmTransient));
+ case SVM_EXIT_XCPT_AC: VMEXIT_CALL_RET(0, hmR0SvmExitXcptAC(pVCpu, pSvmTransient));
+ case SVM_EXIT_XCPT_BP: VMEXIT_CALL_RET(0, hmR0SvmExitXcptBP(pVCpu, pSvmTransient));
+ case SVM_EXIT_XCPT_GP: VMEXIT_CALL_RET(0, hmR0SvmExitXcptGP(pVCpu, pSvmTransient));
+ case SVM_EXIT_XSETBV: VMEXIT_CALL_RET(0, hmR0SvmExitXsetbv(pVCpu, pSvmTransient));
+ case SVM_EXIT_FERR_FREEZE: VMEXIT_CALL_RET(0, hmR0SvmExitFerrFreeze(pVCpu, pSvmTransient));
+
+ default:
+ {
+ switch (pSvmTransient->u64ExitCode)
+ {
+ case SVM_EXIT_READ_DR0: case SVM_EXIT_READ_DR1: case SVM_EXIT_READ_DR2: case SVM_EXIT_READ_DR3:
+ case SVM_EXIT_READ_DR6: case SVM_EXIT_READ_DR7: case SVM_EXIT_READ_DR8: case SVM_EXIT_READ_DR9:
+ case SVM_EXIT_READ_DR10: case SVM_EXIT_READ_DR11: case SVM_EXIT_READ_DR12: case SVM_EXIT_READ_DR13:
+ case SVM_EXIT_READ_DR14: case SVM_EXIT_READ_DR15:
+ VMEXIT_CALL_RET(0, hmR0SvmExitReadDRx(pVCpu, pSvmTransient));
+
+ case SVM_EXIT_WRITE_DR0: case SVM_EXIT_WRITE_DR1: case SVM_EXIT_WRITE_DR2: case SVM_EXIT_WRITE_DR3:
+ case SVM_EXIT_WRITE_DR6: case SVM_EXIT_WRITE_DR7: case SVM_EXIT_WRITE_DR8: case SVM_EXIT_WRITE_DR9:
+ case SVM_EXIT_WRITE_DR10: case SVM_EXIT_WRITE_DR11: case SVM_EXIT_WRITE_DR12: case SVM_EXIT_WRITE_DR13:
+ case SVM_EXIT_WRITE_DR14: case SVM_EXIT_WRITE_DR15:
+ VMEXIT_CALL_RET(0, hmR0SvmExitWriteDRx(pVCpu, pSvmTransient));
+
+ case SVM_EXIT_TASK_SWITCH: VMEXIT_CALL_RET(0, hmR0SvmExitTaskSwitch(pVCpu, pSvmTransient));
+ case SVM_EXIT_SHUTDOWN: VMEXIT_CALL_RET(0, hmR0SvmExitShutdown(pVCpu, pSvmTransient));
+
+ case SVM_EXIT_SMI:
+ case SVM_EXIT_INIT:
+ {
+ /*
+                     * We don't intercept SMIs. As for INIT signals, they really shouldn't ever happen here.
+                     * If one ever does, we want to know about it, so log the exit code and bail.
+ */
+ VMEXIT_CALL_RET(0, hmR0SvmExitUnexpected(pVCpu, pSvmTransient));
+ }
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ case SVM_EXIT_CLGI: VMEXIT_CALL_RET(0, hmR0SvmExitClgi(pVCpu, pSvmTransient));
+ case SVM_EXIT_STGI: VMEXIT_CALL_RET(0, hmR0SvmExitStgi(pVCpu, pSvmTransient));
+ case SVM_EXIT_VMLOAD: VMEXIT_CALL_RET(0, hmR0SvmExitVmload(pVCpu, pSvmTransient));
+ case SVM_EXIT_VMSAVE: VMEXIT_CALL_RET(0, hmR0SvmExitVmsave(pVCpu, pSvmTransient));
+ case SVM_EXIT_INVLPGA: VMEXIT_CALL_RET(0, hmR0SvmExitInvlpga(pVCpu, pSvmTransient));
+ case SVM_EXIT_VMRUN: VMEXIT_CALL_RET(0, hmR0SvmExitVmrun(pVCpu, pSvmTransient));
+#else
+ case SVM_EXIT_CLGI:
+ case SVM_EXIT_STGI:
+ case SVM_EXIT_VMLOAD:
+ case SVM_EXIT_VMSAVE:
+ case SVM_EXIT_INVLPGA:
+ case SVM_EXIT_VMRUN:
+#endif
+ case SVM_EXIT_RSM:
+ case SVM_EXIT_SKINIT:
+ {
+ hmR0SvmSetPendingXcptUD(pVCpu);
+ return VINF_SUCCESS;
+ }
+
+#ifdef HMSVM_ALWAYS_TRAP_ALL_XCPTS
+ case SVM_EXIT_XCPT_DE:
+ /* SVM_EXIT_XCPT_DB: */ /* Handled above. */
+ /* SVM_EXIT_XCPT_NMI: */ /* Handled above. */
+ /* SVM_EXIT_XCPT_BP: */ /* Handled above. */
+ case SVM_EXIT_XCPT_OF:
+ case SVM_EXIT_XCPT_BR:
+ /* SVM_EXIT_XCPT_UD: */ /* Handled above. */
+ case SVM_EXIT_XCPT_NM:
+ case SVM_EXIT_XCPT_DF:
+ case SVM_EXIT_XCPT_CO_SEG_OVERRUN:
+ case SVM_EXIT_XCPT_TS:
+ case SVM_EXIT_XCPT_NP:
+ case SVM_EXIT_XCPT_SS:
+ /* SVM_EXIT_XCPT_GP: */ /* Handled above. */
+ /* SVM_EXIT_XCPT_PF: */
+ case SVM_EXIT_XCPT_15: /* Reserved. */
+ /* SVM_EXIT_XCPT_MF: */ /* Handled above. */
+ /* SVM_EXIT_XCPT_AC: */ /* Handled above. */
+ case SVM_EXIT_XCPT_MC:
+ case SVM_EXIT_XCPT_XF:
+ case SVM_EXIT_XCPT_20: case SVM_EXIT_XCPT_21: case SVM_EXIT_XCPT_22: case SVM_EXIT_XCPT_23:
+ case SVM_EXIT_XCPT_24: case SVM_EXIT_XCPT_25: case SVM_EXIT_XCPT_26: case SVM_EXIT_XCPT_27:
+ case SVM_EXIT_XCPT_28: case SVM_EXIT_XCPT_29: case SVM_EXIT_XCPT_30: case SVM_EXIT_XCPT_31:
+ VMEXIT_CALL_RET(0, hmR0SvmExitXcptGeneric(pVCpu, pSvmTransient));
+#endif /* HMSVM_ALWAYS_TRAP_ALL_XCPTS */
+
+ default:
+ {
+ AssertMsgFailed(("hmR0SvmHandleExit: Unknown exit code %#RX64\n", uExitCode));
+ pVCpu->hm.s.u32HMError = uExitCode;
+ return VERR_SVM_UNKNOWN_EXIT;
+ }
+ }
+ }
+ }
+ /* not reached */
+#undef VMEXIT_CALL_RET
+}
+
+
+#ifdef VBOX_STRICT
+/* Is there some generic IPRT define for this that is not in Runtime/internal/\* ?? */
+# define HMSVM_ASSERT_PREEMPT_CPUID_VAR() \
+ RTCPUID const idAssertCpu = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId()
+
+# define HMSVM_ASSERT_PREEMPT_CPUID() \
+ do \
+ { \
+ RTCPUID const idAssertCpuNow = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId(); \
+ AssertMsg(idAssertCpu == idAssertCpuNow, ("SVM %#x, %#x\n", idAssertCpu, idAssertCpuNow)); \
+ } while (0)
+
+# define HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pSvmTransient) \
+ do { \
+ AssertPtr((a_pVCpu)); \
+ AssertPtr((a_pSvmTransient)); \
+ Assert(ASMIntAreEnabled()); \
+ HMSVM_ASSERT_PREEMPT_SAFE((a_pVCpu)); \
+ HMSVM_ASSERT_PREEMPT_CPUID_VAR(); \
+ Log4Func(("vcpu[%u] -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-\n", (a_pVCpu)->idCpu)); \
+ HMSVM_ASSERT_PREEMPT_SAFE((a_pVCpu)); \
+ if (VMMR0IsLogFlushDisabled((a_pVCpu))) \
+ HMSVM_ASSERT_PREEMPT_CPUID(); \
+ } while (0)
+#else
+# define HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pSvmTransient) \
+ do { \
+ RT_NOREF2(a_pVCpu, a_pSvmTransient); \
+ } while (0)
+#endif
+
+
+/**
+ * Gets the IEM exception flags for the specified SVM event.
+ *
+ * @returns The IEM exception flags.
+ * @param pEvent Pointer to the SVM event.
+ *
+ * @remarks This function currently only constructs flags required for
+ * IEMEvaluateRecursiveXcpt and not the complete flags (e.g. error-code
+ * and CR2 aspects of an exception are not included).
+ */
+static uint32_t hmR0SvmGetIemXcptFlags(PCSVMEVENT pEvent)
+{
+ uint8_t const uEventType = pEvent->n.u3Type;
+ uint32_t fIemXcptFlags;
+ switch (uEventType)
+ {
+ case SVM_EVENT_EXCEPTION:
+ /*
+ * Only INT3 and INTO instructions can raise #BP and #OF exceptions.
+ * See AMD spec. Table 8-1. "Interrupt Vector Source and Cause".
+ */
+ if (pEvent->n.u8Vector == X86_XCPT_BP)
+ {
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT | IEM_XCPT_FLAGS_BP_INSTR;
+ break;
+ }
+ if (pEvent->n.u8Vector == X86_XCPT_OF)
+ {
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT | IEM_XCPT_FLAGS_OF_INSTR;
+ break;
+ }
+ /** @todo How do we distinguish ICEBP \#DB from the regular one? */
+ RT_FALL_THRU();
+ case SVM_EVENT_NMI:
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_CPU_XCPT;
+ break;
+
+ case SVM_EVENT_EXTERNAL_IRQ:
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_EXT_INT;
+ break;
+
+ case SVM_EVENT_SOFTWARE_INT:
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT;
+ break;
+
+ default:
+ fIemXcptFlags = 0;
+ AssertMsgFailed(("Unexpected event type! uEventType=%#x uVector=%#x", uEventType, pEvent->n.u8Vector));
+ break;
+ }
+ return fIemXcptFlags;
+}
+
+
+/**
+ * Handle a condition that occurred while delivering an event through the guest
+ * IDT.
+ *
+ * @returns VBox status code (informational error codes included).
+ * @retval VINF_SUCCESS if we should continue handling the \#VMEXIT.
+ * @retval VINF_HM_DOUBLE_FAULT if a \#DF condition was detected and we ought to
+ *          continue execution of the guest which will deliver the \#DF.
+ * @retval VINF_EM_RESET if we detected a triple-fault condition.
+ * @retval VERR_EM_GUEST_CPU_HANG if we detected a guest CPU hang.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pSvmTransient Pointer to the SVM transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0SvmCheckExitDueToEventDelivery(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ int rc = VINF_SUCCESS;
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR2);
+
+ Log4(("EXITINTINFO: Pending vectoring event %#RX64 Valid=%RTbool ErrValid=%RTbool Err=%#RX32 Type=%u Vector=%u\n",
+ pVmcb->ctrl.ExitIntInfo.u, !!pVmcb->ctrl.ExitIntInfo.n.u1Valid, !!pVmcb->ctrl.ExitIntInfo.n.u1ErrorCodeValid,
+ pVmcb->ctrl.ExitIntInfo.n.u32ErrorCode, pVmcb->ctrl.ExitIntInfo.n.u3Type, pVmcb->ctrl.ExitIntInfo.n.u8Vector));
+
+ /*
+     * The EXITINTINFO (if valid) contains the prior exception (IDT vector) that was being
+     * delivered to the guest when the condition causing this intercepted #VMEXIT (exit vector) arose.
+ *
+ * See AMD spec. 15.7.3 "EXITINFO Pseudo-Code".
+ */
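+    /*
+     * Illustrative example of the kind of situation handled below (editorial note, one case of
+     * many): the guest raises an exception and, while the CPU is delivering it through the IDT,
+     * the delivery itself triggers an intercepted #PF.  EXITINTINFO then holds the original
+     * event (the IDT vector) while the #VMEXIT code is SVM_EXIT_XCPT_PF (the exit vector), and
+     * IEMEvaluateRecursiveXcpt below decides how the two relate (re-deliver the original event,
+     * raise a #DF, signal a triple fault, etc.).
+     */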
+ if (pVmcb->ctrl.ExitIntInfo.n.u1Valid)
+ {
+ IEMXCPTRAISE enmRaise;
+ IEMXCPTRAISEINFO fRaiseInfo;
+ bool const fExitIsHwXcpt = pSvmTransient->u64ExitCode - SVM_EXIT_XCPT_0 <= SVM_EXIT_XCPT_31;
+ uint8_t const uIdtVector = pVmcb->ctrl.ExitIntInfo.n.u8Vector;
+ if (fExitIsHwXcpt)
+ {
+ uint8_t const uExitVector = pSvmTransient->u64ExitCode - SVM_EXIT_XCPT_0;
+ uint32_t const fIdtVectorFlags = hmR0SvmGetIemXcptFlags(&pVmcb->ctrl.ExitIntInfo);
+ uint32_t const fExitVectorFlags = IEM_XCPT_FLAGS_T_CPU_XCPT;
+ enmRaise = IEMEvaluateRecursiveXcpt(pVCpu, fIdtVectorFlags, uIdtVector, fExitVectorFlags, uExitVector, &fRaiseInfo);
+ }
+ else
+ {
+ /*
+ * If delivery of an event caused a #VMEXIT that is not an exception (e.g. #NPF)
+ * then we end up here.
+ *
+ * If the event was:
+ * - a software interrupt, we can re-execute the instruction which will
+ * regenerate the event.
+ * - an NMI, we need to clear NMI blocking and re-inject the NMI.
+ * - a hardware exception or external interrupt, we re-inject it.
+ */
+ fRaiseInfo = IEMXCPTRAISEINFO_NONE;
+ if (pVmcb->ctrl.ExitIntInfo.n.u3Type == SVM_EVENT_SOFTWARE_INT)
+ enmRaise = IEMXCPTRAISE_REEXEC_INSTR;
+ else
+ enmRaise = IEMXCPTRAISE_PREV_EVENT;
+ }
+
+ switch (enmRaise)
+ {
+ case IEMXCPTRAISE_CURRENT_XCPT:
+ case IEMXCPTRAISE_PREV_EVENT:
+ {
+ /* For software interrupts, we shall re-execute the instruction. */
+ if (!(fRaiseInfo & IEMXCPTRAISEINFO_SOFT_INT_XCPT))
+ {
+ RTGCUINTPTR GCPtrFaultAddress = 0;
+
+ /* If we are re-injecting an NMI, clear NMI blocking. */
+ if (pVmcb->ctrl.ExitIntInfo.n.u3Type == SVM_EVENT_NMI)
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS);
+
+ /* Determine a vectoring #PF condition, see comment in hmR0SvmExitXcptPF(). */
+ if (fRaiseInfo & (IEMXCPTRAISEINFO_EXT_INT_PF | IEMXCPTRAISEINFO_NMI_PF))
+ {
+ pSvmTransient->fVectoringPF = true;
+ Log4Func(("IDT: Pending vectoring #PF due to delivery of Ext-Int/NMI. uCR2=%#RX64\n",
+ pVCpu->cpum.GstCtx.cr2));
+ }
+ else if ( pVmcb->ctrl.ExitIntInfo.n.u3Type == SVM_EVENT_EXCEPTION
+ && uIdtVector == X86_XCPT_PF)
+ {
+ /*
+ * If the previous exception was a #PF, we need to recover the CR2 value.
+ * This can't happen with shadow paging.
+ */
+ GCPtrFaultAddress = pVCpu->cpum.GstCtx.cr2;
+ }
+
+ /*
+                     * Without nested paging, when uExitVector is #PF, the CR2 value will be updated from the
+                     * VMCB's exit info. fields if it's a guest #PF; see hmR0SvmExitXcptPF().
+ */
+ Assert(pVmcb->ctrl.ExitIntInfo.n.u3Type != SVM_EVENT_SOFTWARE_INT);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingReflect);
+ hmR0SvmSetPendingEvent(pVCpu, &pVmcb->ctrl.ExitIntInfo, GCPtrFaultAddress);
+
+ Log4Func(("IDT: Pending vectoring event %#RX64 ErrValid=%RTbool Err=%#RX32 GCPtrFaultAddress=%#RX64\n",
+ pVmcb->ctrl.ExitIntInfo.u, RT_BOOL(pVmcb->ctrl.ExitIntInfo.n.u1ErrorCodeValid),
+ pVmcb->ctrl.ExitIntInfo.n.u32ErrorCode, GCPtrFaultAddress));
+ }
+ break;
+ }
+
+ case IEMXCPTRAISE_REEXEC_INSTR:
+ {
+ Assert(rc == VINF_SUCCESS);
+ break;
+ }
+
+ case IEMXCPTRAISE_DOUBLE_FAULT:
+ {
+ /*
+                 * Determine a vectoring double #PF condition. Used later, when PGM evaluates
+                 * the second #PF as a guest #PF (and not a shadow #PF), which then needs to be
+                 * converted into a #DF.
+ */
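+                /*
+                 * Illustrative note: the IEMXCPTRAISEINFO_PF_PF case checked below is the
+                 * #PF-raised-while-delivering-an-earlier-#PF situation, i.e. the classic
+                 * double page fault scenario.
+                 */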
+ if (fRaiseInfo & IEMXCPTRAISEINFO_PF_PF)
+ {
+ Log4Func(("IDT: Pending vectoring double #PF uCR2=%#RX64\n", pVCpu->cpum.GstCtx.cr2));
+ pSvmTransient->fVectoringDoublePF = true;
+ Assert(rc == VINF_SUCCESS);
+ }
+ else
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingReflect);
+ hmR0SvmSetPendingXcptDF(pVCpu);
+ rc = VINF_HM_DOUBLE_FAULT;
+ }
+ break;
+ }
+
+ case IEMXCPTRAISE_TRIPLE_FAULT:
+ {
+ rc = VINF_EM_RESET;
+ break;
+ }
+
+ case IEMXCPTRAISE_CPU_HANG:
+ {
+ rc = VERR_EM_GUEST_CPU_HANG;
+ break;
+ }
+
+ default:
+ AssertMsgFailedBreakStmt(("Bogus enmRaise value: %d (%#x)\n", enmRaise, enmRaise), rc = VERR_SVM_IPE_2);
+ }
+ }
+ Assert(rc == VINF_SUCCESS || rc == VINF_HM_DOUBLE_FAULT || rc == VINF_EM_RESET || rc == VERR_EM_GUEST_CPU_HANG);
+ return rc;
+}
+
+
+/**
+ * Advances the guest RIP by the number of bytes specified in @a cb.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param cb RIP increment value in bytes.
+ */
+DECLINLINE(void) hmR0SvmAdvanceRip(PVMCPU pVCpu, uint32_t cb)
+{
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ pCtx->rip += cb;
+
+ /* Update interrupt shadow. */
+ if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
+ && pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+}
+
+
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- #VMEXIT handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+
+/** @name \#VMEXIT handlers.
+ * @{
+ */
+
+/**
+ * \#VMEXIT handler for external interrupts, NMIs, FPU assertion freeze and INIT
+ * signals (SVM_EXIT_INTR, SVM_EXIT_NMI, SVM_EXIT_FERR_FREEZE, SVM_EXIT_INIT).
+ */
+HMSVM_EXIT_DECL hmR0SvmExitIntr(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ if (pSvmTransient->u64ExitCode == SVM_EXIT_NMI)
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
+ else if (pSvmTransient->u64ExitCode == SVM_EXIT_INTR)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitExtInt);
+
+ /*
+ * AMD-V has no preemption timer and the generic periodic preemption timer has no way to
+     * signal -before- the timer fires if the current interrupt is our own timer or some
+     * other host interrupt. We also cannot examine what interrupt it is until the host
+     * actually takes the interrupt.
+ *
+ * Going back to executing guest code here unconditionally causes random scheduling
+ * problems (observed on an AMD Phenom 9850 Quad-Core on Windows 64-bit host).
+ */
+ return VINF_EM_RAW_INTERRUPT;
+}
+
+
+/**
+ * \#VMEXIT handler for WBINVD (SVM_EXIT_WBINVD). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitWbinvd(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedWbinvd(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for INVD (SVM_EXIT_INVD). Unconditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitInvd(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedInvd(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for CPUID (SVM_EXIT_CPUID). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitCpuid(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_RAX | CPUMCTX_EXTRN_RCX);
+ VBOXSTRICTRC rcStrict;
+ PCEMEXITREC pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
+ EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_CPUID),
+ pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
+ if (!pExitRec)
+ {
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedCpuid(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ }
+ else
+ {
+ /*
+ * Frequent exit or something needing probing. Get state and call EMHistoryExec.
+ */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+
+ Log4(("CpuIdExit/%u: %04x:%08RX64: %#x/%#x -> EMHistoryExec\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ecx));
+
+ rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
+
+ Log4(("CpuIdExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+ VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
+ }
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for RDTSC (SVM_EXIT_RDTSC). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitRdtsc(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_CR4);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedRdtsc(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_SUCCESS)
+ pSvmTransient->fUpdateTscOffsetting = true;
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for RDTSCP (SVM_EXIT_RDTSCP). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitRdtscp(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_TSC_AUX);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedRdtscp(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_SUCCESS)
+ pSvmTransient->fUpdateTscOffsetting = true;
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for RDPMC (SVM_EXIT_RDPMC). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitRdpmc(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_CR4);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedRdpmc(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for INVLPG (SVM_EXIT_INVLPG). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitInvlpg(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsDecodeAssists = hmR0SvmSupportsDecodeAssists(pVCpu);
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if ( fSupportsDecodeAssists
+ && fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ RTGCPTR const GCPtrPage = pVmcb->ctrl.u64ExitInfo1;
+ rcStrict = IEMExecDecodedInvlpg(pVCpu, cbInstr, GCPtrPage);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_VAL(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for HLT (SVM_EXIT_HLT). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitHlt(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedHlt(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if ( rcStrict == VINF_EM_HALT
+ || rcStrict == VINF_SUCCESS)
+ rcStrict = EMShouldContinueAfterHalt(pVCpu, &pVCpu->cpum.GstCtx) ? VINF_SUCCESS : VINF_EM_HALT;
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
+ if (rcStrict != VINF_SUCCESS)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHltToR3);
+    return VBOXSTRICTRC_VAL(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for MONITOR (SVM_EXIT_MONITOR). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitMonitor(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ /*
+     * If the instruction length supplied by the CPU is 3 bytes, we can be certain that no
+     * segment override prefix is present (and can thus use the default segment DS). Otherwise, a
+     * segment override prefix or other prefixes might be used, in which case we fall back to
+     * IEMExecOne() to figure it out.
+ */
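+    /*
+     * For reference: MONITOR itself encodes as 0F 01 C8, i.e. exactly 3 bytes, so any segment
+     * override or other prefix necessarily makes the instruction longer than that.
+     */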
+ VBOXSTRICTRC rcStrict;
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = hmR0SvmSupportsNextRipSave(pVCpu) ? pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip : 0;
+ if (cbInstr)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK | CPUMCTX_EXTRN_DS);
+ rcStrict = IEMExecDecodedMonitor(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for MWAIT (SVM_EXIT_MWAIT). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitMwait(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedMwait(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if ( rcStrict == VINF_EM_HALT
+ && EMMonitorWaitShouldContinue(pVCpu, &pVCpu->cpum.GstCtx))
+ rcStrict = VINF_SUCCESS;
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for shutdown (triple-fault) (SVM_EXIT_SHUTDOWN). Conditional
+ * \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitShutdown(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ return VINF_EM_RESET;
+}
+
+
+/**
+ * \#VMEXIT handler for unexpected exits. Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitUnexpected(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ AssertMsgFailed(("hmR0SvmExitUnexpected: ExitCode=%#RX64 uExitInfo1=%#RX64 uExitInfo2=%#RX64\n", pSvmTransient->u64ExitCode,
+ pVmcb->ctrl.u64ExitInfo1, pVmcb->ctrl.u64ExitInfo2));
+ RT_NOREF(pVmcb);
+ pVCpu->hm.s.u32HMError = (uint32_t)pSvmTransient->u64ExitCode;
+ return VERR_SVM_UNEXPECTED_EXIT;
+}
+
+
+/**
+ * \#VMEXIT handler for CRx reads (SVM_EXIT_READ_CR*). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitReadCRx(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ Log4Func(("CS:RIP=%04x:%#RX64\n", pCtx->cs.Sel, pCtx->rip));
+#ifdef VBOX_WITH_STATISTICS
+ switch (pSvmTransient->u64ExitCode)
+ {
+ case SVM_EXIT_READ_CR0: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR0Read); break;
+ case SVM_EXIT_READ_CR2: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR2Read); break;
+ case SVM_EXIT_READ_CR3: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR3Read); break;
+ case SVM_EXIT_READ_CR4: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR4Read); break;
+ case SVM_EXIT_READ_CR8: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR8Read); break;
+ }
+#endif
+
+ bool const fSupportsDecodeAssists = hmR0SvmSupportsDecodeAssists(pVCpu);
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if ( fSupportsDecodeAssists
+ && fSupportsNextRipSave)
+ {
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ bool const fMovCRx = RT_BOOL(pVmcb->ctrl.u64ExitInfo1 & SVM_EXIT1_MOV_CRX_MASK);
+ if (fMovCRx)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_CR_MASK
+ | CPUMCTX_EXTRN_APIC_TPR);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pCtx->rip;
+ uint8_t const iCrReg = pSvmTransient->u64ExitCode - SVM_EXIT_READ_CR0;
+ uint8_t const iGReg = pVmcb->ctrl.u64ExitInfo1 & SVM_EXIT1_MOV_CRX_GPR_NUMBER;
+ VBOXSTRICTRC rcStrict = IEMExecDecodedMovCRxRead(pVCpu, cbInstr, iGReg, iCrReg);
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_VAL(rcStrict);
+ }
+ /* else: SMSW instruction, fall back below to IEM for this. */
+ }
+
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ VBOXSTRICTRC rcStrict = IEMExecOne(pVCpu);
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_PGM_SYNC_CR3
+ || rcStrict == VINF_IEM_RAISED_XCPT,
+ ("hmR0SvmExitReadCRx: IEMExecOne failed rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+ Assert((pSvmTransient->u64ExitCode - SVM_EXIT_READ_CR0) <= 15);
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for CRx writes (SVM_EXIT_WRITE_CR*). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitWriteCRx(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ uint64_t const uExitCode = pSvmTransient->u64ExitCode;
+ uint8_t const iCrReg = uExitCode == SVM_EXIT_CR0_SEL_WRITE ? 0 : (pSvmTransient->u64ExitCode - SVM_EXIT_WRITE_CR0);
+ Assert(iCrReg <= 15);
+
+ VBOXSTRICTRC rcStrict = VERR_SVM_IPE_5;
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ bool fDecodedInstr = false;
+ bool const fSupportsDecodeAssists = hmR0SvmSupportsDecodeAssists(pVCpu);
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if ( fSupportsDecodeAssists
+ && fSupportsNextRipSave)
+ {
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ bool const fMovCRx = RT_BOOL(pVmcb->ctrl.u64ExitInfo1 & SVM_EXIT1_MOV_CRX_MASK);
+ if (fMovCRx)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK | CPUMCTX_EXTRN_CR3 | CPUMCTX_EXTRN_CR4
+ | CPUMCTX_EXTRN_APIC_TPR);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pCtx->rip;
+ uint8_t const iGReg = pVmcb->ctrl.u64ExitInfo1 & SVM_EXIT1_MOV_CRX_GPR_NUMBER;
+ Log4Func(("Mov CR%u w/ iGReg=%#x\n", iCrReg, iGReg));
+ rcStrict = IEMExecDecodedMovCRxWrite(pVCpu, cbInstr, iCrReg, iGReg);
+ fDecodedInstr = true;
+ }
+ /* else: LMSW or CLTS instruction, fall back below to IEM for this. */
+ }
+
+ if (!fDecodedInstr)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ Log4Func(("iCrReg=%#x\n", iCrReg));
+ rcStrict = IEMExecOne(pVCpu);
+ if (RT_UNLIKELY( rcStrict == VERR_IEM_ASPECT_NOT_IMPLEMENTED
+ || rcStrict == VERR_IEM_INSTR_NOT_IMPLEMENTED))
+ rcStrict = VERR_EM_INTERPRETER;
+ }
+
+ if (rcStrict == VINF_SUCCESS)
+ {
+ switch (iCrReg)
+ {
+ case 0:
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR0);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR0Write);
+ break;
+
+ case 2:
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR2);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR2Write);
+ break;
+
+ case 3:
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR3);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR3Write);
+ break;
+
+ case 4:
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR4);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR4Write);
+ break;
+
+ case 8:
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR8Write);
+ break;
+
+ default:
+ {
+ AssertMsgFailed(("hmR0SvmExitWriteCRx: Invalid/Unexpected Write-CRx exit. u64ExitCode=%#RX64 %#x\n",
+ pSvmTransient->u64ExitCode, iCrReg));
+ break;
+ }
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ }
+ else
+ Assert(rcStrict == VERR_EM_INTERPRETER || rcStrict == VINF_PGM_SYNC_CR3);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT helper for read MSRs, see hmR0SvmExitMsr.
+ *
+ * @returns Strict VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ */
+static VBOXSTRICTRC hmR0SvmExitReadMsr(PVMCPU pVCpu, PSVMVMCB pVmcb)
+{
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdmsr);
+ Log4Func(("idMsr=%#RX32\n", pVCpu->cpum.GstCtx.ecx));
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ /** @todo Optimize this: Only retrieve the MSR bits we need here. CPUMAllMsrs.cpp
+ * can ask for what it needs instead of using CPUMCTX_EXTRN_ALL_MSRS. */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_ALL_MSRS);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedRdmsr(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_ALL_MSRS);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_IEM_RAISED_XCPT
+ || rcStrict == VINF_CPUM_R3_MSR_READ,
+ ("hmR0SvmExitReadMsr: Unexpected status %Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return rcStrict;
+}
+
+
+/**
+ * \#VMEXIT helper for write MSRs, see hmR0SvmExitMsr.
+ *
+ * @returns Strict VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmcb Pointer to the VM control block.
+ * @param pSvmTransient Pointer to the SVM-transient structure.
+ */
+static VBOXSTRICTRC hmR0SvmExitWriteMsr(PVMCPU pVCpu, PSVMVMCB pVmcb, PSVMTRANSIENT pSvmTransient)
+{
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint32_t const idMsr = pCtx->ecx;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWrmsr);
+ Log4Func(("idMsr=%#RX32\n", idMsr));
+
+ /*
+ * Handle TPR patching MSR writes.
+     * We utilize the LSTAR MSR for patching.
+ */
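+    /*
+     * Descriptive note (assuming the TPR patching scheme for 32-bit guests mentioned below): the
+     * guest's TPR accesses have been patched into LSTAR MSR writes, so the low 8 bits of EAX at
+     * this point carry the TPR value that is handed to the virtual APIC.
+     */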
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if ( pVCpu->CTX_SUFF(pVM)->hm.s.fTPRPatchingActive
+ && idMsr == MSR_K8_LSTAR)
+ {
+ unsigned cbInstr;
+ if (fSupportsNextRipSave)
+ cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ else
+ {
+ PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
+ int rc = EMInterpretDisasCurrent(pVCpu->CTX_SUFF(pVM), pVCpu, pDis, &cbInstr);
+ if ( rc == VINF_SUCCESS
+ && pDis->pCurInstr->uOpcode == OP_WRMSR)
+ Assert(cbInstr > 0);
+ else
+ cbInstr = 0;
+ }
+
+ /* Our patch code uses LSTAR for TPR caching for 32-bit guests. */
+ if ((pCtx->eax & 0xff) != pSvmTransient->u8GuestTpr)
+ {
+ int rc = APICSetTpr(pVCpu, pCtx->eax & 0xff);
+ AssertRCReturn(rc, rc);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ }
+
+ int rc = VINF_SUCCESS;
+ hmR0SvmAdvanceRip(pVCpu, cbInstr);
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rc);
+ return rc;
+ }
+
+ /*
+ * Handle regular MSR writes.
+ */
+ VBOXSTRICTRC rcStrict;
+ if (fSupportsNextRipSave)
+ {
+ /** @todo Optimize this: We don't need to get much of the MSR state here
+ * since we're only updating. CPUMAllMsrs.cpp can ask for what it needs and
+ * clear the applicable extern flags. */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_ALL_MSRS);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedWrmsr(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_ALL_MSRS);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_IEM_RAISED_XCPT
+ || rcStrict == VINF_CPUM_R3_MSR_WRITE,
+ ("hmR0SvmExitWriteMsr: Unexpected status %Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ if (rcStrict == VINF_SUCCESS)
+ {
+ /* If this is an X2APIC WRMSR access, update the APIC TPR state. */
+ if ( idMsr >= MSR_IA32_X2APIC_START
+ && idMsr <= MSR_IA32_X2APIC_END)
+ {
+ /*
+ * We've already saved the APIC related guest-state (TPR) in hmR0SvmPostRunGuest().
+ * When full APIC register virtualization is implemented we'll have to make sure
+ * APIC state is saved from the VMCB before IEM changes it.
+ */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ }
+ else
+ {
+ switch (idMsr)
+ {
+ case MSR_IA32_TSC: pSvmTransient->fUpdateTscOffsetting = true; break;
+ case MSR_K6_EFER: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_EFER_MSR); break;
+ case MSR_K8_FS_BASE: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_FS); break;
+ case MSR_K8_GS_BASE: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_GS); break;
+ case MSR_IA32_SYSENTER_CS: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_CS_MSR); break;
+ case MSR_IA32_SYSENTER_EIP: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_EIP_MSR); break;
+ case MSR_IA32_SYSENTER_ESP: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_ESP_MSR); break;
+ }
+ }
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return rcStrict;
+}
+
+
+/**
+ * \#VMEXIT handler for MSR read and writes (SVM_EXIT_MSR). Conditional
+ * \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitMsr(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ if (pVmcb->ctrl.u64ExitInfo1 == SVM_EXIT1_MSR_READ)
+ return VBOXSTRICTRC_TODO(hmR0SvmExitReadMsr(pVCpu, pVmcb));
+
+ Assert(pVmcb->ctrl.u64ExitInfo1 == SVM_EXIT1_MSR_WRITE);
+ return VBOXSTRICTRC_TODO(hmR0SvmExitWriteMsr(pVCpu, pVmcb, pSvmTransient));
+}
+
+
+/**
+ * \#VMEXIT handler for DRx read (SVM_EXIT_READ_DRx). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitReadDRx(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
+
+ /** @todo Stepping with nested-guest. */
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (!CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ {
+ /* We should -not- get this #VMEXIT if the guest's debug registers were active. */
+ if (pSvmTransient->fWasGuestDebugStateActive)
+ {
+ AssertMsgFailed(("hmR0SvmExitReadDRx: Unexpected exit %#RX32\n", (uint32_t)pSvmTransient->u64ExitCode));
+ pVCpu->hm.s.u32HMError = (uint32_t)pSvmTransient->u64ExitCode;
+ return VERR_SVM_UNEXPECTED_EXIT;
+ }
+
+ /*
+ * Lazy DR0-3 loading.
+ */
+ if (!pSvmTransient->fWasHyperDebugStateActive)
+ {
+ Assert(!DBGFIsStepping(pVCpu)); Assert(!pVCpu->hm.s.fSingleInstruction);
+ Log5(("hmR0SvmExitReadDRx: Lazy loading guest debug registers\n"));
+
+ /* Don't intercept DRx read and writes. */
+ PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+ pVmcb->ctrl.u16InterceptRdDRx = 0;
+ pVmcb->ctrl.u16InterceptWrDRx = 0;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
+
+ /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */
+ VMMRZCallRing3Disable(pVCpu);
+ HM_DISABLE_PREEMPT(pVCpu);
+
+ /* Save the host & load the guest debug state, restart execution of the MOV DRx instruction. */
+ CPUMR0LoadGuestDebugState(pVCpu, false /* include DR6 */);
+ Assert(CPUMIsGuestDebugStateActive(pVCpu) || HC_ARCH_BITS == 32);
+
+ HM_RESTORE_PREEMPT();
+ VMMRZCallRing3Enable(pVCpu);
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
+ return VINF_SUCCESS;
+ }
+ }
+
+ /*
+ * Interpret the read/writing of DRx.
+ */
+ /** @todo Decode assist. */
+ VBOXSTRICTRC rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0 /* pvFault */);
+ Log5(("hmR0SvmExitReadDRx: Emulated DRx access: rc=%Rrc\n", VBOXSTRICTRC_VAL(rc)));
+ if (RT_LIKELY(rc == VINF_SUCCESS))
+ {
+ /* Not strictly necessary for read accesses, but it doesn't hurt for now; will be fixed with decode assist. */
+ /** @todo CPUM should set this flag! */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR_MASK);
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rc);
+ }
+ else
+ Assert(rc == VERR_EM_INTERPRETER);
+ return VBOXSTRICTRC_TODO(rc);
+}
+
+
+/**
+ * \#VMEXIT handler for DRx write (SVM_EXIT_WRITE_DRx). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitWriteDRx(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ /* For now it's the same since we interpret the instruction anyway. This will change when use of decode assists is implemented. */
+ int rc = hmR0SvmExitReadDRx(pVCpu, pSvmTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
+ STAM_COUNTER_DEC(&pVCpu->hm.s.StatExitDRxRead);
+ return rc;
+}
+
+
+/**
+ * \#VMEXIT handler for XCRx write (SVM_EXIT_XSETBV). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXsetbv(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+
+ /** @todo decode assists... */
+ VBOXSTRICTRC rcStrict = IEMExecOne(pVCpu);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ {
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
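+ /* Only swap XCR0 around world switches when the guest has OSXSAVE enabled and its XCR0 differs from the current host value. */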
+ pVCpu->hm.s.fLoadSaveGuestXcr0 = (pCtx->cr4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0();
+ Log4Func(("New XCR0=%#RX64 fLoadSaveGuestXcr0=%RTbool (cr4=%#RX64)\n", pCtx->aXcr[0], pVCpu->hm.s.fLoadSaveGuestXcr0,
+ pCtx->cr4));
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for I/O instructions (SVM_EXIT_IOIO). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitIOInstr(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_SREG_MASK);
+
+ /* I/O operation lookup arrays. */
+ static uint32_t const s_aIOSize[8] = { 0, 1, 2, 0, 4, 0, 0, 0 }; /* Size of the I/O accesses in bytes. */
+ static uint32_t const s_aIOOpAnd[8] = { 0, 0xff, 0xffff, 0, 0xffffffff, 0, 0, 0 }; /* AND masks for saving
+ the result (in AL/AX/EAX). */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+
+ Log4Func(("CS:RIP=%04x:%#RX64\n", pCtx->cs.Sel, pCtx->rip));
+
+ /* Refer to the AMD spec. 15.10.2 "IN and OUT Behaviour" and Figure 15-2 "EXITINFO1 for IOIO Intercept" for the format. */
+ SVMIOIOEXITINFO IoExitInfo;
+ IoExitInfo.u = (uint32_t)pVmcb->ctrl.u64ExitInfo1;
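+ /* Bits 4:6 of EXITINFO1 are a one-hot operand-size field (SZ8/SZ16/SZ32), so the extracted value is 1, 2 or 4 and indexes the lookup tables above. */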
+ uint32_t uIOWidth = (IoExitInfo.u >> 4) & 0x7;
+ uint32_t cbValue = s_aIOSize[uIOWidth];
+ uint32_t uAndVal = s_aIOOpAnd[uIOWidth];
+
+ if (RT_UNLIKELY(!cbValue))
+ {
+ AssertMsgFailed(("hmR0SvmExitIOInstr: Invalid IO operation. uIOWidth=%u\n", uIOWidth));
+ return VERR_EM_INTERPRETER;
+ }
+
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
+ VBOXSTRICTRC rcStrict;
+ PCEMEXITREC pExitRec = NULL;
+ if ( !pVCpu->hm.s.fSingleInstruction
+ && !pVCpu->cpum.GstCtx.eflags.Bits.u1TF)
+ pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
+ !IoExitInfo.n.u1Str
+ ? IoExitInfo.n.u1Type == SVM_IOIO_READ
+ ? EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_READ)
+ : EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_WRITE)
+ : IoExitInfo.n.u1Type == SVM_IOIO_READ
+ ? EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_STR_READ)
+ : EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_STR_WRITE),
+ pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
+ if (!pExitRec)
+ {
+ bool fUpdateRipAlready = false;
+ if (IoExitInfo.n.u1Str)
+ {
+ /* INS/OUTS - I/O String instruction. */
+ /** @todo Huh? why can't we use the segment prefix information given by AMD-V
+ * in EXITINFO1? Investigate once this thing is up and running. */
+ Log4Func(("CS:RIP=%04x:%08RX64 %#06x/%u %c str\n", pCtx->cs.Sel, pCtx->rip, IoExitInfo.n.u16Port, cbValue,
+ IoExitInfo.n.u1Type == SVM_IOIO_WRITE ? 'w' : 'r'));
+ AssertReturn(pCtx->dx == IoExitInfo.n.u16Port, VERR_SVM_IPE_2);
+ static IEMMODE const s_aenmAddrMode[8] =
+ {
+ (IEMMODE)-1, IEMMODE_16BIT, IEMMODE_32BIT, (IEMMODE)-1, IEMMODE_64BIT, (IEMMODE)-1, (IEMMODE)-1, (IEMMODE)-1
+ };
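+ /* Bits 7:9 of EXITINFO1 are likewise one-hot (A16/A32/A64) and select the effective address size of the string instruction. */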
+ IEMMODE enmAddrMode = s_aenmAddrMode[(IoExitInfo.u >> 7) & 0x7];
+ if (enmAddrMode != (IEMMODE)-1)
+ {
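+ /* EXITINFO2 holds the RIP of the instruction following INS/OUTS, so the difference to the current RIP gives the instruction length. */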
+ uint64_t cbInstr = pVmcb->ctrl.u64ExitInfo2 - pCtx->rip;
+ if (cbInstr <= 15 && cbInstr >= 1)
+ {
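+ /* A REP prefix adds one byte, so a repeated INS/OUTS must be at least two bytes long. */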
+ Assert(cbInstr >= 1U + IoExitInfo.n.u1Rep);
+ if (IoExitInfo.n.u1Type == SVM_IOIO_WRITE)
+ {
+ /* Don't know exactly how to detect whether u3Seg is valid, currently
+ only enabling it for Bulldozer and later with NRIP. OS/2 broke on
+ 2384 Opterons when only checking NRIP. */
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if ( fSupportsNextRipSave
+ && pVM->cpum.ro.GuestFeatures.enmMicroarch >= kCpumMicroarch_AMD_15h_First)
+ {
+ AssertMsg(IoExitInfo.n.u3Seg == X86_SREG_DS || cbInstr > 1U + IoExitInfo.n.u1Rep,
+ ("u32Seg=%d cbInstr=%d u1REP=%d", IoExitInfo.n.u3Seg, cbInstr, IoExitInfo.n.u1Rep));
+ rcStrict = IEMExecStringIoWrite(pVCpu, cbValue, enmAddrMode, IoExitInfo.n.u1Rep, (uint8_t)cbInstr,
+ IoExitInfo.n.u3Seg, true /*fIoChecked*/);
+ }
+ else if (cbInstr == 1U + IoExitInfo.n.u1Rep)
+ rcStrict = IEMExecStringIoWrite(pVCpu, cbValue, enmAddrMode, IoExitInfo.n.u1Rep, (uint8_t)cbInstr,
+ X86_SREG_DS, true /*fIoChecked*/);
+ else
+ rcStrict = IEMExecOne(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringWrite);
+ }
+ else
+ {
+ AssertMsg(IoExitInfo.n.u3Seg == X86_SREG_ES /*=0*/, ("%#x\n", IoExitInfo.n.u3Seg));
+ rcStrict = IEMExecStringIoRead(pVCpu, cbValue, enmAddrMode, IoExitInfo.n.u1Rep, (uint8_t)cbInstr,
+ true /*fIoChecked*/);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringRead);
+ }
+ }
+ else
+ {
+ AssertMsgFailed(("rip=%RX64 nrip=%#RX64 cbInstr=%#RX64\n", pCtx->rip, pVmcb->ctrl.u64ExitInfo2, cbInstr));
+ rcStrict = IEMExecOne(pVCpu);
+ }
+ }
+ else
+ {
+ AssertMsgFailed(("IoExitInfo=%RX64\n", IoExitInfo.u));
+ rcStrict = IEMExecOne(pVCpu);
+ }
+ fUpdateRipAlready = true;
+ }
+ else
+ {
+ /* IN/OUT - I/O instruction. */
+ Assert(!IoExitInfo.n.u1Rep);
+
+ uint8_t const cbInstr = pVmcb->ctrl.u64ExitInfo2 - pCtx->rip;
+ if (IoExitInfo.n.u1Type == SVM_IOIO_WRITE)
+ {
+ rcStrict = IOMIOPortWrite(pVM, pVCpu, IoExitInfo.n.u16Port, pCtx->eax & uAndVal, cbValue);
+ if ( rcStrict == VINF_IOM_R3_IOPORT_WRITE
+ && !pCtx->eflags.Bits.u1TF)
+ rcStrict = EMRZSetPendingIoPortWrite(pVCpu, IoExitInfo.n.u16Port, cbInstr, cbValue, pCtx->eax & uAndVal);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite);
+ }
+ else
+ {
+ uint32_t u32Val = 0;
+ rcStrict = IOMIOPortRead(pVM, pVCpu, IoExitInfo.n.u16Port, &u32Val, cbValue);
+ if (IOM_SUCCESS(rcStrict))
+ {
+ /* Save result of I/O IN instr. in AL/AX/EAX. */
+ /** @todo r=bird: 32-bit op size should clear high bits of rax! */
+ pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
+ }
+ else if ( rcStrict == VINF_IOM_R3_IOPORT_READ
+ && !pCtx->eflags.Bits.u1TF)
+ rcStrict = EMRZSetPendingIoPortRead(pVCpu, IoExitInfo.n.u16Port, cbInstr, cbValue);
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead);
+ }
+ }
+
+ if (IOM_SUCCESS(rcStrict))
+ {
+ /* AMD-V saves the RIP of the instruction following the IO instruction in EXITINFO2. */
+ if (!fUpdateRipAlready)
+ pCtx->rip = pVmcb->ctrl.u64ExitInfo2;
+
+ /*
+ * If any I/O breakpoints are armed, we need to check if one triggered
+ * and take appropriate action.
+ * Note that the I/O breakpoint type is undefined if CR4.DE is 0.
+ */
+ /** @todo Optimize away the DBGFBpIsHwIoArmed call by having DBGF tell the
+ * execution engines about whether hyper BPs and such are pending. */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_DR7);
+ uint32_t const uDr7 = pCtx->dr[7];
+ if (RT_UNLIKELY( ( (uDr7 & X86_DR7_ENABLED_MASK)
+ && X86_DR7_ANY_RW_IO(uDr7)
+ && (pCtx->cr4 & X86_CR4_DE))
+ || DBGFBpIsHwIoArmed(pVM)))
+ {
+ /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */
+ VMMRZCallRing3Disable(pVCpu);
+ HM_DISABLE_PREEMPT(pVCpu);
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck);
+ CPUMR0DebugStateMaybeSaveGuest(pVCpu, false /*fDr6*/);
+
+ VBOXSTRICTRC rcStrict2 = DBGFBpCheckIo(pVM, pVCpu, &pVCpu->cpum.GstCtx, IoExitInfo.n.u16Port, cbValue);
+ if (rcStrict2 == VINF_EM_RAW_GUEST_TRAP)
+ {
+ /* Raise #DB. */
+ pVmcb->guest.u64DR6 = pCtx->dr[6];
+ pVmcb->guest.u64DR7 = pCtx->dr[7];
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX;
+ hmR0SvmSetPendingXcptDB(pVCpu);
+ }
+ /* rcStrict is VINF_SUCCESS, VINF_IOM_R3_IOPORT_COMMIT_WRITE, or in [VINF_EM_FIRST..VINF_EM_LAST],
+ however we can ditch VINF_IOM_R3_IOPORT_COMMIT_WRITE as it has VMCPU_FF_IOM as backup. */
+ else if ( rcStrict2 != VINF_SUCCESS
+ && (rcStrict == VINF_SUCCESS || rcStrict2 < rcStrict))
+ rcStrict = rcStrict2;
+ AssertCompile(VINF_EM_LAST < VINF_IOM_R3_IOPORT_COMMIT_WRITE);
+
+ HM_RESTORE_PREEMPT();
+ VMMRZCallRing3Enable(pVCpu);
+ }
+
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ }
+
+#ifdef VBOX_STRICT
+ if ( rcStrict == VINF_IOM_R3_IOPORT_READ
+ || rcStrict == VINF_EM_PENDING_R3_IOPORT_READ)
+ Assert(IoExitInfo.n.u1Type == SVM_IOIO_READ);
+ else if ( rcStrict == VINF_IOM_R3_IOPORT_WRITE
+ || rcStrict == VINF_IOM_R3_IOPORT_COMMIT_WRITE
+ || rcStrict == VINF_EM_PENDING_R3_IOPORT_WRITE)
+ Assert(IoExitInfo.n.u1Type == SVM_IOIO_WRITE);
+ else
+ {
+ /** @todo r=bird: This is missing a bunch of VINF_EM_FIRST..VINF_EM_LAST
+ * statuses, that the VMM device and some others may return. See
+ * IOM_SUCCESS() for guidance. */
+ AssertMsg( RT_FAILURE(rcStrict)
+ || rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_EM_RAW_EMULATE_INSTR
+ || rcStrict == VINF_EM_DBG_BREAKPOINT
+ || rcStrict == VINF_EM_RAW_GUEST_TRAP
+ || rcStrict == VINF_EM_RAW_TO_R3
+ || rcStrict == VINF_TRPM_XCPT_DISPATCHED
+ || rcStrict == VINF_EM_TRIPLE_FAULT, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+ }
+#endif
+ }
+ else
+ {
+ /*
+ * Frequent exit or something needing probing. Get state and call EMHistoryExec.
+ */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ STAM_COUNTER_INC(!IoExitInfo.n.u1Str
+ ? IoExitInfo.n.u1Type == SVM_IOIO_WRITE ? &pVCpu->hm.s.StatExitIOWrite : &pVCpu->hm.s.StatExitIORead
+ : IoExitInfo.n.u1Type == SVM_IOIO_WRITE ? &pVCpu->hm.s.StatExitIOStringWrite : &pVCpu->hm.s.StatExitIOStringRead);
+ Log4(("IOExit/%u: %04x:%08RX64: %s%s%s %#x LB %u -> EMHistoryExec\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, IoExitInfo.n.u1Rep ? "REP " : "",
+ IoExitInfo.n.u1Type == SVM_IOIO_WRITE ? "OUT" : "IN", IoExitInfo.n.u1Str ? "S" : "", IoExitInfo.n.u16Port, uIOWidth));
+
+ rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+
+ Log4(("IOExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+ VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
+ }
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for Nested Page-faults (SVM_EXIT_NPF). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitNestedPF(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ Assert(pVM->hm.s.fNestedPaging);
+
+ /* See AMD spec. 15.25.6 "Nested versus Guest Page Faults, Fault Ordering" for VMCB details for #NPF. */
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ RTGCPHYS GCPhysFaultAddr = pVmcb->ctrl.u64ExitInfo2;
+ uint32_t u32ErrCode = pVmcb->ctrl.u64ExitInfo1; /* Note! High bits in EXITINFO1 may contain additional info and are
+ thus intentionally not copied into u32ErrCode. */
+
+ Log4Func(("#NPF at CS:RIP=%04x:%#RX64 GCPhysFaultAddr=%RGp ErrCode=%#x \n", pCtx->cs.Sel, pCtx->rip, GCPhysFaultAddr,
+ u32ErrCode));
+
+ /*
+ * TPR patching for 32-bit guests, using the reserved bit in the page tables for MMIO regions.
+ */
+ if ( pVM->hm.s.fTprPatchingAllowed
+ && (GCPhysFaultAddr & PAGE_OFFSET_MASK) == XAPIC_OFF_TPR
+ && ( !(u32ErrCode & X86_TRAP_PF_P) /* Not present */
+ || (u32ErrCode & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) == (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) /* MMIO page. */
+ && !CPUMIsGuestInSvmNestedHwVirtMode(pCtx)
+ && !CPUMIsGuestInLongModeEx(pCtx)
+ && !CPUMGetGuestCPL(pVCpu)
+ && pVM->hm.s.cPatches < RT_ELEMENTS(pVM->hm.s.aPatches))
+ {
+ RTGCPHYS GCPhysApicBase = APICGetBaseMsrNoCheck(pVCpu);
+ GCPhysApicBase &= PAGE_BASE_GC_MASK;
+
+ if (GCPhysFaultAddr == GCPhysApicBase + XAPIC_OFF_TPR)
+ {
+ /* Only attempt to patch the instruction once. */
+ PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
+ if (!pPatch)
+ return VINF_EM_HM_PATCH_TPR_INSTR;
+ }
+ }
+
+ /*
+ * Determine the nested paging mode.
+ */
+/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
+ PGMMODE enmNestedPagingMode;
+#if HC_ARCH_BITS == 32
+ if (CPUMIsGuestInLongModeEx(pCtx))
+ enmNestedPagingMode = PGMMODE_AMD64_NX;
+ else
+#endif
+ enmNestedPagingMode = PGMGetHostMode(pVM);
+
+ /*
+ * MMIO optimization using the reserved (RSVD) bit in the guest page tables for MMIO pages.
+ */
+ Assert((u32ErrCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != X86_TRAP_PF_RSVD);
+ if ((u32ErrCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) == (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
+ {
+ /*
+ * If event delivery causes an MMIO #NPF, go back to instruction emulation as otherwise
+ * injecting the original pending event would most likely cause the same MMIO #NPF.
+ */
+ if (pVCpu->hm.s.Event.fPending)
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ VBOXSTRICTRC rcStrict;
+ PCEMEXITREC pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
+ EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_MMIO),
+ pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
+ if (!pExitRec)
+ {
+
+ rcStrict = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, enmNestedPagingMode, CPUMCTX2CORE(pCtx), GCPhysFaultAddr,
+ u32ErrCode);
+
+ /*
+ * If we succeed, resume guest execution.
+ *
+ * If we fail in interpreting the instruction because we couldn't get the guest
+ * physical address of the page containing the instruction via the guest's page
+ * tables (we would invalidate the guest page in the host TLB), resume execution
+ * which would cause a guest page fault to let the guest handle this weird case.
+ *
+ * See @bugref{6043}.
+ */
+ if ( rcStrict == VINF_SUCCESS
+ || rcStrict == VERR_PAGE_TABLE_NOT_PRESENT
+ || rcStrict == VERR_PAGE_NOT_PRESENT)
+ {
+ /* Successfully handled MMIO operation. */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ rcStrict = VINF_SUCCESS;
+ }
+ }
+ else
+ {
+ /*
+ * Frequent exit or something needing probing. Get state and call EMHistoryExec.
+ */
+ Assert(pCtx == &pVCpu->cpum.GstCtx);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ Log4(("EptMisscfgExit/%u: %04x:%08RX64: %RGp -> EMHistoryExec\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysFaultAddr));
+
+ rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+
+ Log4(("EptMisscfgExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+ VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
+ }
+ return VBOXSTRICTRC_TODO(rcStrict);
+ }
+
+ TRPMAssertXcptPF(pVCpu, GCPhysFaultAddr, u32ErrCode);
+ int rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, enmNestedPagingMode, u32ErrCode, CPUMCTX2CORE(pCtx), GCPhysFaultAddr);
+ TRPMResetTrap(pVCpu);
+
+ Log4Func(("#NPF: PGMR0Trap0eHandlerNestedPaging returns %Rrc CS:RIP=%04x:%#RX64\n", rc, pCtx->cs.Sel, pCtx->rip));
+
+ /*
+ * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}.
+ */
+ if ( rc == VINF_SUCCESS
+ || rc == VERR_PAGE_TABLE_NOT_PRESENT
+ || rc == VERR_PAGE_NOT_PRESENT)
+ {
+ /* We've successfully synced our shadow page tables. */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
+ rc = VINF_SUCCESS;
+ }
+
+ return rc;
+}
+
+
+/**
+ * \#VMEXIT handler for virtual interrupt (SVM_EXIT_VINTR). Conditional
+ * \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitVIntr(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_ASSERT_NOT_IN_NESTED_GUEST(&pVCpu->cpum.GstCtx);
+
+ /* Indicate that we no longer need to #VMEXIT when the guest is ready to receive NMIs, it is now ready. */
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ hmR0SvmClearIntWindowExiting(pVCpu, pVmcb);
+
+ /* Deliver the pending interrupt via hmR0SvmEvaluatePendingEvent() and resume guest execution. */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * \#VMEXIT handler for task switches (SVM_EXIT_TASK_SWITCH). Conditional
+ * \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitTaskSwitch(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+#ifndef HMSVM_ALWAYS_TRAP_TASK_SWITCH
+ Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
+#endif
+
+ /* Check if this task-switch occurred while delivering an event through the guest IDT. */
+ if (pVCpu->hm.s.Event.fPending) /* Can happen with exceptions/NMI. See @bugref{8411}. */
+ {
+ /*
+ * AMD-V provides us with the exception which caused the TS; we collect
+ * the information in the call to hmR0SvmCheckExitDueToEventDelivery().
+ */
+ Log4Func(("TS occurred during event delivery\n"));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+
+ /** @todo Emulate task switch someday, currently just going back to ring-3 for
+ * emulation. */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
+ return VERR_EM_INTERPRETER;
+}
+
+
+/**
+ * \#VMEXIT handler for VMMCALL (SVM_EXIT_VMMCALL). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitVmmCall(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fTprPatchingAllowed)
+ {
+ int rc = hmSvmEmulateMovTpr(pVCpu);
+ if (rc != VERR_NOT_FOUND)
+ {
+ Log4Func(("hmSvmEmulateMovTpr returns %Rrc\n", rc));
+ return rc;
+ }
+ }
+
+ if (EMAreHypercallInstructionsEnabled(pVCpu))
+ {
+ unsigned cbInstr;
+ if (hmR0SvmSupportsNextRipSave(pVCpu))
+ {
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ }
+ else
+ {
+ PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
+ int rc = EMInterpretDisasCurrent(pVCpu->CTX_SUFF(pVM), pVCpu, pDis, &cbInstr);
+ if ( rc == VINF_SUCCESS
+ && pDis->pCurInstr->uOpcode == OP_VMMCALL)
+ Assert(cbInstr > 0);
+ else
+ cbInstr = 0;
+ }
+
+ VBOXSTRICTRC rcStrict = GIMHypercall(pVCpu, &pVCpu->cpum.GstCtx);
+ if (RT_SUCCESS(rcStrict))
+ {
+ /* Only update the RIP if we're continuing guest execution and not in the case
+ of, say, VINF_GIM_R3_HYPERCALL. */
+ if (rcStrict == VINF_SUCCESS)
+ hmR0SvmAdvanceRip(pVCpu, cbInstr);
+
+ return VBOXSTRICTRC_VAL(rcStrict);
+ }
+ else
+ Log4Func(("GIMHypercall returns %Rrc -> #UD\n", VBOXSTRICTRC_VAL(rcStrict)));
+ }
+
+ hmR0SvmSetPendingXcptUD(pVCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * \#VMEXIT handler for PAUSE (SVM_EXIT_PAUSE). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitPause(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ unsigned cbInstr;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ }
+ else
+ {
+ PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
+ int rc = EMInterpretDisasCurrent(pVCpu->CTX_SUFF(pVM), pVCpu, pDis, &cbInstr);
+ if ( rc == VINF_SUCCESS
+ && pDis->pCurInstr->uOpcode == OP_PAUSE)
+ Assert(cbInstr > 0);
+ else
+ cbInstr = 0;
+ }
+
+ /** @todo The guest has likely hit a contended spinlock. We might want to
+ * poke or schedule a different guest VCPU. */
+ hmR0SvmAdvanceRip(pVCpu, cbInstr);
+ return VINF_EM_RAW_INTERRUPT;
+}
+
+
+/**
+ * \#VMEXIT handler for FERR intercept (SVM_EXIT_FERR_FREEZE). Conditional
+ * \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitFerrFreeze(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR0);
+ Assert(!(pVCpu->cpum.GstCtx.cr0 & X86_CR0_NE));
+
+ Log4Func(("Raising IRQ 13 in response to #FERR\n"));
+ return PDMIsaSetIrq(pVCpu->CTX_SUFF(pVM), 13 /* u8Irq */, 1 /* u8Level */, 0 /* uTagSrc */);
+}
+
+
+/**
+ * \#VMEXIT handler for IRET (SVM_EXIT_IRET). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitIret(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ /* Clear NMI blocking. */
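+ /* Executing IRET ends the NMI-blocking window that began when the NMI was delivered. */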
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS);
+
+ /* Indicate that we no longer need to #VMEXIT when the guest is ready to receive NMIs, it is now ready. */
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ hmR0SvmClearCtrlIntercept(pVCpu, pVmcb, SVM_CTRL_INTERCEPT_IRET);
+
+ /* Deliver the pending NMI via hmR0SvmEvaluatePendingEvent() and resume guest execution. */
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * \#VMEXIT handler for page-fault exceptions (SVM_EXIT_XCPT_14).
+ * Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptPF(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ /* See AMD spec. 15.12.15 "#PF (Page Fault)". */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint32_t uErrCode = pVmcb->ctrl.u64ExitInfo1;
+ uint64_t const uFaultAddress = pVmcb->ctrl.u64ExitInfo2;
+
+#if defined(HMSVM_ALWAYS_TRAP_ALL_XCPTS) || defined(HMSVM_ALWAYS_TRAP_PF)
+ if (pVM->hm.s.fNestedPaging)
+ {
+ pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */
+ if ( !pSvmTransient->fVectoringDoublePF
+ || CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ {
+ /* A genuine guest #PF, reflect it to the guest. */
+ hmR0SvmSetPendingXcptPF(pVCpu, uErrCode, uFaultAddress);
+ Log4Func(("#PF: Guest page fault at %04X:%RGv FaultAddr=%RX64 ErrCode=%#x\n", pCtx->cs.Sel, (RTGCPTR)pCtx->rip,
+ uFaultAddress, uErrCode));
+ }
+ else
+ {
+ /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
+ hmR0SvmSetPendingXcptDF(pVCpu);
+ Log4Func(("Pending #DF due to vectoring #PF. NP\n"));
+ }
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
+ return VINF_SUCCESS;
+ }
+#endif
+
+ Assert(!pVM->hm.s.fNestedPaging);
+
+ /*
+ * TPR patching shortcut for APIC TPR reads and writes; only applicable to 32-bit guests.
+ */
+ if ( pVM->hm.s.fTprPatchingAllowed
+ && (uFaultAddress & 0xfff) == XAPIC_OFF_TPR
+ && !(uErrCode & X86_TRAP_PF_P) /* Not present. */
+ && !CPUMIsGuestInSvmNestedHwVirtMode(pCtx)
+ && !CPUMIsGuestInLongModeEx(pCtx)
+ && !CPUMGetGuestCPL(pVCpu)
+ && pVM->hm.s.cPatches < RT_ELEMENTS(pVM->hm.s.aPatches))
+ {
+ RTGCPHYS GCPhysApicBase;
+ GCPhysApicBase = APICGetBaseMsrNoCheck(pVCpu);
+ GCPhysApicBase &= PAGE_BASE_GC_MASK;
+
+ /* Check if the page at the fault-address is the APIC base. */
+ RTGCPHYS GCPhysPage;
+ int rc2 = PGMGstGetPage(pVCpu, (RTGCPTR)uFaultAddress, NULL /* pfFlags */, &GCPhysPage);
+ if ( rc2 == VINF_SUCCESS
+ && GCPhysPage == GCPhysApicBase)
+ {
+ /* Only attempt to patch the instruction once. */
+ PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
+ if (!pPatch)
+ return VINF_EM_HM_PATCH_TPR_INSTR;
+ }
+ }
+
+ Log4Func(("#PF: uFaultAddress=%#RX64 CS:RIP=%#04x:%#RX64 uErrCode %#RX32 cr3=%#RX64\n", uFaultAddress, pCtx->cs.Sel,
+ pCtx->rip, uErrCode, pCtx->cr3));
+
+ /*
+ * If it's a vectoring #PF, emulate injecting the original event as
+ * PGMTrap0eHandler() is incapable of differentiating between instruction emulation and
+ * event injection that caused a #PF. See @bugref{6607}.
+ */
+ if (pSvmTransient->fVectoringPF)
+ {
+ Assert(pVCpu->hm.s.Event.fPending);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+
+ TRPMAssertXcptPF(pVCpu, uFaultAddress, uErrCode);
+ int rc = PGMTrap0eHandler(pVCpu, uErrCode, CPUMCTX2CORE(pCtx), (RTGCPTR)uFaultAddress);
+
+ Log4Func(("#PF: rc=%Rrc\n", rc));
+
+ if (rc == VINF_SUCCESS)
+ {
+ /* Successfully synced shadow pages tables or emulated an MMIO instruction. */
+ TRPMResetTrap(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ return rc;
+ }
+
+ if (rc == VINF_EM_RAW_GUEST_TRAP)
+ {
+ pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */
+
+ /*
+ * If a nested-guest delivers a #PF and that causes a #PF which is -not- a shadow #PF,
+ * we should simply forward the #PF to the guest; it is up to the nested-hypervisor to
+ * determine whether it is a nested-shadow #PF or a #DF, see @bugref{7243#c121}.
+ */
+ if ( !pSvmTransient->fVectoringDoublePF
+ || CPUMIsGuestInSvmNestedHwVirtMode(pCtx))
+ {
+ /* It's a guest (or nested-guest) page fault and needs to be reflected. */
+ uErrCode = TRPMGetErrorCode(pVCpu); /* The error code might have been changed. */
+ TRPMResetTrap(pVCpu);
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+ /* If the nested-guest is intercepting #PFs, cause a #PF #VMEXIT. */
+ if ( CPUMIsGuestInSvmNestedHwVirtMode(pCtx)
+ && HMIsGuestSvmXcptInterceptSet(pVCpu, X86_XCPT_PF))
+ return VBOXSTRICTRC_TODO(IEMExecSvmVmexit(pVCpu, SVM_EXIT_XCPT_PF, uErrCode, uFaultAddress));
+#endif
+
+ hmR0SvmSetPendingXcptPF(pVCpu, uErrCode, uFaultAddress);
+ }
+ else
+ {
+ /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
+ TRPMResetTrap(pVCpu);
+ hmR0SvmSetPendingXcptDF(pVCpu);
+ Log4Func(("#PF: Pending #DF due to vectoring #PF\n"));
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
+ return VINF_SUCCESS;
+ }
+
+ TRPMResetTrap(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM);
+ return rc;
+}
+
+
+/**
+ * \#VMEXIT handler for undefined opcode (SVM_EXIT_XCPT_6).
+ * Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptUD(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_ASSERT_NOT_IN_NESTED_GUEST(&pVCpu->cpum.GstCtx);
+
+ /* Paranoia; Ensure we cannot be called as a result of event delivery. */
+ PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+ Assert(!pVmcb->ctrl.ExitIntInfo.n.u1Valid); NOREF(pVmcb);
+
+ int rc = VERR_SVM_UNEXPECTED_XCPT_EXIT;
+ if (pVCpu->hm.s.fGIMTrapXcptUD)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ uint8_t cbInstr = 0;
+ VBOXSTRICTRC rcStrict = GIMXcptUD(pVCpu, &pVCpu->cpum.GstCtx, NULL /* pDis */, &cbInstr);
+ if (rcStrict == VINF_SUCCESS)
+ {
+ /* #UD #VMEXIT does not have valid NRIP information, manually advance RIP. See @bugref{7270#c170}. */
+ hmR0SvmAdvanceRip(pVCpu, cbInstr);
+ rc = VINF_SUCCESS;
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rc);
+ }
+ else if (rcStrict == VINF_GIM_HYPERCALL_CONTINUING)
+ rc = VINF_SUCCESS;
+ else if (rcStrict == VINF_GIM_R3_HYPERCALL)
+ rc = VINF_GIM_R3_HYPERCALL;
+ else
+ Assert(RT_FAILURE(VBOXSTRICTRC_VAL(rcStrict)));
+ }
+
+ /* If the GIM #UD exception handler didn't succeed for some reason or wasn't needed, raise #UD. */
+ if (RT_FAILURE(rc))
+ {
+ hmR0SvmSetPendingXcptUD(pVCpu);
+ rc = VINF_SUCCESS;
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD);
+ return rc;
+}
+
+
+/**
+ * \#VMEXIT handler for math-fault exceptions (SVM_EXIT_XCPT_16).
+ * Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptMF(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+
+ /* Paranoia; Ensure we cannot be called as a result of event delivery. */
+ Assert(!pVmcb->ctrl.ExitIntInfo.n.u1Valid); NOREF(pVmcb);
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
+
+ if (!(pCtx->cr0 & X86_CR0_NE))
+ {
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PDISSTATE pDis = &pVCpu->hm.s.DisState;
+ unsigned cbInstr;
+ int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbInstr);
+ if (RT_SUCCESS(rc))
+ {
+ /* Convert a #MF into a FERR -> IRQ 13. See @bugref{6117}. */
+ rc = PDMIsaSetIrq(pVCpu->CTX_SUFF(pVM), 13 /* u8Irq */, 1 /* u8Level */, 0 /* uTagSrc */);
+ if (RT_SUCCESS(rc))
+ hmR0SvmAdvanceRip(pVCpu, cbInstr);
+ }
+ else
+ Log4Func(("EMInterpretDisasCurrent returned %Rrc uOpCode=%#x\n", rc, pDis->pCurInstr->uOpcode));
+ return rc;
+ }
+
+ hmR0SvmSetPendingXcptMF(pVCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * \#VMEXIT handler for debug exceptions (SVM_EXIT_XCPT_1). Conditional
+ * \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptDB(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ if (RT_UNLIKELY(pVCpu->hm.s.Event.fPending))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingInterpret);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB);
+
+ /*
+ * This can be a fault-type #DB (instruction breakpoint) or a trap-type #DB (data
+ * breakpoint). However, for both cases DR6 and DR7 are updated to what the exception
+ * handler expects. See AMD spec. 15.12.2 "#DB (Debug)".
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PSVMVMCB pVmcb = pVCpu->hm.s.svm.pVmcb;
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ int rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), pVmcb->guest.u64DR6, pVCpu->hm.s.fSingleInstruction);
+ if (rc == VINF_EM_RAW_GUEST_TRAP)
+ {
+ Log5(("hmR0SvmExitXcptDB: DR6=%#RX64 -> guest trap\n", pVmcb->guest.u64DR6));
+ if (CPUMIsHyperDebugStateActive(pVCpu))
+ CPUMSetGuestDR6(pVCpu, CPUMGetGuestDR6(pVCpu) | pVmcb->guest.u64DR6);
+
+ /* Reflect the exception back to the guest. */
+ hmR0SvmSetPendingXcptDB(pVCpu);
+ rc = VINF_SUCCESS;
+ }
+
+ /*
+ * Update DR6.
+ */
+ if (CPUMIsHyperDebugStateActive(pVCpu))
+ {
+ Log5(("hmR0SvmExitXcptDB: DR6=%#RX64 -> %Rrc\n", pVmcb->guest.u64DR6, rc));
+ pVmcb->guest.u64DR6 = X86_DR6_INIT_VAL;
+ pVmcb->ctrl.u32VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX;
+ }
+ else
+ {
+ AssertMsg(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc));
+ Assert(!pVCpu->hm.s.fSingleInstruction && !DBGFIsStepping(pVCpu));
+ }
+
+ return rc;
+}
+
+
+/**
+ * \#VMEXIT handler for alignment check exceptions (SVM_EXIT_XCPT_17).
+ * Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptAC(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_AC;
+ Event.n.u1ErrorCodeValid = 1;
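+ /* Note: the error code delivered for #AC is always zero; Event.u was zero-initialized above. */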
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * \#VMEXIT handler for breakpoint exceptions (SVM_EXIT_XCPT_3).
+ * Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptBP(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ int rc = DBGFRZTrap03Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx));
+ if (rc == VINF_EM_RAW_GUEST_TRAP)
+ {
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_BP;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ }
+
+ Assert(rc == VINF_SUCCESS || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_EM_DBG_BREAKPOINT);
+ return rc;
+}
+
+
+/**
+ * Hacks its way around the lovely mesa driver's backdoor accesses.
+ *
+ * @sa hmR0VmxHandleMesaDrvGp
+ */
+static int hmR0SvmHandleMesaDrvGp(PVMCPU pVCpu, PCPUMCTX pCtx, PCSVMVMCB pVmcb)
+{
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_GPRS_MASK);
+ Log(("hmR0SvmHandleMesaDrvGp: at %04x:%08RX64 rcx=%RX64 rbx=%RX64\n",
+ pVmcb->guest.CS.u16Sel, pVmcb->guest.u64RIP, pCtx->rcx, pCtx->rbx));
+ RT_NOREF(pCtx, pVmcb);
+
+ /* For now we'll just skip the instruction. */
+ hmR0SvmAdvanceRip(pVCpu, 1);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Checks if the \#GP'ing instruction is the mesa driver doing its lovely
+ * backdoor logging without checking what it is running inside.
+ *
+ * This recognizes an "IN EAX,DX" instruction executed in flat ring-3, with the
+ * backdoor port and magic numbers loaded in registers.
+ *
+ * @returns true if it is, false if it isn't.
+ * @sa hmR0VmxIsMesaDrvGp
+ */
+DECLINLINE(bool) hmR0SvmIsMesaDrvGp(PVMCPU pVCpu, PCPUMCTX pCtx, PCSVMVMCB pVmcb)
+{
+ /* Check magic and port. */
+ Assert(!(pCtx->fExtrn & (CPUMCTX_EXTRN_RDX | CPUMCTX_EXTRN_RCX)));
+ /*Log8(("hmR0SvmIsMesaDrvGp: rax=%RX64 rdx=%RX64\n", pCtx->fExtrn & CPUMCTX_EXTRN_RAX ? pVmcb->guest.u64RAX : pCtx->rax, pCtx->rdx));*/
+ if (pCtx->dx != UINT32_C(0x5658))
+ return false;
+ if ((pCtx->fExtrn & CPUMCTX_EXTRN_RAX ? pVmcb->guest.u64RAX : pCtx->rax) != UINT32_C(0x564d5868))
+ return false;
+
+ /* Check that it is #GP(0). */
+ if (pVmcb->ctrl.u64ExitInfo1 != 0)
+ return false;
+
+ /* Flat ring-3 CS. */
+ /*Log8(("hmR0SvmIsMesaDrvGp: u8CPL=%d base=%RX64\n", pVmcb->guest.u8CPL, pCtx->fExtrn & CPUMCTX_EXTRN_CS ? pVmcb->guest.CS.u64Base : pCtx->cs.u64Base));*/
+ if (pVmcb->guest.u8CPL != 3)
+ return false;
+ if ((pCtx->fExtrn & CPUMCTX_EXTRN_CS ? pVmcb->guest.CS.u64Base : pCtx->cs.u64Base) != 0)
+ return false;
+
+ /* 0xed: IN eAX,dx */
+ if (pVmcb->ctrl.cbInstrFetched < 1) /* unlikely, it turns out. */
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_GPRS_MASK
+ | CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR3 | CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_EFER);
+ uint8_t abInstr[1];
+ int rc = PGMPhysSimpleReadGCPtr(pVCpu, abInstr, pCtx->rip, sizeof(abInstr));
+ /*Log8(("hmR0SvmIsMesaDrvGp: PGMPhysSimpleReadGCPtr -> %Rrc %#x\n", rc, abInstr[0])); */
+ if (RT_FAILURE(rc))
+ return false;
+ if (abInstr[0] != 0xed)
+ return false;
+ }
+ else
+ {
+ /*Log8(("hmR0SvmIsMesaDrvGp: %#x\n", pVmcb->ctrl.abInstr));*/
+ if (pVmcb->ctrl.abInstr[0] != 0xed)
+ return false;
+ }
+ return true;
+}
+
+
+/**
+ * \#VMEXIT handler for general protection faults (SVM_EXIT_XCPT_13).
+ * Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptGP(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ Assert(pSvmTransient->u64ExitCode == pVmcb->ctrl.u64ExitCode);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if ( !pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv
+ || !hmR0SvmIsMesaDrvGp(pVCpu, pCtx, pVmcb))
+ {
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_GP;
+ Event.n.u1ErrorCodeValid = 1;
+ Event.n.u32ErrorCode = (uint32_t)pVmcb->ctrl.u64ExitInfo1;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+ }
+ return hmR0SvmHandleMesaDrvGp(pVCpu, pCtx, pVmcb);
+}
+
+
+#if defined(HMSVM_ALWAYS_TRAP_ALL_XCPTS) || defined(VBOX_WITH_NESTED_HWVIRT_SVM)
+/**
+ * \#VMEXIT handler for generic exceptions. Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitXcptGeneric(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
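+ /* Exception intercepts use exit codes SVM_EXIT_XCPT_0 + vector, so subtracting the base gives the vector number. */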
+ uint8_t const uVector = pVmcb->ctrl.u64ExitCode - SVM_EXIT_XCPT_0;
+ uint32_t const uErrCode = pVmcb->ctrl.u64ExitInfo1;
+ Assert(pSvmTransient->u64ExitCode == pVmcb->ctrl.u64ExitCode);
+ Assert(uVector <= X86_XCPT_LAST);
+ Log4Func(("uVector=%#x uErrCode=%u\n", uVector, uErrCode));
+
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = uVector;
+ switch (uVector)
+ {
+ /* Shouldn't be here for reflecting #PFs (among other things, the fault address isn't passed along). */
+ case X86_XCPT_PF: AssertMsgFailed(("hmR0SvmExitXcptGeneric: Unexpected exception")); return VERR_SVM_IPE_5;
+ case X86_XCPT_DF:
+ case X86_XCPT_TS:
+ case X86_XCPT_NP:
+ case X86_XCPT_SS:
+ case X86_XCPT_GP:
+ case X86_XCPT_AC:
+ {
+ Event.n.u1ErrorCodeValid = 1;
+ Event.n.u32ErrorCode = uErrCode;
+ break;
+ }
+ }
+
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+}
+#endif
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
+/**
+ * \#VMEXIT handler for CLGI (SVM_EXIT_CLGI). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitClgi(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ Assert(pVmcb);
+ Assert(!pVmcb->ctrl.IntCtrl.n.u1VGifEnable);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
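+ /* CLGI only clears the global interrupt flag (GIF), which is imported as part of the hardware-virtualization state below. */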
+ uint64_t const fImport = CPUMCTX_EXTRN_HWVIRT;
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | fImport);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedClgi(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | fImport);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_SUCCESS)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for STGI (SVM_EXIT_STGI). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitStgi(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ /*
+ * When VGIF is not used we always intercept STGI instructions. When VGIF is used,
+ * we only intercept STGI when there are pending events waiting for GIF to become 1.
+ */
+ PSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ if (pVmcb->ctrl.IntCtrl.n.u1VGifEnable)
+ hmR0SvmClearCtrlIntercept(pVCpu, pVmcb, SVM_CTRL_INTERCEPT_STGI);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ uint64_t const fImport = CPUMCTX_EXTRN_HWVIRT;
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | fImport);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedStgi(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | fImport);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_SUCCESS)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for VMLOAD (SVM_EXIT_VMLOAD). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitVmload(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ Assert(pVmcb);
+ Assert(!pVmcb->ctrl.LbrVirt.n.u1VirtVmsaveVmload);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
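+ /* VMLOAD loads FS, GS, TR, LDTR (including hidden state), KERNEL_GS_BASE and the
+ SYSCALL/SYSENTER MSRs from the VMCB, so import those before handing the instruction to IEM. */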
+ uint64_t const fImport = CPUMCTX_EXTRN_FS | CPUMCTX_EXTRN_GS | CPUMCTX_EXTRN_KERNEL_GS_BASE
+ | CPUMCTX_EXTRN_TR | CPUMCTX_EXTRN_LDTR | CPUMCTX_EXTRN_SYSCALL_MSRS
+ | CPUMCTX_EXTRN_SYSENTER_MSRS;
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | fImport);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedVmload(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | fImport);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_SUCCESS)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_FS | HM_CHANGED_GUEST_GS
+ | HM_CHANGED_GUEST_TR | HM_CHANGED_GUEST_LDTR
+ | HM_CHANGED_GUEST_KERNEL_GS_BASE | HM_CHANGED_GUEST_SYSCALL_MSRS
+ | HM_CHANGED_GUEST_SYSENTER_MSR_MASK);
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for VMSAVE (SVM_EXIT_VMSAVE). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitVmsave(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ Assert(!pVmcb->ctrl.LbrVirt.n.u1VirtVmsaveVmload);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedVmsave(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for INVLPGA (SVM_EXIT_INVLPGA). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitInvlpga(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedInvlpga(pVCpu, cbInstr);
+ }
+ else
+ {
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rcStrict = IEMExecOne(pVCpu);
+ }
+
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * \#VMEXIT handler for VMRUN (SVM_EXIT_VMRUN). Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmExitVmrun(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ /* We shall import the entire state here, just in case we enter and continue execution of
+ the nested-guest with hardware-assisted SVM in ring-0; we would then be switching VMCBs and
+ could otherwise lose part of the CPU state. */
+ HMSVM_CPUMCTX_IMPORT_STATE(pVCpu, HMSVM_CPUMCTX_EXTRN_ALL);
+
+ VBOXSTRICTRC rcStrict;
+ bool const fSupportsNextRipSave = hmR0SvmSupportsNextRipSave(pVCpu);
+ if (fSupportsNextRipSave)
+ {
+ PCSVMVMCB pVmcb = hmR0SvmGetCurrentVmcb(pVCpu);
+ uint8_t const cbInstr = pVmcb->ctrl.u64NextRIP - pVCpu->cpum.GstCtx.rip;
+ rcStrict = IEMExecDecodedVmrun(pVCpu, cbInstr);
+ }
+ else
+ {
+ /* We use IEMExecOneBypassEx() here as it suppresses attempts to continue emulating any
+ instruction(s) when interrupt inhibition is set as part of emulating the VMRUN
+ instruction itself, see @bugref{7243#c126}. */
+ rcStrict = IEMExecOneBypassEx(pVCpu, CPUMCTX2CORE(&pVCpu->cpum.GstCtx), NULL /* pcbWritten */);
+ }
+
+ if (rcStrict == VINF_SUCCESS)
+ {
+ rcStrict = VINF_SVM_VMRUN;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_SVM_VMRUN_MASK);
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ HMSVM_CHECK_SINGLE_STEP(pVCpu, rcStrict);
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * Nested-guest \#VMEXIT handler for debug exceptions (SVM_EXIT_XCPT_1).
+ * Unconditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmNestedExitXcptDB(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ if (pVCpu->hm.s.Event.fPending)
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingInterpret);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+
+ hmR0SvmSetPendingXcptDB(pVCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Nested-guest \#VMEXIT handler for breakpoint exceptions (SVM_EXIT_XCPT_3).
+ * Conditional \#VMEXIT.
+ */
+HMSVM_EXIT_DECL hmR0SvmNestedExitXcptBP(PVMCPU pVCpu, PSVMTRANSIENT pSvmTransient)
+{
+ HMSVM_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pSvmTransient);
+ HMSVM_CHECK_EXIT_DUE_TO_EVENT_DELIVERY(pVCpu, pSvmTransient);
+
+ SVMEVENT Event;
+ Event.u = 0;
+ Event.n.u1Valid = 1;
+ Event.n.u3Type = SVM_EVENT_EXCEPTION;
+ Event.n.u8Vector = X86_XCPT_BP;
+ hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+}
+#endif /* VBOX_WITH_NESTED_HWVIRT_SVM */
+
+/** @} */
+
diff --git a/src/VBox/VMM/VMMR0/HMSVMR0.h b/src/VBox/VMM/VMMR0/HMSVMR0.h
new file mode 100644
index 00000000..19b15ede
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/HMSVMR0.h
@@ -0,0 +1,99 @@
+/* $Id: HMSVMR0.h $ */
+/** @file
+ * HM SVM (AMD-V) - Internal header file.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VMM_INCLUDED_SRC_VMMR0_HMSVMR0_h
+#define VMM_INCLUDED_SRC_VMMR0_HMSVMR0_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <VBox/cdefs.h>
+#include <VBox/types.h>
+#include <VBox/vmm/hm.h>
+#include <VBox/vmm/hm_svm.h>
+
+RT_C_DECLS_BEGIN
+
+/** @defgroup grp_svm_int Internal
+ * @ingroup grp_svm
+ * @internal
+ * @{
+ */
+
+#ifdef IN_RING0
+
+VMMR0DECL(int) SVMR0GlobalInit(void);
+VMMR0DECL(void) SVMR0GlobalTerm(void);
+VMMR0DECL(int) SVMR0Enter(PVMCPU pVCpu);
+VMMR0DECL(void) SVMR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit);
+VMMR0DECL(int) SVMR0EnableCpu(PHMPHYSCPU pHostCpu, PVM pVM, void *pvPageCpu, RTHCPHYS HCPhysCpuPage,
+ bool fEnabledBySystem, PCSUPHWVIRTMSRS pHwvirtMsrs);
+VMMR0DECL(int) SVMR0DisableCpu(void *pvPageCpu, RTHCPHYS pPageCpuPhys);
+VMMR0DECL(int) SVMR0InitVM(PVM pVM);
+VMMR0DECL(int) SVMR0TermVM(PVM pVM);
+VMMR0DECL(int) SVMR0SetupVM(PVM pVM);
+VMMR0DECL(VBOXSTRICTRC) SVMR0RunGuestCode(PVMCPU pVCpu);
+VMMR0DECL(int) SVMR0ExportHostState(PVMCPU pVCpu);
+VMMR0DECL(int) SVMR0ImportStateOnDemand(PVMCPU pVCpu, uint64_t fWhat);
+VMMR0DECL(int) SVMR0InvalidatePage(PVMCPU pVCpu, RTGCPTR GCVirt);
+
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+DECLASM(int) SVMR0VMSwitcherRun64(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu);
+VMMR0DECL(int) SVMR0Execute64BitsHandler(PVMCPU pVCpu, HM64ON32OP enmOp, uint32_t cbParam, uint32_t *paParam);
+#endif /* HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) */
+
+/**
+ * Prepares for and executes VMRUN (32-bit guests).
+ *
+ * @returns VBox status code.
+ * @param pVMCBHostPhys Physical address of host VMCB.
+ * @param pVMCBPhys Physical address of the VMCB.
+ * @param pCtx Pointer to the guest CPU context.
+ * @param pVM The cross context VM structure. (Not used.)
+ * @param pVCpu The cross context virtual CPU structure. (Not used.)
+ */
+DECLASM(int) SVMR0VMRun(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu);
+
+
+/**
+ * Prepares for and executes VMRUN (64-bit guests).
+ *
+ * @returns VBox status code.
+ * @param pVMCBHostPhys Physical address of host VMCB.
+ * @param pVMCBPhys Physical address of the VMCB.
+ * @param pCtx Pointer to the guest CPU context.
+ * @param pVM The cross context VM structure. (Not used.)
+ * @param pVCpu The cross context virtual CPU structure. (Not used.)
+ */
+DECLASM(int) SVMR0VMRun64(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu);
+
+/**
+ * Executes INVLPGA.
+ *
+ * @param pPageGC Virtual page to invalidate.
+ * @param u32ASID Tagged TLB id.
+ */
+DECLASM(void) SVMR0InvlpgA(RTGCPTR pPageGC, uint32_t u32ASID);
+
+#endif /* IN_RING0 */
+
+/** @} */
+
+RT_C_DECLS_END
+
+#endif /* !VMM_INCLUDED_SRC_VMMR0_HMSVMR0_h */
+
diff --git a/src/VBox/VMM/VMMR0/HMVMXR0.cpp b/src/VBox/VMM/VMMR0/HMVMXR0.cpp
new file mode 100644
index 00000000..62d2b7e1
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/HMVMXR0.cpp
@@ -0,0 +1,13777 @@
+/* $Id: HMVMXR0.cpp $ */
+/** @file
+ * HM VMX (Intel VT-x) - Host Context Ring-0.
+ */
+
+/*
+ * Copyright (C) 2012-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_HM
+#define VMCPU_INCL_CPUM_GST_CTX
+#include <iprt/x86.h>
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/thread.h>
+
+#include <VBox/vmm/pdmapi.h>
+#include <VBox/vmm/dbgf.h>
+#include <VBox/vmm/iem.h>
+#include <VBox/vmm/iom.h>
+#include <VBox/vmm/selm.h>
+#include <VBox/vmm/tm.h>
+#include <VBox/vmm/em.h>
+#include <VBox/vmm/gim.h>
+#include <VBox/vmm/apic.h>
+#ifdef VBOX_WITH_REM
+# include <VBox/vmm/rem.h>
+#endif
+#include "HMInternal.h"
+#include <VBox/vmm/vm.h>
+#include <VBox/vmm/hmvmxinline.h>
+#include "HMVMXR0.h"
+#include "dtrace/VBoxVMM.h"
+
+#ifdef DEBUG_ramshankar
+# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
+# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
+# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
+# define HMVMX_ALWAYS_CHECK_GUEST_STATE
+# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
+# define HMVMX_ALWAYS_TRAP_PF
+# define HMVMX_ALWAYS_FLUSH_TLB
+# define HMVMX_ALWAYS_SWAP_EFER
+#endif
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** Use the function table. */
+#define HMVMX_USE_FUNCTION_TABLE
+
+/** Determine which tagged-TLB flush handler to use. */
+#define HMVMX_FLUSH_TAGGED_TLB_EPT_VPID 0
+#define HMVMX_FLUSH_TAGGED_TLB_EPT 1
+#define HMVMX_FLUSH_TAGGED_TLB_VPID 2
+#define HMVMX_FLUSH_TAGGED_TLB_NONE 3
+
+/** @name HMVMX_READ_XXX
+ * Flags to skip redundant reads of some common VMCS fields that are not part of
+ * the guest-CPU or VCPU state but are needed while handling VM-exits.
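+ * These flags are accumulated in VMXTRANSIENT::fVmcsFieldsRead by the
+ * hmR0VmxRead*Vmcs helpers below so that each field is read from the VMCS at
+ * most once per VM-exit.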
+ */
+#define HMVMX_READ_IDT_VECTORING_INFO RT_BIT_32(0)
+#define HMVMX_READ_IDT_VECTORING_ERROR_CODE RT_BIT_32(1)
+#define HMVMX_READ_EXIT_QUALIFICATION RT_BIT_32(2)
+#define HMVMX_READ_EXIT_INSTR_LEN RT_BIT_32(3)
+#define HMVMX_READ_EXIT_INTERRUPTION_INFO RT_BIT_32(4)
+#define HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE RT_BIT_32(5)
+#define HMVMX_READ_EXIT_INSTR_INFO RT_BIT_32(6)
+#define HMVMX_READ_GUEST_LINEAR_ADDR RT_BIT_32(7)
+/** @} */
+
+/**
+ * States of the VMCS.
+ *
+ * This does not reflect all possible VMCS states but currently only those
+ * needed for maintaining the VMCS consistently even when thread-context hooks
+ * are used. Maybe later this can be extended (e.g. for nested virtualization).
+ */
+#define HMVMX_VMCS_STATE_CLEAR RT_BIT(0)
+#define HMVMX_VMCS_STATE_ACTIVE RT_BIT(1)
+#define HMVMX_VMCS_STATE_LAUNCHED RT_BIT(2)
+
+/**
+ * Subset of the guest-CPU state that is kept by VMX R0 code while executing the
+ * guest using hardware-assisted VMX.
+ *
+ * This excludes state like GPRs (other than RSP) which are always
+ * swapped and restored across the world-switch and also registers like EFER,
+ * MSR which cannot be modified by the guest without causing a VM-exit.
+ */
+#define HMVMX_CPUMCTX_EXTRN_ALL ( CPUMCTX_EXTRN_RIP \
+ | CPUMCTX_EXTRN_RFLAGS \
+ | CPUMCTX_EXTRN_RSP \
+ | CPUMCTX_EXTRN_SREG_MASK \
+ | CPUMCTX_EXTRN_TABLE_MASK \
+ | CPUMCTX_EXTRN_KERNEL_GS_BASE \
+ | CPUMCTX_EXTRN_SYSCALL_MSRS \
+ | CPUMCTX_EXTRN_SYSENTER_MSRS \
+ | CPUMCTX_EXTRN_TSC_AUX \
+ | CPUMCTX_EXTRN_OTHER_MSRS \
+ | CPUMCTX_EXTRN_CR0 \
+ | CPUMCTX_EXTRN_CR3 \
+ | CPUMCTX_EXTRN_CR4 \
+ | CPUMCTX_EXTRN_DR7 \
+ | CPUMCTX_EXTRN_HM_VMX_MASK)
+
+/**
+ * Exception bitmap mask for real-mode guests (real-on-v86).
+ *
+ * We need to intercept all exceptions manually except:
+ * - \#AC and \#DB are always intercepted to prevent the CPU from deadlocking
+ * due to bugs in Intel CPUs.
+ * - \#PF need not be intercepted even in real-mode if we have Nested Paging
+ * support.
+ */
+#define HMVMX_REAL_MODE_XCPT_MASK ( RT_BIT(X86_XCPT_DE) /* always: | RT_BIT(X86_XCPT_DB) */ | RT_BIT(X86_XCPT_NMI) \
+ | RT_BIT(X86_XCPT_BP) | RT_BIT(X86_XCPT_OF) | RT_BIT(X86_XCPT_BR) \
+ | RT_BIT(X86_XCPT_UD) | RT_BIT(X86_XCPT_NM) | RT_BIT(X86_XCPT_DF) \
+ | RT_BIT(X86_XCPT_CO_SEG_OVERRUN) | RT_BIT(X86_XCPT_TS) | RT_BIT(X86_XCPT_NP) \
+ | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_GP) /* RT_BIT(X86_XCPT_PF) */ \
+ | RT_BIT(X86_XCPT_MF) /* always: | RT_BIT(X86_XCPT_AC) */ | RT_BIT(X86_XCPT_MC) \
+ | RT_BIT(X86_XCPT_XF))
+
+/** Maximum VM-instruction error number. */
+#define HMVMX_INSTR_ERROR_MAX 28
+
+/** Profiling macro. */
+#ifdef HM_PROFILE_EXIT_DISPATCH
+# define HMVMX_START_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitDispatch, ed)
+# define HMVMX_STOP_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitDispatch, ed)
+#else
+# define HMVMX_START_EXIT_DISPATCH_PROF() do { } while (0)
+# define HMVMX_STOP_EXIT_DISPATCH_PROF() do { } while (0)
+#endif
+
+/** Assert that preemption is disabled or covered by thread-context hooks. */
+#define HMVMX_ASSERT_PREEMPT_SAFE(a_pVCpu) Assert( VMMR0ThreadCtxHookIsEnabled((a_pVCpu)) \
+ || !RTThreadPreemptIsEnabled(NIL_RTTHREAD))
+
+/** Assert that we haven't migrated CPUs when thread-context hooks are not
+ * used. */
+#define HMVMX_ASSERT_CPU_SAFE(a_pVCpu) AssertMsg( VMMR0ThreadCtxHookIsEnabled((a_pVCpu)) \
+ || (a_pVCpu)->hm.s.idEnteredCpu == RTMpCpuId(), \
+ ("Illegal migration! Entered on CPU %u Current %u\n", \
+ (a_pVCpu)->hm.s.idEnteredCpu, RTMpCpuId()))
+
+/** Asserts that the given CPUMCTX_EXTRN_XXX bits are present in the guest-CPU
+ * context. */
+#define HMVMX_CPUMCTX_ASSERT(a_pVCpu, a_fExtrnMbz) AssertMsg(!((a_pVCpu)->cpum.GstCtx.fExtrn & (a_fExtrnMbz)), \
+ ("fExtrn=%#RX64 fExtrnMbz=%#RX64\n", \
+ (a_pVCpu)->cpum.GstCtx.fExtrn, (a_fExtrnMbz)))
+
+/** Macro for importing guest state from the VMCS back into CPUMCTX (intended to be
+ * used only from VM-exit handlers). */
+#define HMVMX_CPUMCTX_IMPORT_STATE(a_pVCpu, a_fWhat) (hmR0VmxImportGuestState((a_pVCpu), (a_fWhat)))
+
+/** Helper macro for VM-exit handlers called unexpectedly. */
+#define HMVMX_UNEXPECTED_EXIT_RET(a_pVCpu, a_pVmxTransient) \
+ do { \
+ (a_pVCpu)->hm.s.u32HMError = (a_pVmxTransient)->uExitReason; \
+ return VERR_VMX_UNEXPECTED_EXIT; \
+ } while (0)
+
+/** Macro for importing segment registers to the VMCS from the guest-CPU context. */
+#ifdef VMX_USE_CACHED_VMCS_ACCESSES
+# define HMVMX_IMPORT_SREG(Sel, a_pCtxSelReg) \
+ hmR0VmxImportGuestSegmentReg(pVCpu, VMX_VMCS16_GUEST_##Sel##_SEL, VMX_VMCS32_GUEST_##Sel##_LIMIT, \
+ VMX_VMCS_GUEST_##Sel##_BASE_CACHE_IDX, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, (a_pCtxSelReg))
+#else
+# define HMVMX_IMPORT_SREG(Sel, a_pCtxSelReg) \
+ hmR0VmxImportGuestSegmentReg(pVCpu, VMX_VMCS16_GUEST_##Sel##_SEL, VMX_VMCS32_GUEST_##Sel##_LIMIT, \
+ VMX_VMCS_GUEST_##Sel##_BASE, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, (a_pCtxSelReg))
+#endif
+
+/** Macro for exporting segment registers to the VMCS from the guest-CPU context. */
+#define HMVMX_EXPORT_SREG(Sel, a_pCtxSelReg) \
+ hmR0VmxExportGuestSegmentReg(pVCpu, VMX_VMCS16_GUEST_##Sel##_SEL, VMX_VMCS32_GUEST_##Sel##_LIMIT, \
+ VMX_VMCS_GUEST_##Sel##_BASE, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, (a_pCtxSelReg))
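+/* For example, HMVMX_IMPORT_SREG(CS, &pVCpu->cpum.GstCtx.cs) expands to a call that
+   reads the guest CS selector, limit, base and access-rights fields from the VMCS
+   into the guest-CPU context, while HMVMX_EXPORT_SREG writes them back the other way. */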
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/** Macro that does the necessary privilege checks and intercepted VM-exits for
+ * guests that attempted to execute a VMX instruction. */
+# define HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(a_pVCpu, a_uExitReason) \
+ do \
+ { \
+ VBOXSTRICTRC rcStrictTmp = hmR0VmxCheckExitDueToVmxInstr((a_pVCpu), (a_uExitReason)); \
+ if (rcStrictTmp == VINF_SUCCESS) \
+ { /* likely */ } \
+ else if (rcStrictTmp == VINF_HM_PENDING_XCPT) \
+ { \
+ Assert((a_pVCpu)->hm.s.Event.fPending); \
+ Log4Func(("Privilege checks failed -> %#x\n", VMX_ENTRY_INT_INFO_VECTOR((a_pVCpu)->hm.s.Event.u64IntInfo))); \
+ return VINF_SUCCESS; \
+ } \
+ else \
+ { \
+ int rcTmp = VBOXSTRICTRC_VAL(rcStrictTmp); \
+ AssertMsgFailedReturn(("Unexpected failure. rc=%Rrc", rcTmp), rcTmp); \
+ } \
+ } while (0)
+
+/** Macro that decodes a memory operand for an instruction VM-exit. */
+# define HMVMX_DECODE_MEM_OPERAND(a_pVCpu, a_uExitInstrInfo, a_uExitQual, a_enmMemAccess, a_pGCPtrEffAddr) \
+ do \
+ { \
+ VBOXSTRICTRC rcStrictTmp = hmR0VmxDecodeMemOperand((a_pVCpu), (a_uExitInstrInfo), (a_uExitQual), (a_enmMemAccess), \
+ (a_pGCPtrEffAddr)); \
+ if (rcStrictTmp == VINF_SUCCESS) \
+ { /* likely */ } \
+ else if (rcStrictTmp == VINF_HM_PENDING_XCPT) \
+ { \
+ uint8_t const uXcptTmp = VMX_ENTRY_INT_INFO_VECTOR((a_pVCpu)->hm.s.Event.u64IntInfo); \
+ Log4Func(("Memory operand decoding failed, raising xcpt %#x\n", uXcptTmp)); \
+ NOREF(uXcptTmp); \
+ return VINF_SUCCESS; \
+ } \
+ else \
+ { \
+ Log4Func(("hmR0VmxDecodeMemOperand failed. rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrictTmp))); \
+ return rcStrictTmp; \
+ } \
+ } while (0)
+
+# ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+/** Macro that executes a VMX instruction in IEM. */
+# define HMVMX_IEM_EXEC_VMX_INSTR_RET(a_pVCpu) \
+ do { \
+ int rc = HMVMX_CPUMCTX_IMPORT_STATE((a_pVCpu), HMVMX_CPUMCTX_EXTRN_ALL); \
+ AssertRCReturn(rc, rc); \
+ VBOXSTRICTRC rcStrict = IEMExecOne((a_pVCpu)); \
+ if (rcStrict == VINF_SUCCESS) \
+ ASMAtomicUoOrU64(&(a_pVCpu)->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); \
+ else if (rcStrict == VINF_IEM_RAISED_XCPT) \
+ { \
+ rcStrict = VINF_SUCCESS; \
+ ASMAtomicUoOrU64(&(a_pVCpu)->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); \
+ } \
+ return VBOXSTRICTRC_VAL(rcStrict); \
+ } while (0)
+
+# endif /* VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM */
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * VMX transient state.
+ *
+ * A state structure for holding miscellaneous information across
+ * VMX non-root operation and restored after the transition.
+ */
+typedef struct VMXTRANSIENT
+{
+ /** The host's rflags/eflags. */
+ RTCCUINTREG fEFlags;
+#if HC_ARCH_BITS == 32
+ uint32_t u32Alignment0;
+#endif
+ /** The guest's TPR value used for TPR shadowing. */
+ uint8_t u8GuestTpr;
+ /** Alignment. */
+ uint8_t abAlignment0[7];
+
+ /** The basic VM-exit reason. */
+ uint16_t uExitReason;
+ /** Alignment. */
+ uint16_t u16Alignment0;
+ /** The VM-exit interruption error code. */
+ uint32_t uExitIntErrorCode;
+ /** The VM-exit qualification. */
+ uint64_t uExitQual;
+ /** The Guest-linear address. */
+ uint64_t uGuestLinearAddr;
+
+ /** The VM-exit interruption-information field. */
+ uint32_t uExitIntInfo;
+ /** The VM-exit instruction-length field. */
+ uint32_t cbInstr;
+ /** The VM-exit instruction-information field. */
+ VMXEXITINSTRINFO ExitInstrInfo;
+ /** Whether the VM-entry failed or not. */
+ bool fVMEntryFailed;
+ /** Alignment. */
+ uint8_t abAlignment1[3];
+
+ /** The VM-entry interruption-information field. */
+ uint32_t uEntryIntInfo;
+ /** The VM-entry exception error code field. */
+ uint32_t uEntryXcptErrorCode;
+ /** The VM-entry instruction length field. */
+ uint32_t cbEntryInstr;
+
+ /** IDT-vectoring information field. */
+ uint32_t uIdtVectoringInfo;
+ /** IDT-vectoring error code. */
+ uint32_t uIdtVectoringErrorCode;
+
+ /** Mask of currently read VMCS fields; HMVMX_READ_XXX. */
+ uint32_t fVmcsFieldsRead;
+
+ /** Whether the guest debug state was active at the time of VM-exit. */
+ bool fWasGuestDebugStateActive;
+ /** Whether the hyper debug state was active at the time of VM-exit. */
+ bool fWasHyperDebugStateActive;
+ /** Whether TSC-offsetting should be setup before VM-entry. */
+ bool fUpdateTscOffsettingAndPreemptTimer;
+ /** Whether the VM-exit was caused by a page-fault during delivery of a
+ * contributory exception or a page-fault. */
+ bool fVectoringDoublePF;
+ /** Whether the VM-exit was caused by a page-fault during delivery of an
+ * external interrupt or NMI. */
+ bool fVectoringPF;
+} VMXTRANSIENT;
+AssertCompileMemberAlignment(VMXTRANSIENT, uExitReason, sizeof(uint64_t));
+AssertCompileMemberAlignment(VMXTRANSIENT, uExitIntInfo, sizeof(uint64_t));
+AssertCompileMemberAlignment(VMXTRANSIENT, uEntryIntInfo, sizeof(uint64_t));
+AssertCompileMemberAlignment(VMXTRANSIENT, fWasGuestDebugStateActive, sizeof(uint64_t));
+AssertCompileMemberSize(VMXTRANSIENT, ExitInstrInfo, sizeof(uint32_t));
+/** Pointer to VMX transient state. */
+typedef VMXTRANSIENT *PVMXTRANSIENT;
+
+/**
+ * Memory operand read or write access.
+ */
+typedef enum VMXMEMACCESS
+{
+ VMXMEMACCESS_READ = 0,
+ VMXMEMACCESS_WRITE = 1
+} VMXMEMACCESS;
+
+/**
+ * VMX VM-exit handler.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient Pointer to the VMX-transient structure.
+ */
+#ifndef HMVMX_USE_FUNCTION_TABLE
+typedef VBOXSTRICTRC FNVMXEXITHANDLER(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient);
+#else
+typedef DECLCALLBACK(VBOXSTRICTRC) FNVMXEXITHANDLER(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient);
+/** Pointer to VM-exit handler. */
+typedef FNVMXEXITHANDLER *PFNVMXEXITHANDLER;
+#endif
+
+/**
+ * VMX VM-exit handler, non-strict status code.
+ *
+ * This is generally the same as FNVMXEXITHANDLER, the NSRC bit is just FYI.
+ *
+ * @returns VBox status code, no informational status code returned.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient Pointer to the VMX-transient structure.
+ *
+ * @remarks This is not used on anything returning VERR_EM_INTERPRETER as the
+ * use of that status code will be replaced with VINF_EM_SOMETHING
+ * later when switching over to IEM.
+ */
+#ifndef HMVMX_USE_FUNCTION_TABLE
+typedef int FNVMXEXITHANDLERNSRC(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient);
+#else
+typedef FNVMXEXITHANDLER FNVMXEXITHANDLERNSRC;
+#endif
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static void hmR0VmxFlushEpt(PVMCPU pVCpu, VMXTLBFLUSHEPT enmTlbFlush);
+static void hmR0VmxFlushVpid(PVMCPU pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr);
+static void hmR0VmxClearIntNmiWindowsVmcs(PVMCPU pVCpu);
+static int hmR0VmxImportGuestState(PVMCPU pVCpu, uint64_t fWhat);
+static VBOXSTRICTRC hmR0VmxInjectEventVmcs(PVMCPU pVCpu, uint64_t u64IntInfo, uint32_t cbInstr, uint32_t u32ErrCode,
+ RTGCUINTREG GCPtrFaultAddress, bool fStepping, uint32_t *pfIntrState);
+#if HC_ARCH_BITS == 32
+static int hmR0VmxInitVmcsReadCache(PVMCPU pVCpu);
+#endif
+#ifndef HMVMX_USE_FUNCTION_TABLE
+DECLINLINE(VBOXSTRICTRC) hmR0VmxHandleExit(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t rcReason);
+# define HMVMX_EXIT_DECL DECLINLINE(VBOXSTRICTRC)
+# define HMVMX_EXIT_NSRC_DECL DECLINLINE(int)
+#else
+# define HMVMX_EXIT_DECL static DECLCALLBACK(VBOXSTRICTRC)
+# define HMVMX_EXIT_NSRC_DECL HMVMX_EXIT_DECL
+#endif
+
+/** @name VM-exit handlers.
+ * @{
+ */
+static FNVMXEXITHANDLER hmR0VmxExitXcptOrNmi;
+static FNVMXEXITHANDLER hmR0VmxExitExtInt;
+static FNVMXEXITHANDLER hmR0VmxExitTripleFault;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitInitSignal;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitSipi;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitIoSmi;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitSmi;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitIntWindow;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitNmiWindow;
+static FNVMXEXITHANDLER hmR0VmxExitTaskSwitch;
+static FNVMXEXITHANDLER hmR0VmxExitCpuid;
+static FNVMXEXITHANDLER hmR0VmxExitGetsec;
+static FNVMXEXITHANDLER hmR0VmxExitHlt;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitInvd;
+static FNVMXEXITHANDLER hmR0VmxExitInvlpg;
+static FNVMXEXITHANDLER hmR0VmxExitRdpmc;
+static FNVMXEXITHANDLER hmR0VmxExitVmcall;
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+static FNVMXEXITHANDLER hmR0VmxExitVmclear;
+static FNVMXEXITHANDLER hmR0VmxExitVmlaunch;
+static FNVMXEXITHANDLER hmR0VmxExitVmptrld;
+static FNVMXEXITHANDLER hmR0VmxExitVmptrst;
+static FNVMXEXITHANDLER hmR0VmxExitVmread;
+static FNVMXEXITHANDLER hmR0VmxExitVmresume;
+static FNVMXEXITHANDLER hmR0VmxExitVmwrite;
+static FNVMXEXITHANDLER hmR0VmxExitVmxoff;
+static FNVMXEXITHANDLER hmR0VmxExitVmxon;
+#endif
+static FNVMXEXITHANDLER hmR0VmxExitRdtsc;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitRsm;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitSetPendingXcptUD;
+static FNVMXEXITHANDLER hmR0VmxExitMovCRx;
+static FNVMXEXITHANDLER hmR0VmxExitMovDRx;
+static FNVMXEXITHANDLER hmR0VmxExitIoInstr;
+static FNVMXEXITHANDLER hmR0VmxExitRdmsr;
+static FNVMXEXITHANDLER hmR0VmxExitWrmsr;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitErrInvalidGuestState;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitErrMsrLoad;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitErrUndefined;
+static FNVMXEXITHANDLER hmR0VmxExitMwait;
+static FNVMXEXITHANDLER hmR0VmxExitMtf;
+static FNVMXEXITHANDLER hmR0VmxExitMonitor;
+static FNVMXEXITHANDLER hmR0VmxExitPause;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitErrMachineCheck;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitTprBelowThreshold;
+static FNVMXEXITHANDLER hmR0VmxExitApicAccess;
+static FNVMXEXITHANDLER hmR0VmxExitXdtrAccess;
+static FNVMXEXITHANDLER hmR0VmxExitEptViolation;
+static FNVMXEXITHANDLER hmR0VmxExitEptMisconfig;
+static FNVMXEXITHANDLER hmR0VmxExitRdtscp;
+static FNVMXEXITHANDLER hmR0VmxExitPreemptTimer;
+static FNVMXEXITHANDLERNSRC hmR0VmxExitWbinvd;
+static FNVMXEXITHANDLER hmR0VmxExitXsetbv;
+static FNVMXEXITHANDLER hmR0VmxExitRdrand;
+static FNVMXEXITHANDLER hmR0VmxExitInvpcid;
+/** @} */
+
+static int hmR0VmxExitXcptPF(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient);
+static int hmR0VmxExitXcptMF(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient);
+static int hmR0VmxExitXcptDB(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient);
+static int hmR0VmxExitXcptBP(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient);
+static int hmR0VmxExitXcptGP(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient);
+static int hmR0VmxExitXcptAC(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient);
+static int hmR0VmxExitXcptGeneric(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient);
+static uint32_t hmR0VmxCheckGuestState(PVMCPU pVCpu);
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+#ifdef HMVMX_USE_FUNCTION_TABLE
+
+/**
+ * VMX_EXIT dispatch table.
+ */
+static const PFNVMXEXITHANDLER g_apfnVMExitHandlers[VMX_EXIT_MAX + 1] =
+{
+ /* 00 VMX_EXIT_XCPT_OR_NMI */ hmR0VmxExitXcptOrNmi,
+ /* 01 VMX_EXIT_EXT_INT */ hmR0VmxExitExtInt,
+ /* 02 VMX_EXIT_TRIPLE_FAULT */ hmR0VmxExitTripleFault,
+ /* 03 VMX_EXIT_INIT_SIGNAL */ hmR0VmxExitInitSignal,
+ /* 04 VMX_EXIT_SIPI */ hmR0VmxExitSipi,
+ /* 05 VMX_EXIT_IO_SMI */ hmR0VmxExitIoSmi,
+ /* 06 VMX_EXIT_SMI */ hmR0VmxExitSmi,
+ /* 07 VMX_EXIT_INT_WINDOW */ hmR0VmxExitIntWindow,
+ /* 08 VMX_EXIT_NMI_WINDOW */ hmR0VmxExitNmiWindow,
+ /* 09 VMX_EXIT_TASK_SWITCH */ hmR0VmxExitTaskSwitch,
+ /* 10 VMX_EXIT_CPUID */ hmR0VmxExitCpuid,
+ /* 11 VMX_EXIT_GETSEC */ hmR0VmxExitGetsec,
+ /* 12 VMX_EXIT_HLT */ hmR0VmxExitHlt,
+ /* 13 VMX_EXIT_INVD */ hmR0VmxExitInvd,
+ /* 14 VMX_EXIT_INVLPG */ hmR0VmxExitInvlpg,
+ /* 15 VMX_EXIT_RDPMC */ hmR0VmxExitRdpmc,
+ /* 16 VMX_EXIT_RDTSC */ hmR0VmxExitRdtsc,
+ /* 17 VMX_EXIT_RSM */ hmR0VmxExitRsm,
+ /* 18 VMX_EXIT_VMCALL */ hmR0VmxExitVmcall,
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ /* 19 VMX_EXIT_VMCLEAR */ hmR0VmxExitVmclear,
+ /* 20 VMX_EXIT_VMLAUNCH */ hmR0VmxExitVmlaunch,
+ /* 21 VMX_EXIT_VMPTRLD */ hmR0VmxExitVmptrld,
+ /* 22 VMX_EXIT_VMPTRST */ hmR0VmxExitVmptrst,
+ /* 23 VMX_EXIT_VMREAD */ hmR0VmxExitVmread,
+ /* 24 VMX_EXIT_VMRESUME */ hmR0VmxExitVmresume,
+ /* 25 VMX_EXIT_VMWRITE */ hmR0VmxExitVmwrite,
+ /* 26 VMX_EXIT_VMXOFF */ hmR0VmxExitVmxoff,
+ /* 27 VMX_EXIT_VMXON */ hmR0VmxExitVmxon,
+#else
+ /* 19 VMX_EXIT_VMCLEAR */ hmR0VmxExitSetPendingXcptUD,
+ /* 20 VMX_EXIT_VMLAUNCH */ hmR0VmxExitSetPendingXcptUD,
+ /* 21 VMX_EXIT_VMPTRLD */ hmR0VmxExitSetPendingXcptUD,
+ /* 22 VMX_EXIT_VMPTRST */ hmR0VmxExitSetPendingXcptUD,
+ /* 23 VMX_EXIT_VMREAD */ hmR0VmxExitSetPendingXcptUD,
+ /* 24 VMX_EXIT_VMRESUME */ hmR0VmxExitSetPendingXcptUD,
+ /* 25 VMX_EXIT_VMWRITE */ hmR0VmxExitSetPendingXcptUD,
+ /* 26 VMX_EXIT_VMXOFF */ hmR0VmxExitSetPendingXcptUD,
+ /* 27 VMX_EXIT_VMXON */ hmR0VmxExitSetPendingXcptUD,
+#endif
+ /* 28 VMX_EXIT_MOV_CRX */ hmR0VmxExitMovCRx,
+ /* 29 VMX_EXIT_MOV_DRX */ hmR0VmxExitMovDRx,
+ /* 30 VMX_EXIT_IO_INSTR */ hmR0VmxExitIoInstr,
+ /* 31 VMX_EXIT_RDMSR */ hmR0VmxExitRdmsr,
+ /* 32 VMX_EXIT_WRMSR */ hmR0VmxExitWrmsr,
+ /* 33 VMX_EXIT_ERR_INVALID_GUEST_STATE */ hmR0VmxExitErrInvalidGuestState,
+ /* 34 VMX_EXIT_ERR_MSR_LOAD */ hmR0VmxExitErrMsrLoad,
+ /* 35 UNDEFINED */ hmR0VmxExitErrUndefined,
+ /* 36 VMX_EXIT_MWAIT */ hmR0VmxExitMwait,
+ /* 37 VMX_EXIT_MTF */ hmR0VmxExitMtf,
+ /* 38 UNDEFINED */ hmR0VmxExitErrUndefined,
+ /* 39 VMX_EXIT_MONITOR */ hmR0VmxExitMonitor,
+ /* 40 VMX_EXIT_PAUSE */ hmR0VmxExitPause,
+ /* 41 VMX_EXIT_ERR_MACHINE_CHECK */ hmR0VmxExitErrMachineCheck,
+ /* 42 UNDEFINED */ hmR0VmxExitErrUndefined,
+ /* 43 VMX_EXIT_TPR_BELOW_THRESHOLD */ hmR0VmxExitTprBelowThreshold,
+ /* 44 VMX_EXIT_APIC_ACCESS */ hmR0VmxExitApicAccess,
+ /* 45 UNDEFINED */ hmR0VmxExitErrUndefined,
+ /* 46 VMX_EXIT_GDTR_IDTR_ACCESS */ hmR0VmxExitXdtrAccess,
+ /* 47 VMX_EXIT_LDTR_TR_ACCESS */ hmR0VmxExitXdtrAccess,
+ /* 48 VMX_EXIT_EPT_VIOLATION */ hmR0VmxExitEptViolation,
+ /* 49 VMX_EXIT_EPT_MISCONFIG */ hmR0VmxExitEptMisconfig,
+ /* 50 VMX_EXIT_INVEPT */ hmR0VmxExitSetPendingXcptUD,
+ /* 51 VMX_EXIT_RDTSCP */ hmR0VmxExitRdtscp,
+ /* 52 VMX_EXIT_PREEMPT_TIMER */ hmR0VmxExitPreemptTimer,
+ /* 53 VMX_EXIT_INVVPID */ hmR0VmxExitSetPendingXcptUD,
+ /* 54 VMX_EXIT_WBINVD */ hmR0VmxExitWbinvd,
+ /* 55 VMX_EXIT_XSETBV */ hmR0VmxExitXsetbv,
+ /* 56 VMX_EXIT_APIC_WRITE */ hmR0VmxExitErrUndefined,
+ /* 57 VMX_EXIT_RDRAND */ hmR0VmxExitRdrand,
+ /* 58 VMX_EXIT_INVPCID */ hmR0VmxExitInvpcid,
+ /* 59 VMX_EXIT_VMFUNC */ hmR0VmxExitSetPendingXcptUD,
+ /* 60 VMX_EXIT_ENCLS */ hmR0VmxExitErrUndefined,
+ /* 61 VMX_EXIT_RDSEED */ hmR0VmxExitErrUndefined, /* only spurious exits, so undefined */
+ /* 62 VMX_EXIT_PML_FULL */ hmR0VmxExitErrUndefined,
+ /* 63 VMX_EXIT_XSAVES */ hmR0VmxExitSetPendingXcptUD,
+ /* 64 VMX_EXIT_XRSTORS */ hmR0VmxExitSetPendingXcptUD,
+};
+#endif /* HMVMX_USE_FUNCTION_TABLE */
+
+#if defined(VBOX_STRICT) && defined(LOG_ENABLED)
+static const char * const g_apszVmxInstrErrors[HMVMX_INSTR_ERROR_MAX + 1] =
+{
+ /* 0 */ "(Not Used)",
+ /* 1 */ "VMCALL executed in VMX root operation.",
+ /* 2 */ "VMCLEAR with invalid physical address.",
+ /* 3 */ "VMCLEAR with VMXON pointer.",
+ /* 4 */ "VMLAUNCH with non-clear VMCS.",
+ /* 5 */ "VMRESUME with non-launched VMCS.",
+ /* 6 */ "VMRESUME after VMXOFF",
+ /* 7 */ "VM-entry with invalid control fields.",
+ /* 8 */ "VM-entry with invalid host state fields.",
+ /* 9 */ "VMPTRLD with invalid physical address.",
+ /* 10 */ "VMPTRLD with VMXON pointer.",
+ /* 11 */ "VMPTRLD with incorrect revision identifier.",
+ /* 12 */ "VMREAD/VMWRITE from/to unsupported VMCS component.",
+ /* 13 */ "VMWRITE to read-only VMCS component.",
+ /* 14 */ "(Not Used)",
+ /* 15 */ "VMXON executed in VMX root operation.",
+ /* 16 */ "VM-entry with invalid executive-VMCS pointer.",
+ /* 17 */ "VM-entry with non-launched executing VMCS.",
+ /* 18 */ "VM-entry with executive-VMCS pointer not VMXON pointer.",
+ /* 19 */ "VMCALL with non-clear VMCS.",
+ /* 20 */ "VMCALL with invalid VM-exit control fields.",
+ /* 21 */ "(Not Used)",
+ /* 22 */ "VMCALL with incorrect MSEG revision identifier.",
+ /* 23 */ "VMXOFF under dual monitor treatment of SMIs and SMM.",
+ /* 24 */ "VMCALL with invalid SMM-monitor features.",
+ /* 25 */ "VM-entry with invalid VM-execution control fields in executive VMCS.",
+ /* 26 */ "VM-entry with events blocked by MOV SS.",
+ /* 27 */ "(Not Used)",
+ /* 28 */ "Invalid operand to INVEPT/INVVPID."
+};
+#endif /* VBOX_STRICT */
+
+
+/**
+ * Updates the VM's last error record.
+ *
+ * If there was a VMX instruction error, reads the error data from the VMCS and
+ * updates VCPU's last error record as well.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
+ * VERR_VMX_INVALID_VMCS_FIELD.
+ * @param rc The error code.
+ */
+static void hmR0VmxUpdateErrorRecord(PVMCPU pVCpu, int rc)
+{
+ if ( rc == VERR_VMX_INVALID_VMCS_FIELD
+ || rc == VERR_VMX_UNABLE_TO_START_VM)
+ {
+ AssertPtrReturnVoid(pVCpu);
+ VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
+ }
+ pVCpu->CTX_SUFF(pVM)->hm.s.rcInit = rc;
+}
+
+
+/**
+ * Reads the VM-entry interruption-information field from the VMCS into the VMX
+ * transient structure.
+ *
+ * @returns VBox status code.
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+DECLINLINE(int) hmR0VmxReadEntryIntInfoVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &pVmxTransient->uEntryIntInfo);
+ AssertRCReturn(rc, rc);
+ return VINF_SUCCESS;
+}
+
+#ifdef VBOX_STRICT
+/**
+ * Reads the VM-entry exception error code field from the VMCS into
+ * the VMX transient structure.
+ *
+ * @returns VBox status code.
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+DECLINLINE(int) hmR0VmxReadEntryXcptErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &pVmxTransient->uEntryXcptErrorCode);
+ AssertRCReturn(rc, rc);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the VM-entry instruction length field from the VMCS into
+ * the VMX transient structure.
+ *
+ * @returns VBox status code.
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+DECLINLINE(int) hmR0VmxReadEntryInstrLenVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &pVmxTransient->cbEntryInstr);
+ AssertRCReturn(rc, rc);
+ return VINF_SUCCESS;
+}
+#endif /* VBOX_STRICT */
+
+
+/**
+ * Reads the VM-exit interruption-information field from the VMCS into the VMX
+ * transient structure.
+ *
+ * @returns VBox status code.
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ */
+DECLINLINE(int) hmR0VmxReadExitIntInfoVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INTERRUPTION_INFO))
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &pVmxTransient->uExitIntInfo);
+ AssertRCReturn(rc, rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INTERRUPTION_INFO;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the VM-exit interruption error code from the VMCS into the VMX
+ * transient structure.
+ *
+ * @returns VBox status code.
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ */
+DECLINLINE(int) hmR0VmxReadExitIntErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE))
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE, &pVmxTransient->uExitIntErrorCode);
+ AssertRCReturn(rc, rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the VM-exit instruction length field from the VMCS into the VMX
+ * transient structure.
+ *
+ * @returns VBox status code.
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ */
+DECLINLINE(int) hmR0VmxReadExitInstrLenVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INSTR_LEN))
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &pVmxTransient->cbInstr);
+ AssertRCReturn(rc, rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INSTR_LEN;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the VM-exit instruction-information field from the VMCS into
+ * the VMX transient structure.
+ *
+ * @returns VBox status code.
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ */
+DECLINLINE(int) hmR0VmxReadExitInstrInfoVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INSTR_INFO))
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_INFO, &pVmxTransient->ExitInstrInfo.u);
+ AssertRCReturn(rc, rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INSTR_INFO;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the VM-exit Qualification from the VMCS into the VMX transient structure.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure of the
+ * calling EMT. (Required for the VMCS cache case.)
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ */
+DECLINLINE(int) hmR0VmxReadExitQualVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_QUALIFICATION))
+ {
+ int rc = VMXReadVmcsGstN(VMX_VMCS_RO_EXIT_QUALIFICATION, &pVmxTransient->uExitQual); NOREF(pVCpu);
+ AssertRCReturn(rc, rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_QUALIFICATION;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the Guest-linear address from the VMCS into the VMX transient structure.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure of the
+ * calling EMT. (Required for the VMCS cache case.)
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ */
+DECLINLINE(int) hmR0VmxReadGuestLinearAddrVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_GUEST_LINEAR_ADDR))
+ {
+ int rc = VMXReadVmcsGstN(VMX_VMCS_RO_GUEST_LINEAR_ADDR, &pVmxTransient->uGuestLinearAddr); NOREF(pVCpu);
+ AssertRCReturn(rc, rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_GUEST_LINEAR_ADDR;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the IDT-vectoring information field from the VMCS into the VMX
+ * transient structure.
+ *
+ * @returns VBox status code.
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+DECLINLINE(int) hmR0VmxReadIdtVectoringInfoVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_IDT_VECTORING_INFO))
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_VECTORING_INFO, &pVmxTransient->uIdtVectoringInfo);
+ AssertRCReturn(rc, rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_IDT_VECTORING_INFO;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Reads the IDT-vectoring error code from the VMCS into the VMX
+ * transient structure.
+ *
+ * @returns VBox status code.
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ */
+DECLINLINE(int) hmR0VmxReadIdtVectoringErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
+{
+ if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_IDT_VECTORING_ERROR_CODE))
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_VECTORING_ERROR_CODE, &pVmxTransient->uIdtVectoringErrorCode);
+ AssertRCReturn(rc, rc);
+ pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_IDT_VECTORING_ERROR_CODE;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Enters VMX root mode operation on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure. Can be
+ * NULL, after a resume.
+ * @param HCPhysCpuPage Physical address of the VMXON region.
+ * @param pvCpuPage Pointer to the VMXON region.
+ */
+static int hmR0VmxEnterRootMode(PVM pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
+{
+ Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
+ Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
+ Assert(pvCpuPage);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ if (pVM)
+ {
+ /* Write the VMCS revision dword to the VMXON region. */
+ *(uint32_t *)pvCpuPage = RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_ID);
+ }
+
+ /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
+ RTCCUINTREG fEFlags = ASMIntDisableFlags();
+
+ /* Enable the VMX bit in CR4 if necessary. */
+ RTCCUINTREG uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
+
+ /* Enter VMX root mode. */
+ int rc = VMXEnable(HCPhysCpuPage);
+ if (RT_FAILURE(rc))
+ {
+ if (!(uOldCr4 & X86_CR4_VMXE))
+ SUPR0ChangeCR4(0, ~X86_CR4_VMXE);
+
+ if (pVM)
+ pVM->hm.s.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
+ }
+
+ /* Restore interrupts. */
+ ASMSetFlags(fEFlags);
+ return rc;
+}
+
+
+/**
+ * Exits VMX root mode operation on the current CPU.
+ *
+ * @returns VBox status code.
+ */
+static int hmR0VmxLeaveRootMode(void)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
+ RTCCUINTREG fEFlags = ASMIntDisableFlags();
+
+ /* If we're for some reason not in VMX root mode, then don't leave it. */
+ RTCCUINTREG uHostCR4 = ASMGetCR4();
+
+ int rc;
+ if (uHostCR4 & X86_CR4_VMXE)
+ {
+ /* Exit VMX root mode and clear the VMX bit in CR4. */
+ VMXDisable();
+ SUPR0ChangeCR4(0, ~X86_CR4_VMXE);
+ rc = VINF_SUCCESS;
+ }
+ else
+ rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
+
+ /* Restore interrupts. */
+ ASMSetFlags(fEFlags);
+ return rc;
+}
+
+
+/**
+ * Allocates and maps one physically contiguous page. The allocated page is
+ * zero'd out. (Used by various VT-x structures).
+ *
+ * @returns IPRT status code.
+ * @param pMemObj Pointer to the ring-0 memory object.
+ * @param ppVirt Where to store the virtual address of the
+ * allocation.
+ * @param pHCPhys Where to store the physical address of the
+ * allocation.
+ */
+static int hmR0VmxPageAllocZ(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys)
+{
+ AssertPtrReturn(pMemObj, VERR_INVALID_PARAMETER);
+ AssertPtrReturn(ppVirt, VERR_INVALID_PARAMETER);
+ AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
+
+ int rc = RTR0MemObjAllocCont(pMemObj, PAGE_SIZE, false /* fExecutable */);
+ if (RT_FAILURE(rc))
+ return rc;
+ *ppVirt = RTR0MemObjAddress(*pMemObj);
+ *pHCPhys = RTR0MemObjGetPagePhysAddr(*pMemObj, 0 /* iPage */);
+ ASMMemZero32(*ppVirt, PAGE_SIZE);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Frees and unmaps an allocated physical page.
+ *
+ * @param pMemObj Pointer to the ring-0 memory object.
+ * @param ppVirt Where to re-initialize the virtual address of
+ * the allocation as 0.
+ * @param pHCPhys Where to re-initialize the physical address of the
+ * allocation as 0.
+ */
+static void hmR0VmxPageFree(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys)
+{
+ AssertPtr(pMemObj);
+ AssertPtr(ppVirt);
+ AssertPtr(pHCPhys);
+ if (*pMemObj != NIL_RTR0MEMOBJ)
+ {
+ int rc = RTR0MemObjFree(*pMemObj, true /* fFreeMappings */);
+ AssertRC(rc);
+ *pMemObj = NIL_RTR0MEMOBJ;
+ *ppVirt = 0;
+ *pHCPhys = 0;
+ }
+}
+
+
+/**
+ * Worker function to free VT-x related structures.
+ *
+ * @param pVM The cross context VM structure.
+ */
+static void hmR0VmxStructsFree(PVM pVM)
+{
+ for (VMCPUID i = 0; i < pVM->cCpus; i++)
+ {
+ PVMCPU pVCpu = &pVM->aCpus[i];
+ AssertPtr(pVCpu);
+
+ hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr);
+ hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr);
+
+ if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap, &pVCpu->hm.s.vmx.HCPhysMsrBitmap);
+
+ hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs);
+ }
+
+ hmR0VmxPageFree(&pVM->hm.s.vmx.hMemObjApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess, &pVM->hm.s.vmx.HCPhysApicAccess);
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ hmR0VmxPageFree(&pVM->hm.s.vmx.hMemObjScratch, &pVM->hm.s.vmx.pbScratch, &pVM->hm.s.vmx.HCPhysScratch);
+#endif
+}
+
+
+/**
+ * Worker function to allocate VT-x related VM structures.
+ *
+ * @returns IPRT status code.
+ * @param pVM The cross context VM structure.
+ */
+static int hmR0VmxStructsAlloc(PVM pVM)
+{
+ /*
+ * Initialize members up-front so we can cleanup properly on allocation failure.
+ */
+#define VMXLOCAL_INIT_VM_MEMOBJ(a_Name, a_VirtPrefix) \
+ pVM->hm.s.vmx.hMemObj##a_Name = NIL_RTR0MEMOBJ; \
+ pVM->hm.s.vmx.a_VirtPrefix##a_Name = 0; \
+ pVM->hm.s.vmx.HCPhys##a_Name = 0;
+
+#define VMXLOCAL_INIT_VMCPU_MEMOBJ(a_Name, a_VirtPrefix) \
+ pVCpu->hm.s.vmx.hMemObj##a_Name = NIL_RTR0MEMOBJ; \
+ pVCpu->hm.s.vmx.a_VirtPrefix##a_Name = 0; \
+ pVCpu->hm.s.vmx.HCPhys##a_Name = 0;
+
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ VMXLOCAL_INIT_VM_MEMOBJ(Scratch, pv);
+#endif
+ VMXLOCAL_INIT_VM_MEMOBJ(ApicAccess, pb);
+
+ AssertCompile(sizeof(VMCPUID) == sizeof(pVM->cCpus));
+ for (VMCPUID i = 0; i < pVM->cCpus; i++)
+ {
+ PVMCPU pVCpu = &pVM->aCpus[i];
+ VMXLOCAL_INIT_VMCPU_MEMOBJ(Vmcs, pv);
+ VMXLOCAL_INIT_VMCPU_MEMOBJ(MsrBitmap, pv);
+ VMXLOCAL_INIT_VMCPU_MEMOBJ(GuestMsr, pv);
+ VMXLOCAL_INIT_VMCPU_MEMOBJ(HostMsr, pv);
+ }
+#undef VMXLOCAL_INIT_VMCPU_MEMOBJ
+#undef VMXLOCAL_INIT_VM_MEMOBJ
+
+ /* The VMCS size cannot be more than 4096 bytes. See Intel spec. Appendix A.1 "Basic VMX Information". */
+ AssertReturnStmt(RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_SIZE) <= PAGE_SIZE,
+ (&pVM->aCpus[0])->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE,
+ VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO);
+
+ /*
+ * Allocate all the VT-x structures.
+ */
+ int rc = VINF_SUCCESS;
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ rc = hmR0VmxPageAllocZ(&pVM->hm.s.vmx.hMemObjScratch, &pVM->hm.s.vmx.pbScratch, &pVM->hm.s.vmx.HCPhysScratch);
+ if (RT_FAILURE(rc))
+ goto cleanup;
+ strcpy((char *)pVM->hm.s.vmx.pbScratch, "SCRATCH Magic");
+ *(uint64_t *)(pVM->hm.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
+#endif
+
+ /* Allocate the APIC-access page for trapping APIC accesses from the guest. */
+ if (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
+ {
+ rc = hmR0VmxPageAllocZ(&pVM->hm.s.vmx.hMemObjApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess,
+ &pVM->hm.s.vmx.HCPhysApicAccess);
+ if (RT_FAILURE(rc))
+ goto cleanup;
+ }
+
+ /*
+ * Initialize per-VCPU VT-x structures.
+ */
+ for (VMCPUID i = 0; i < pVM->cCpus; i++)
+ {
+ PVMCPU pVCpu = &pVM->aCpus[i];
+ AssertPtr(pVCpu);
+
+ /* Allocate the VM control structure (VMCS). */
+ rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs);
+ if (RT_FAILURE(rc))
+ goto cleanup;
+
+ /* Get the allocated virtual-APIC page from the APIC device for transparent TPR accesses. */
+ if ( PDMHasApic(pVM)
+ && (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
+ {
+ rc = APICGetApicPageForCpu(pVCpu, &pVCpu->hm.s.vmx.HCPhysVirtApic, (PRTR0PTR)&pVCpu->hm.s.vmx.pbVirtApic,
+ NULL /* pR3Ptr */, NULL /* pRCPtr */);
+ if (RT_FAILURE(rc))
+ goto cleanup;
+ }
+
+ /*
+ * Allocate the MSR-bitmap if supported by the CPU. The MSR-bitmap is for
+ * transparent accesses of specific MSRs.
+ *
+ * If the condition for enabling MSR bitmaps changes here, don't forget to
+ * update HMAreMsrBitmapsAvailable().
+ */
+ if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ {
+ rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap,
+ &pVCpu->hm.s.vmx.HCPhysMsrBitmap);
+ if (RT_FAILURE(rc))
+ goto cleanup;
+ ASMMemFill32(pVCpu->hm.s.vmx.pvMsrBitmap, PAGE_SIZE, UINT32_C(0xffffffff));
+ }
+
+ /* Allocate the VM-entry MSR-load and VM-exit MSR-store page for the guest MSRs. */
+ rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr);
+ if (RT_FAILURE(rc))
+ goto cleanup;
+
+ /* Allocate the VM-exit MSR-load page for the host MSRs. */
+ rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr);
+ if (RT_FAILURE(rc))
+ goto cleanup;
+ }
+
+ return VINF_SUCCESS;
+
+cleanup:
+ hmR0VmxStructsFree(pVM);
+ return rc;
+}
+
+
+/**
+ * Does global VT-x initialization (called during module initialization).
+ *
+ * @returns VBox status code.
+ */
+VMMR0DECL(int) VMXR0GlobalInit(void)
+{
+#ifdef HMVMX_USE_FUNCTION_TABLE
+ AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_apfnVMExitHandlers));
+# ifdef VBOX_STRICT
+ for (unsigned i = 0; i < RT_ELEMENTS(g_apfnVMExitHandlers); i++)
+ Assert(g_apfnVMExitHandlers[i]);
+# endif
+#endif
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does global VT-x termination (called during module termination).
+ */
+VMMR0DECL(void) VMXR0GlobalTerm()
+{
+ /* Nothing to do currently. */
+}
+
+
+/**
+ * Sets up and activates VT-x on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVM The cross context VM structure. Can be
+ * NULL after a host resume operation.
+ * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
+ * fEnabledByHost is @c true).
+ * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
+ * @a fEnabledByHost is @c true).
+ * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
+ * enable VT-x on the host.
+ * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
+ */
+VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
+ PCSUPHWVIRTMSRS pHwvirtMsrs)
+{
+ Assert(pHostCpu);
+ Assert(pHwvirtMsrs);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* Enable VT-x if it's not already enabled by the host. */
+ if (!fEnabledByHost)
+ {
+ int rc = hmR0VmxEnterRootMode(pVM, HCPhysCpuPage, pvCpuPage);
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+
+ /*
+ * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been
+ * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
+ * invalidated when flushing by VPID.
+ */
+ if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
+ {
+ hmR0VmxFlushEpt(NULL /* pVCpu */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
+ pHostCpu->fFlushAsidBeforeUse = false;
+ }
+ else
+ pHostCpu->fFlushAsidBeforeUse = true;
+
+ /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
+ ++pHostCpu->cTlbFlushes;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Deactivates VT-x on the current CPU.
+ *
+ * @returns VBox status code.
+ * @param pvCpuPage Pointer to the VMXON region.
+ * @param HCPhysCpuPage Physical address of the VMXON region.
+ *
+ * @remarks This function should never be called when SUPR0EnableVTx() or
+ * similar was used to enable VT-x on the host.
+ */
+VMMR0DECL(int) VMXR0DisableCpu(void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
+{
+ RT_NOREF2(pvCpuPage, HCPhysCpuPage);
+
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ return hmR0VmxLeaveRootMode();
+}
+
+
+/**
+ * Sets the permission bits for the specified MSR in the MSR bitmap.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uMsr The MSR value.
+ * @param enmRead Whether reading this MSR causes a VM-exit.
+ * @param enmWrite Whether writing this MSR causes a VM-exit.
+ */
+static void hmR0VmxSetMsrPermission(PVMCPU pVCpu, uint32_t uMsr, VMXMSREXITREAD enmRead, VMXMSREXITWRITE enmWrite)
+{
+ int32_t iBit;
+ uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.vmx.pvMsrBitmap;
+
+ /*
+ * MSR Layout:
+ * Byte index MSR range Interpreted as
+ * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
+ * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
+ * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
+ * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
+ *
+ * A bit corresponding to an MSR within the above range causes a VM-exit
+ * if the bit is 1 on executions of RDMSR/WRMSR.
+ *
+ * If an MSR falls outside the above ranges, it always causes a VM-exit.
+ *
+ * See Intel spec. 24.6.9 "MSR-Bitmap Address".
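+ * For example, MSR_K6_EFER (0xc0000080) falls in the high range: iBit = 0x80, so
+ * its read-intercept bit is bit 0 of byte 0x410 and its write-intercept bit is
+ * bit 0 of byte 0xc10 in the bitmap.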
+ */
+ if (uMsr <= 0x00001fff)
+ iBit = uMsr;
+ else if (uMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
+ {
+ iBit = uMsr - UINT32_C(0xc0000000);
+ pbMsrBitmap += 0x400;
+ }
+ else
+ AssertMsgFailedReturnVoid(("hmR0VmxSetMsrPermission: Invalid MSR %#RX32\n", uMsr));
+
+ Assert(iBit <= 0x1fff);
+ if (enmRead == VMXMSREXIT_INTERCEPT_READ)
+ ASMBitSet(pbMsrBitmap, iBit);
+ else
+ ASMBitClear(pbMsrBitmap, iBit);
+
+ if (enmWrite == VMXMSREXIT_INTERCEPT_WRITE)
+ ASMBitSet(pbMsrBitmap + 0x800, iBit);
+ else
+ ASMBitClear(pbMsrBitmap + 0x800, iBit);
+}
+
+
+/**
+ * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
+ * area.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param cMsrs The number of MSRs.
+ */
+static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPU pVCpu, uint32_t cMsrs)
+{
+ /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
+ uint64_t const uVmxMiscMsr = pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.u64Misc;
+ uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(uVmxMiscMsr);
+ if (RT_UNLIKELY(cMsrs > cMaxSupportedMsrs))
+ {
+ LogRel(("CPU auto-load/store MSR count in VMCS exceeded cMsrs=%u Supported=%u.\n", cMsrs, cMaxSupportedMsrs));
+ pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Update number of guest MSRs to load/store across the world-switch. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs);
+
+ /* Update number of host MSRs to load after the world-switch. Identical to guest-MSR count as it's always paired. */
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs);
+ AssertRCReturn(rc, rc);
+
+ /* Update the VCPU's copy of the MSR count. */
+ pVCpu->hm.s.vmx.cMsrs = cMsrs;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Adds a new (or updates the value of an existing) guest/host MSR
+ * pair to be swapped during the world-switch as part of the
+ * auto-load/store MSR area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uMsr The MSR.
+ * @param uGuestMsrValue Value of the guest MSR.
+ * @param fUpdateHostMsr Whether to update the value of the host MSR if
+ * necessary.
+ * @param pfAddedAndUpdated Where to store whether the MSR was added -and-
+ * its value was updated. Optional, can be NULL.
+ */
+static int hmR0VmxAddAutoLoadStoreMsr(PVMCPU pVCpu, uint32_t uMsr, uint64_t uGuestMsrValue, bool fUpdateHostMsr,
+ bool *pfAddedAndUpdated)
+{
+ PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
+ uint32_t cMsrs = pVCpu->hm.s.vmx.cMsrs;
+ uint32_t i;
+ for (i = 0; i < cMsrs; i++)
+ {
+ if (pGuestMsr->u32Msr == uMsr)
+ break;
+ pGuestMsr++;
+ }
+
+ bool fAdded = false;
+ if (i == cMsrs)
+ {
+ ++cMsrs;
+ int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, cMsrs);
+ AssertMsgRCReturn(rc, ("hmR0VmxAddAutoLoadStoreMsr: Insufficient space to add MSR %u\n", uMsr), rc);
+
+ /* Now that we're swapping MSRs during the world-switch, allow the guest to read/write them without causing VM-exits. */
+ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ hmR0VmxSetMsrPermission(pVCpu, uMsr, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
+
+ fAdded = true;
+ }
+
+ /* Update the MSR values in the auto-load/store MSR area. */
+ pGuestMsr->u32Msr = uMsr;
+ pGuestMsr->u64Value = uGuestMsrValue;
+
+ /* Create/update the MSR slot in the host MSR area. */
+ PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr;
+ pHostMsr += i;
+ pHostMsr->u32Msr = uMsr;
+
+ /*
+ * Update the host MSR only when requested by the caller AND when we're
+ * adding it to the auto-load/store area. Otherwise, it would have been
+ * updated by hmR0VmxExportHostMsrs(). We do this for performance reasons.
+ */
+ bool fUpdatedMsrValue = false;
+ if ( fAdded
+ && fUpdateHostMsr)
+ {
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ pHostMsr->u64Value = ASMRdMsr(pHostMsr->u32Msr);
+ fUpdatedMsrValue = true;
+ }
+
+ if (pfAddedAndUpdated)
+ *pfAddedAndUpdated = fUpdatedMsrValue;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Removes a guest/host MSR pair to be swapped during the world-switch from the
+ * auto-load/store MSR area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uMsr The MSR.
+ */
+static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPU pVCpu, uint32_t uMsr)
+{
+ PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
+ uint32_t cMsrs = pVCpu->hm.s.vmx.cMsrs;
+ for (uint32_t i = 0; i < cMsrs; i++)
+ {
+ /* Find the MSR. */
+ if (pGuestMsr->u32Msr == uMsr)
+ {
+ /* If it's the last MSR, simply reduce the count. */
+ if (i == cMsrs - 1)
+ {
+ --cMsrs;
+ break;
+ }
+
+ /* Remove it by swapping the last MSR in place of it, and reducing the count. */
+ PVMXAUTOMSR pLastGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
+ pLastGuestMsr += cMsrs - 1;
+ pGuestMsr->u32Msr = pLastGuestMsr->u32Msr;
+ pGuestMsr->u64Value = pLastGuestMsr->u64Value;
+
+ PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr;
+ pHostMsr += i; /* Mirror the guest-side removal at the same index. */
+ PVMXAUTOMSR pLastHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr;
+ pLastHostMsr += cMsrs - 1;
+ pHostMsr->u32Msr = pLastHostMsr->u32Msr;
+ pHostMsr->u64Value = pLastHostMsr->u64Value;
+ --cMsrs;
+ break;
+ }
+ pGuestMsr++;
+ }
+
+ /* Update the VMCS if the count changed (meaning the MSR was found). */
+ if (cMsrs != pVCpu->hm.s.vmx.cMsrs)
+ {
+ int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, cMsrs);
+ AssertRCReturn(rc, rc);
+
+ /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
+ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ hmR0VmxSetMsrPermission(pVCpu, uMsr, VMXMSREXIT_INTERCEPT_READ, VMXMSREXIT_INTERCEPT_WRITE);
+
+ Log4Func(("Removed MSR %#RX32 new cMsrs=%u\n", uMsr, pVCpu->hm.s.vmx.cMsrs));
+ return VINF_SUCCESS;
+ }
+
+ return VERR_NOT_FOUND;
+}
+
+
+/**
+ * Checks if the specified guest MSR is part of the auto-load/store area in
+ * the VMCS.
+ *
+ * @returns true if found, false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uMsr The MSR to find.
+ */
+static bool hmR0VmxIsAutoLoadStoreGuestMsr(PVMCPU pVCpu, uint32_t uMsr)
+{
+ PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
+ uint32_t const cMsrs = pVCpu->hm.s.vmx.cMsrs;
+
+ for (uint32_t i = 0; i < cMsrs; i++, pGuestMsr++)
+ {
+ if (pGuestMsr->u32Msr == uMsr)
+ return true;
+ }
+ return false;
+}
+
+
+/**
+ * Updates the value of all host MSRs in the auto-load/store area in the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxUpdateAutoLoadStoreHostMsrs(PVMCPU pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr;
+ PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
+ uint32_t const cMsrs = pVCpu->hm.s.vmx.cMsrs;
+
+ for (uint32_t i = 0; i < cMsrs; i++, pHostMsr++, pGuestMsr++)
+ {
+ AssertReturnVoid(pHostMsr->u32Msr == pGuestMsr->u32Msr);
+
+ /*
+ * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
+ * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
+ */
+ if (pHostMsr->u32Msr == MSR_K6_EFER)
+ pHostMsr->u64Value = pVCpu->CTX_SUFF(pVM)->hm.s.vmx.u64HostEfer;
+ else
+ pHostMsr->u64Value = ASMRdMsr(pHostMsr->u32Msr);
+ }
+
+ pVCpu->hm.s.vmx.fUpdatedHostMsrs = true;
+}
+
+
+/**
+ * Saves a set of host MSRs to allow read/write passthru access to the guest and
+ * perform lazy restoration of the host MSRs while leaving VT-x.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxLazySaveHostMsrs(PVMCPU pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /*
+ * Note: If you're adding MSRs here, make sure to update the MSR-bitmap permissions in hmR0VmxSetupProcCtls().
+ */
+ if (!(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
+ {
+ Assert(!(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
+#if HC_ARCH_BITS == 64
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests)
+ {
+ pVCpu->hm.s.vmx.u64HostLStarMsr = ASMRdMsr(MSR_K8_LSTAR);
+ pVCpu->hm.s.vmx.u64HostStarMsr = ASMRdMsr(MSR_K6_STAR);
+ pVCpu->hm.s.vmx.u64HostSFMaskMsr = ASMRdMsr(MSR_K8_SF_MASK);
+ pVCpu->hm.s.vmx.u64HostKernelGSBaseMsr = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
+ }
+#endif
+ pVCpu->hm.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
+ }
+}
+
+
+/**
+ * Checks whether the MSR belongs to the set of guest MSRs that we restore
+ * lazily while leaving VT-x.
+ *
+ * @returns true if it does, false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uMsr The MSR to check.
+ */
+static bool hmR0VmxIsLazyGuestMsr(PVMCPU pVCpu, uint32_t uMsr)
+{
+ NOREF(pVCpu);
+#if HC_ARCH_BITS == 64
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests)
+ {
+ switch (uMsr)
+ {
+ case MSR_K8_LSTAR:
+ case MSR_K6_STAR:
+ case MSR_K8_SF_MASK:
+ case MSR_K8_KERNEL_GS_BASE:
+ return true;
+ }
+ }
+#else
+ RT_NOREF(pVCpu, uMsr);
+#endif
+ return false;
+}
+
+
+/**
+ * Loads a set of guest MSRs to allow read/passthru to the guest.
+ *
+ * The name of this function is slightly confusing. This function does NOT
+ * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
+ * common prefix for functions dealing with "lazy restoration" of the shared
+ * MSRs.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxLazyLoadGuestMsrs(PVMCPU pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ Assert(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
+#if HC_ARCH_BITS == 64
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests)
+ {
+ /*
+ * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
+ * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
+ * we can skip a few MSR writes.
+ *
+ * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the
+ * guest MSR values in the guest-CPU context might be different to what's currently
+ * loaded in the CPU. In either case, we need to write the new guest MSR values to the
+ * CPU, see @bugref{8728}.
+ */
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if ( !(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
+ && pCtx->msrKERNELGSBASE == pVCpu->hm.s.vmx.u64HostKernelGSBaseMsr
+ && pCtx->msrLSTAR == pVCpu->hm.s.vmx.u64HostLStarMsr
+ && pCtx->msrSTAR == pVCpu->hm.s.vmx.u64HostStarMsr
+ && pCtx->msrSFMASK == pVCpu->hm.s.vmx.u64HostSFMaskMsr)
+ {
+#ifdef VBOX_STRICT
+ Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
+ Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
+ Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
+ Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
+#endif
+ }
+ else
+ {
+ ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
+ ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
+ ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
+ ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK);
+ }
+ }
+#endif
+ pVCpu->hm.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
+}
+
+
+/**
+ * Performs lazy restoration of the set of host MSRs if they were previously
+ * loaded with guest MSR values.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ * @remarks The guest MSRs should have been saved back into the guest-CPU
+ * context by hmR0VmxImportGuestState()!!!
+ */
+static void hmR0VmxLazyRestoreHostMsrs(PVMCPU pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ if (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
+ {
+ Assert(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
+#if HC_ARCH_BITS == 64
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests)
+ {
+ ASMWrMsr(MSR_K8_LSTAR, pVCpu->hm.s.vmx.u64HostLStarMsr);
+ ASMWrMsr(MSR_K6_STAR, pVCpu->hm.s.vmx.u64HostStarMsr);
+ ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hm.s.vmx.u64HostSFMaskMsr);
+ ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hm.s.vmx.u64HostKernelGSBaseMsr);
+ }
+#endif
+ }
+ pVCpu->hm.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
+}
+
+
+/**
+ * Verifies that our cached values of the VMCS fields are all consistent with
+ * what's actually present in the VMCS.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if all our caches match their respective VMCS fields.
+ * @retval VERR_VMX_VMCS_FIELD_CACHE_INVALID if a cache field doesn't match the
+ * VMCS content. HMCPU error-field is
+ * updated, see VMX_VCI_XXX.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int hmR0VmxCheckVmcsCtls(PVMCPU pVCpu)
+{
+ uint32_t u32Val;
+ int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val);
+ AssertRCReturn(rc, rc);
+ AssertMsgReturnStmt(pVCpu->hm.s.vmx.u32EntryCtls == u32Val,
+ ("Cache=%#RX32 VMCS=%#RX32\n", pVCpu->hm.s.vmx.u32EntryCtls, u32Val),
+ pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_ENTRY,
+ VERR_VMX_VMCS_FIELD_CACHE_INVALID);
+
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT, &u32Val);
+ AssertRCReturn(rc, rc);
+ AssertMsgReturnStmt(pVCpu->hm.s.vmx.u32ExitCtls == u32Val,
+ ("Cache=%#RX32 VMCS=%#RX32\n", pVCpu->hm.s.vmx.u32ExitCtls, u32Val),
+ pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_EXIT,
+ VERR_VMX_VMCS_FIELD_CACHE_INVALID);
+
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, &u32Val);
+ AssertRCReturn(rc, rc);
+ AssertMsgReturnStmt(pVCpu->hm.s.vmx.u32PinCtls == u32Val,
+ ("Cache=%#RX32 VMCS=%#RX32\n", pVCpu->hm.s.vmx.u32PinCtls, u32Val),
+ pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_PIN_EXEC,
+ VERR_VMX_VMCS_FIELD_CACHE_INVALID);
+
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, &u32Val);
+ AssertRCReturn(rc, rc);
+ AssertMsgReturnStmt(pVCpu->hm.s.vmx.u32ProcCtls == u32Val,
+ ("Cache=%#RX32 VMCS=%#RX32\n", pVCpu->hm.s.vmx.u32ProcCtls, u32Val),
+ pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_PROC_EXEC,
+ VERR_VMX_VMCS_FIELD_CACHE_INVALID);
+
+ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
+ {
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, &u32Val);
+ AssertRCReturn(rc, rc);
+ AssertMsgReturnStmt(pVCpu->hm.s.vmx.u32ProcCtls2 == u32Val,
+ ("Cache=%#RX32 VMCS=%#RX32\n", pVCpu->hm.s.vmx.u32ProcCtls2, u32Val),
+ pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_PROC_EXEC2,
+ VERR_VMX_VMCS_FIELD_CACHE_INVALID);
+ }
+
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, &u32Val);
+ AssertRCReturn(rc, rc);
+ AssertMsgReturnStmt(pVCpu->hm.s.vmx.u32XcptBitmap == u32Val,
+ ("Cache=%#RX32 VMCS=%#RX32\n", pVCpu->hm.s.vmx.u32XcptBitmap, u32Val),
+ pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_XCPT_BITMAP,
+ VERR_VMX_VMCS_FIELD_CACHE_INVALID);
+
+ uint64_t u64Val;
+ rc = VMXReadVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, &u64Val);
+ AssertRCReturn(rc, rc);
+ AssertMsgReturnStmt(pVCpu->hm.s.vmx.u64TscOffset == u64Val,
+ ("Cache=%#RX64 VMCS=%#RX64\n", pVCpu->hm.s.vmx.u64TscOffset, u64Val),
+ pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_TSC_OFFSET,
+ VERR_VMX_VMCS_FIELD_CACHE_INVALID);
+
+ return VINF_SUCCESS;
+}
+
+
+#ifdef VBOX_STRICT
+/**
+ * Verifies that our cached host EFER value has not changed
+ * since we cached it.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0VmxCheckHostEferMsr(PVMCPU pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ if (pVCpu->hm.s.vmx.u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
+ {
+ uint64_t u64Val;
+ int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &u64Val);
+ AssertRC(rc);
+
+ uint64_t u64HostEferMsr = ASMRdMsr(MSR_K6_EFER);
+ AssertMsgReturnVoid(u64HostEferMsr == u64Val, ("u64HostEferMsr=%#RX64 u64Val=%#RX64\n", u64HostEferMsr, u64Val));
+ }
+}
+
+
+/**
+ * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
+ * VMCS are correct.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPU pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* Verify that the MSR counts in the VMCS are what we think they should be. */
+ uint32_t cMsrs;
+ int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cMsrs); AssertRC(rc);
+ Assert(cMsrs == pVCpu->hm.s.vmx.cMsrs);
+
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cMsrs); AssertRC(rc);
+ Assert(cMsrs == pVCpu->hm.s.vmx.cMsrs);
+
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cMsrs); AssertRC(rc);
+ Assert(cMsrs == pVCpu->hm.s.vmx.cMsrs);
+
+ PCVMXAUTOMSR pHostMsr = (PCVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr;
+ PCVMXAUTOMSR pGuestMsr = (PCVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
+ for (uint32_t i = 0; i < cMsrs; i++, pHostMsr++, pGuestMsr++)
+ {
+ /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
+ AssertMsgReturnVoid(pHostMsr->u32Msr == pGuestMsr->u32Msr, ("HostMsr=%#RX32 GuestMsr=%#RX32 cMsrs=%u\n", pHostMsr->u32Msr,
+ pGuestMsr->u32Msr, cMsrs));
+
+ uint64_t u64Msr = ASMRdMsr(pHostMsr->u32Msr);
+ AssertMsgReturnVoid(pHostMsr->u64Value == u64Msr, ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
+ pHostMsr->u32Msr, pHostMsr->u64Value, u64Msr, cMsrs));
+
+ /* Verify that the permissions are as expected in the MSR bitmap. */
+ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ {
+ VMXMSREXITREAD enmRead;
+ VMXMSREXITWRITE enmWrite;
+ rc = HMGetVmxMsrPermission(pVCpu->hm.s.vmx.pvMsrBitmap, pGuestMsr->u32Msr, &enmRead, &enmWrite);
+ AssertMsgReturnVoid(rc == VINF_SUCCESS, ("HMGetVmxMsrPermission! failed. rc=%Rrc\n", rc));
+ if (pGuestMsr->u32Msr == MSR_K6_EFER)
+ {
+ AssertMsgReturnVoid(enmRead == VMXMSREXIT_INTERCEPT_READ, ("Passthru read for EFER!?\n"));
+ AssertMsgReturnVoid(enmWrite == VMXMSREXIT_INTERCEPT_WRITE, ("Passthru write for EFER!?\n"));
+ }
+ else
+ {
+ AssertMsgReturnVoid(enmRead == VMXMSREXIT_PASSTHRU_READ, ("u32Msr=%#RX32 cMsrs=%u No passthru read!\n",
+ pGuestMsr->u32Msr, cMsrs));
+ AssertMsgReturnVoid(enmWrite == VMXMSREXIT_PASSTHRU_WRITE, ("u32Msr=%#RX32 cMsrs=%u No passthru write!\n",
+ pGuestMsr->u32Msr, cMsrs));
+ }
+ }
+ }
+}
+#endif /* VBOX_STRICT */
+
+
+/**
+ * Flushes the TLB using EPT.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure of the calling
+ * EMT. Can be NULL depending on @a enmTlbFlush.
+ * @param enmTlbFlush Type of flush.
+ *
+ * @remarks Caller is responsible for making sure this function is called only
+ * when NestedPaging is supported and providing @a enmTlbFlush that is
+ * supported by the CPU.
+ * @remarks Can be called with interrupts disabled.
+ */
+static void hmR0VmxFlushEpt(PVMCPU pVCpu, VMXTLBFLUSHEPT enmTlbFlush)
+{
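+ /* INVEPT descriptor: the first quadword holds the EPT pointer (ignored for all-context
+    flushes), the second quadword is reserved and must be zero. */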
+ uint64_t au64Descriptor[2];
+ if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
+ au64Descriptor[0] = 0;
+ else
+ {
+ Assert(pVCpu);
+ au64Descriptor[0] = pVCpu->hm.s.vmx.HCPhysEPTP;
+ }
+ au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
+
+ int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
+ AssertMsg(rc == VINF_SUCCESS,
+ ("VMXR0InvEPT %#x %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hm.s.vmx.HCPhysEPTP : 0, rc));
+
+ if ( RT_SUCCESS(rc)
+ && pVCpu)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
+}
+
+
+/**
+ * Flushes the TLB using VPID.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure of the calling
+ * EMT. Can be NULL depending on @a enmTlbFlush.
+ * @param enmTlbFlush Type of flush.
+ * @param GCPtr Virtual address of the page to flush (can be 0 depending
+ * on @a enmTlbFlush).
+ *
+ * @remarks Can be called with interrupts disabled.
+ */
+static void hmR0VmxFlushVpid(PVMCPU pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
+{
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fVpid);
+
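+ /* INVVPID descriptor: the VPID in bits 15:0 of the first quadword (remaining bits MBZ),
+    the linear address in the second quadword. */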
+ uint64_t au64Descriptor[2];
+ if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
+ {
+ au64Descriptor[0] = 0;
+ au64Descriptor[1] = 0;
+ }
+ else
+ {
+ AssertPtr(pVCpu);
+ AssertMsg(pVCpu->hm.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
+ AssertMsg(pVCpu->hm.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
+ au64Descriptor[0] = pVCpu->hm.s.uCurrentAsid;
+ au64Descriptor[1] = GCPtr;
+ }
+
+ int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
+ AssertMsg(rc == VINF_SUCCESS,
+ ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hm.s.uCurrentAsid : 0, GCPtr, rc));
+
+ if ( RT_SUCCESS(rc)
+ && pVCpu)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
+ NOREF(rc);
+}
+
+
+/**
+ * Invalidates a guest page by guest virtual address. Only relevant for
+ * EPT/VPID, otherwise there is nothing really to invalidate.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param GCVirt Guest virtual address of the page to invalidate.
+ */
+VMMR0DECL(int) VMXR0InvalidatePage(PVMCPU pVCpu, RTGCPTR GCVirt)
+{
+ AssertPtr(pVCpu);
+ LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
+
+ bool fFlushPending = VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
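+ /* If a full TLB flush is already pending, there is nothing to do here; the pending flush
+    covers any individual page. */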
+ if (!fFlushPending)
+ {
+ /*
+ * We must invalidate the guest TLB entry in either case, we cannot ignore it even for
+ * the EPT case. See @bugref{6043} and @bugref{6177}.
+ *
+ * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
+ * as this function may be called in a loop with individual addresses.
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if (pVM->hm.s.vmx.fVpid)
+ {
+ bool fVpidFlush = RT_BOOL(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR);
+
+#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
+ /*
+ * Work around errata BV75, AAJ159 and others that affect several Intel CPUs
+ * where executing INVVPID outside 64-bit mode does not flush translations of
+ * 64-bit linear addresses, see @bugref{6208#c72}.
+ */
+ if (RT_HI_U32(GCVirt))
+ fVpidFlush = false;
+#endif
+
+ if (fVpidFlush)
+ {
+ hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
+ }
+ else
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
+ }
+ else if (pVM->hm.s.fNestedPaging)
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
+ * case where neither EPT nor VPID is supported by the CPU.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPU pVCpu)
+{
+ AssertPtr(pVCpu);
+ AssertPtr(pHostCpu);
+
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
+
+ Assert(pHostCpu->idCpu != NIL_RTCPUID);
+ pVCpu->hm.s.idLastCpu = pHostCpu->idCpu;
+ pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+ pVCpu->hm.s.fForceTLBFlush = false;
+ return;
+}
+
+
+/**
+ * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks All references to "ASID" in this function pertain to "VPID" in Intel's
+ *          nomenclature. The reason is to avoid confusion in compare statements
+ * since the host-CPU copies are named "ASID".
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPU pVCpu)
+{
+#ifdef VBOX_WITH_STATISTICS
+ bool fTlbFlushed = false;
+# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
+# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
+ if (!fTlbFlushed) \
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
+ } while (0)
+#else
+# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
+# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
+#endif
+
+ AssertPtr(pVCpu);
+ AssertPtr(pHostCpu);
+ Assert(pHostCpu->idCpu != NIL_RTCPUID);
+
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ AssertMsg(pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid,
+ ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
+ "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hm.s.fNestedPaging, pVM->hm.s.vmx.fVpid));
+
+ /*
+ * Force a TLB flush for the first world-switch if the current CPU differs from the one we
+ * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
+ * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
+ * cannot reuse the current ASID anymore.
+ */
+ if ( pVCpu->hm.s.idLastCpu != pHostCpu->idCpu
+ || pVCpu->hm.s.cTlbFlushes != pHostCpu->cTlbFlushes)
+ {
+ ++pHostCpu->uCurrentAsid;
+ if (pHostCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
+ {
+ pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
+ pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
+ pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
+ }
+
+ pVCpu->hm.s.uCurrentAsid = pHostCpu->uCurrentAsid;
+ pVCpu->hm.s.idLastCpu = pHostCpu->idCpu;
+ pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+
+ /*
+ * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
+ * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
+ */
+ hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmTlbFlushEpt);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
+ HMVMX_SET_TAGGED_TLB_FLUSHED();
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
+ }
+ else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
+ {
+ /*
+ * Changes to the EPT paging structure by the VMM require flushing by EPT as the CPU
+ * creates guest-physical (i.e. only EPT-tagged) mappings while traversing the EPT
+ * tables when EPT is in use. Flushing-by-VPID will only flush linear (only
+ * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
+ * mappings, see @bugref{6568}.
+ *
+ * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
+ */
+ hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmTlbFlushEpt);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
+ HMVMX_SET_TAGGED_TLB_FLUSHED();
+ }
+
+ pVCpu->hm.s.fForceTLBFlush = false;
+ HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
+
+ Assert(pVCpu->hm.s.idLastCpu == pHostCpu->idCpu);
+ Assert(pVCpu->hm.s.cTlbFlushes == pHostCpu->cTlbFlushes);
+ AssertMsg(pVCpu->hm.s.cTlbFlushes == pHostCpu->cTlbFlushes,
+ ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pHostCpu->cTlbFlushes));
+ AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
+ ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
+ pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hm.s.idLastCpu, pVCpu->hm.s.cTlbFlushes));
+ AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
+ ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
+
+ /* Update VMCS with the VPID. */
+ int rc = VMXWriteVmcs32(VMX_VMCS16_VPID, pVCpu->hm.s.uCurrentAsid);
+ AssertRC(rc);
+
+#undef HMVMX_SET_TAGGED_TLB_FLUSHED
+}
+
+
+/**
+ * Flushes the tagged-TLB entries for EPT CPUs as necessary.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPU pVCpu)
+{
+ AssertPtr(pVCpu);
+ AssertPtr(pHostCpu);
+ Assert(pHostCpu->idCpu != NIL_RTCPUID);
+ AssertMsg(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
+ AssertMsg(!pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
+
+ /*
+ * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
+ * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
+ */
+ if ( pVCpu->hm.s.idLastCpu != pHostCpu->idCpu
+ || pVCpu->hm.s.cTlbFlushes != pHostCpu->cTlbFlushes)
+ {
+ pVCpu->hm.s.fForceTLBFlush = true;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
+ }
+
+ /* Check for explicit TLB flushes. */
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
+ {
+ pVCpu->hm.s.fForceTLBFlush = true;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
+ }
+
+ pVCpu->hm.s.idLastCpu = pHostCpu->idCpu;
+ pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+
+ if (pVCpu->hm.s.fForceTLBFlush)
+ {
+ hmR0VmxFlushEpt(pVCpu, pVCpu->CTX_SUFF(pVM)->hm.s.vmx.enmTlbFlushEpt);
+ pVCpu->hm.s.fForceTLBFlush = false;
+ }
+}
+
+
+/**
+ * Flushes the tagged-TLB entries for VPID CPUs as necessary.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Called with interrupts disabled.
+ */
+static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPU pVCpu)
+{
+ AssertPtr(pVCpu);
+ AssertPtr(pHostCpu);
+ Assert(pHostCpu->idCpu != NIL_RTCPUID);
+ AssertMsg(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
+ AssertMsg(!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
+
+ /*
+ * Force a TLB flush for the first world switch if the current CPU differs from the one we
+ * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
+ * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
+ * cannot reuse the current ASID anymore.
+ */
+ if ( pVCpu->hm.s.idLastCpu != pHostCpu->idCpu
+ || pVCpu->hm.s.cTlbFlushes != pHostCpu->cTlbFlushes)
+ {
+ pVCpu->hm.s.fForceTLBFlush = true;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
+ }
+
+ /* Check for explicit TLB flushes. */
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
+ {
+ /*
+ * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
+ * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
+ * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
+ * include fExplicitFlush's too) - an obscure corner case.
+ */
+ pVCpu->hm.s.fForceTLBFlush = true;
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
+ }
+
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ pVCpu->hm.s.idLastCpu = pHostCpu->idCpu;
+ if (pVCpu->hm.s.fForceTLBFlush)
+ {
+ ++pHostCpu->uCurrentAsid;
+ if (pHostCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
+ {
+ pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
+ pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
+ pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
+ }
+
+ pVCpu->hm.s.fForceTLBFlush = false;
+ pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes;
+ pVCpu->hm.s.uCurrentAsid = pHostCpu->uCurrentAsid;
+ if (pHostCpu->fFlushAsidBeforeUse)
+ {
+ if (pVM->hm.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
+ hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
+ else if (pVM->hm.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
+ {
+ hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
+ pHostCpu->fFlushAsidBeforeUse = false;
+ }
+ else
+ {
+ /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
+ AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
+ }
+ }
+ }
+
+ AssertMsg(pVCpu->hm.s.cTlbFlushes == pHostCpu->cTlbFlushes,
+ ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pHostCpu->cTlbFlushes));
+ AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
+ ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
+ pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hm.s.idLastCpu, pVCpu->hm.s.cTlbFlushes));
+ AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
+ ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
+
+ int rc = VMXWriteVmcs32(VMX_VMCS16_VPID, pVCpu->hm.s.uCurrentAsid);
+ AssertRC(rc);
+}
+
+
+/**
+ * Flushes the guest TLB entry based on CPU capabilities.
+ *
+ * @param pHostCpu The HM physical-CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Called with interrupts disabled.
+ */
+DECLINLINE(void) hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPU pVCpu)
+{
+#ifdef HMVMX_ALWAYS_FLUSH_TLB
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
+#endif
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ switch (pVM->hm.s.vmx.enmTlbFlushType)
+ {
+ case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu); break;
+ case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu); break;
+ case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
+ case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
+ default:
+ AssertMsgFailed(("Invalid flush-tag function identifier\n"));
+ break;
+ }
+ /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
+}
+
+
+/**
+ * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
+ * TLB entries from the host TLB before VM-entry.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+static int hmR0VmxSetupTaggedTlb(PVM pVM)
+{
+ /*
+ * Determine optimal flush type for Nested Paging.
+ * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up unrestricted
+ * guest execution (see hmR3InitFinalizeR0()).
+ */
+ if (pVM->hm.s.fNestedPaging)
+ {
+ if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
+ {
+ if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
+ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
+ else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
+ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
+ else
+ {
+ /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */
+ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
+ pVM->aCpus[0].hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Make sure the write-back cacheable memory type for EPT is supported. */
+ if (RT_UNLIKELY(!(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_EMT_WB)))
+ {
+ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
+ pVM->aCpus[0].hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* EPT requires a page-walk length of 4. */
+ if (RT_UNLIKELY(!(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
+ {
+ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
+ pVM->aCpus[0].hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+ }
+ else
+ {
+ /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
+ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
+ pVM->aCpus[0].hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+ }
+
+ /*
+ * Determine optimal flush type for VPID.
+ */
+ if (pVM->hm.s.vmx.fVpid)
+ {
+ if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
+ {
+ if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
+ pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
+ else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
+ pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
+ else
+ {
+ /* Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU. Ignore the VPID capability. */
+ if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
+ LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
+ if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
+ LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
+ pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
+ pVM->hm.s.vmx.fVpid = false;
+ }
+ }
+ else
+ {
+ /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
+ Log4Func(("VPID supported without INVEPT support. Ignoring VPID.\n"));
+ pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
+ pVM->hm.s.vmx.fVpid = false;
+ }
+ }
+
+ /*
+ * Setup the handler for flushing tagged-TLBs.
+ */
+ if (pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid)
+ pVM->hm.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
+ else if (pVM->hm.s.fNestedPaging)
+ pVM->hm.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
+ else if (pVM->hm.s.vmx.fVpid)
+ pVM->hm.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
+ else
+ pVM->hm.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up pin-based VM-execution controls in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks We don't really care about optimizing vmwrites here as it's done only
+ * once per VM and hence we don't care about VMCS-field cache comparisons.
+ */
+static int hmR0VmxSetupPinCtls(PVMCPU pVCpu)
+{
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ uint32_t fVal = pVM->hm.s.vmx.Msrs.PinCtls.n.allowed0; /* Bits set here must always be set. */
+ uint32_t const fZap = pVM->hm.s.vmx.Msrs.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
+
+ fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
+ | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
+
+ if (pVM->hm.s.vmx.Msrs.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
+ fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
+
+ /* Enable the VMX preemption timer. */
+ if (pVM->hm.s.vmx.fUsePreemptTimer)
+ {
+ Assert(pVM->hm.s.vmx.Msrs.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
+ fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
+ }
+
+#if 0
+ /* Enable posted-interrupt processing. */
+ if (pVM->hm.s.fPostedIntrs)
+ {
+ Assert(pVM->hm.s.vmx.Msrs.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
+ Assert(pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
+ fVal |= VMX_PIN_CTL_POSTED_INT;
+ }
+#endif
+
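+ /* If any bit we want set (fVal) is not in the allowed-1 mask (fZap), the CPU lacks a feature
+    we require and we cannot continue. */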
+ if ((fVal & fZap) != fVal)
+ {
+ LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
+ pVM->hm.s.vmx.Msrs.PinCtls.n.allowed0, fVal, fZap));
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Commit it to the VMCS and update our cache. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
+ AssertRCReturn(rc, rc);
+ pVCpu->hm.s.vmx.u32PinCtls = fVal;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up secondary processor-based VM-execution controls in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks We don't really care about optimizing vmwrites here as it's done only
+ * once per VM and hence we don't care about VMCS-field cache comparisons.
+ */
+static int hmR0VmxSetupProcCtls2(PVMCPU pVCpu)
+{
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ uint32_t fVal = pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
+ uint32_t const fZap = pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
+
+ /* WBINVD causes a VM-exit. */
+ if (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
+ fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
+
+ /* Enable EPT (aka nested-paging). */
+ if (pVM->hm.s.fNestedPaging)
+ fVal |= VMX_PROC_CTLS2_EPT;
+
+ /*
+ * Enable the INVPCID instruction if supported by the hardware and we expose
+ * it to the guest. Without this, a guest executing INVPCID would cause a #UD.
+ */
+ if ( (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID)
+ && pVM->cpum.ro.GuestFeatures.fInvpcid)
+ fVal |= VMX_PROC_CTLS2_INVPCID;
+
+ /* Enable VPID. */
+ if (pVM->hm.s.vmx.fVpid)
+ fVal |= VMX_PROC_CTLS2_VPID;
+
+ /* Enable Unrestricted guest execution. */
+ if (pVM->hm.s.vmx.fUnrestrictedGuest)
+ fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
+
+#if 0
+ if (pVM->hm.s.fVirtApicRegs)
+ {
+ /* Enable APIC-register virtualization. */
+ Assert(pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
+ fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
+
+ /* Enable virtual-interrupt delivery. */
+ Assert(pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
+ fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
+ }
+#endif
+
+ /* Virtualize-APIC accesses if supported by the CPU. The virtual-APIC page is where the TPR shadow resides. */
+ /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
+ * done dynamically. */
+ if (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
+ {
+ Assert(pVM->hm.s.vmx.HCPhysApicAccess);
+ Assert(!(pVM->hm.s.vmx.HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
+ fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS; /* Virtualize APIC accesses. */
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, pVM->hm.s.vmx.HCPhysApicAccess);
+ AssertRCReturn(rc, rc);
+ }
+
+ /* Enable RDTSCP. */
+ if (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP)
+ fVal |= VMX_PROC_CTLS2_RDTSCP;
+
+ /* Enable Pause-Loop exiting. */
+ if ( pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT
+ && pVM->hm.s.vmx.cPleGapTicks
+ && pVM->hm.s.vmx.cPleWindowTicks)
+ {
+ fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
+
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks);
+ AssertRCReturn(rc, rc);
+ }
+
+ if ((fVal & fZap) != fVal)
+ {
+ LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
+ pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed0, fVal, fZap));
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Commit it to the VMCS and update our cache. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
+ AssertRCReturn(rc, rc);
+ pVCpu->hm.s.vmx.u32ProcCtls2 = fVal;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up processor-based VM-execution controls in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks We don't really care about optimizing vmwrites here as it's done only
+ * once per VM and hence we don't care about VMCS-field cache comparisons.
+ */
+static int hmR0VmxSetupProcCtls(PVMCPU pVCpu)
+{
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ uint32_t fVal = pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
+ uint32_t const fZap = pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
+
+ fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
+ | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
+ | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
+ | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
+ | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
+ | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
+ | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
+
+ /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later; check that it is not -always- required to be set or cleared. */
+ if ( !(pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
+ || (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
+ {
+ LogRelFunc(("Unsupported VMX_PROC_CTLS_MOV_DR_EXIT combo!"));
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Without Nested Paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
+ if (!pVM->hm.s.fNestedPaging)
+ {
+ Assert(!pVM->hm.s.vmx.fUnrestrictedGuest); /* Paranoia. */
+ fVal |= VMX_PROC_CTLS_INVLPG_EXIT
+ | VMX_PROC_CTLS_CR3_LOAD_EXIT
+ | VMX_PROC_CTLS_CR3_STORE_EXIT;
+ }
+
+ /* Use TPR shadowing if supported by the CPU. */
+ if ( PDMHasApic(pVM)
+ && pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
+ {
+ Assert(pVCpu->hm.s.vmx.HCPhysVirtApic);
+ Assert(!(pVCpu->hm.s.vmx.HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, 0);
+ rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, pVCpu->hm.s.vmx.HCPhysVirtApic);
+ AssertRCReturn(rc, rc);
+
+ fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
+ /* CR8 writes cause a VM-exit based on TPR threshold. */
+ Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
+ Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
+ }
+ else
+ {
+ /*
+ * Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is invalid on 32-bit Intel CPUs.
+ * Set this control only for 64-bit guests.
+ */
+ if (pVM->hm.s.fAllow64BitGuests)
+ {
+ fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
+ | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
+ }
+ }
+
+ /* Use MSR-bitmaps if supported by the CPU. */
+ if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ {
+ fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
+
+ Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap);
+ Assert(!(pVCpu->hm.s.vmx.HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, pVCpu->hm.s.vmx.HCPhysMsrBitmap);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * The guest can access the following MSRs (read, write) without causing VM-exits; they are loaded/stored
+ * automatically using dedicated fields in the VMCS.
+ */
+ hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_CS, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
+ hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_ESP, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
+ hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_EIP, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
+ hmR0VmxSetMsrPermission(pVCpu, MSR_K8_GS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
+ hmR0VmxSetMsrPermission(pVCpu, MSR_K8_FS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
+#if HC_ARCH_BITS == 64
+ /*
+ * Set passthru permissions for the following MSRs (mandatory for VT-x) required for 64-bit guests.
+ */
+ if (pVM->hm.s.fAllow64BitGuests)
+ {
+ hmR0VmxSetMsrPermission(pVCpu, MSR_K8_LSTAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
+ hmR0VmxSetMsrPermission(pVCpu, MSR_K6_STAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
+ hmR0VmxSetMsrPermission(pVCpu, MSR_K8_SF_MASK, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
+ hmR0VmxSetMsrPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
+ }
+#endif
+ /*
+ * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and have no state
+ * associated with them. We never need to intercept access (writes need to
+ * be executed without exiting, reads will #GP-fault anyway).
+ */
+ if (pVM->cpum.ro.GuestFeatures.fIbpb)
+ hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_PRED_CMD, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
+ if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
+ hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_FLUSH_CMD, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
+
+ /* Though MSR_IA32_PERF_GLOBAL_CTRL is saved/restored lazily, we want to intercept reads/writes to it for now. */
+ }
+
+ /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
+ if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
+ fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
+
+ if ((fVal & fZap) != fVal)
+ {
+ LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
+ pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed0, fVal, fZap));
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Commit it to the VMCS and update our cache. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
+ AssertRCReturn(rc, rc);
+ pVCpu->hm.s.vmx.u32ProcCtls = fVal;
+
+ /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
+ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
+ return hmR0VmxSetupProcCtls2(pVCpu);
+
+ /* Sanity check, should not really happen. */
+ if (RT_UNLIKELY(pVM->hm.s.vmx.fUnrestrictedGuest))
+ {
+ LogRelFunc(("Unrestricted Guest enabled when secondary processor-based VM-execution controls not available\n"));
+ pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up miscellaneous (everything other than Pin & Processor-based
+ * VM-execution) control fields in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int hmR0VmxSetupMiscCtls(PVMCPU pVCpu)
+{
+ AssertPtr(pVCpu);
+
+ int rc = VERR_GENERAL_FAILURE;
+
+ /* All fields are zero-initialized during allocation; but don't remove the commented block below. */
+#if 0
+ /* All CR3 accesses cause VM-exits. Later we optimize CR3 accesses (see hmR0VmxExportGuestCR3AndCR4())*/
+ rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, 0);
+ rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, 0);
+
+ /*
+ * Set MASK & MATCH to 0. VMX checks if GuestPFErrCode & MASK == MATCH. If equal (in our case it always is)
+ * and if the X86_XCPT_PF bit in the exception bitmap is set it causes a VM-exit, if clear doesn't cause an exit.
+ * We thus use the exception bitmap to control it rather than use both.
+ */
+ rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, 0);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, 0);
+
+ /* All IO & IOIO instructions cause VM-exits. */
+ rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_A_FULL, 0);
+ rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_B_FULL, 0);
+
+ /* Initialize the MSR-bitmap area. */
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, 0);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, 0);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, 0);
+ AssertRCReturn(rc, rc);
+#endif
+
+ /* Setup MSR auto-load/store area. */
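+ /* The same buffer doubles as the VM-entry MSR-load area and the VM-exit MSR-store area, so
+    guest MSR values stored on VM-exit are loaded again on the next VM-entry. */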
+ Assert(pVCpu->hm.s.vmx.HCPhysGuestMsr);
+ Assert(!(pVCpu->hm.s.vmx.HCPhysGuestMsr & 0xf)); /* Lower 4 bits MBZ. */
+ rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
+ rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
+ AssertRCReturn(rc, rc);
+
+ Assert(pVCpu->hm.s.vmx.HCPhysHostMsr);
+ Assert(!(pVCpu->hm.s.vmx.HCPhysHostMsr & 0xf)); /* Lower 4 bits MBZ. */
+ rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysHostMsr);
+ AssertRCReturn(rc, rc);
+
+ /* Set VMCS link pointer. Reserved for future use, must be -1. Intel spec. 24.4 "Guest-State Area". */
+ rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, UINT64_C(0xffffffffffffffff));
+ AssertRCReturn(rc, rc);
+
+ /* All fields are zero-initialized during allocation; but don't remove the commented block below. */
+#if 0
+ /* Setup debug controls */
+ rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, 0);
+ rc |= VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0);
+ AssertRCReturn(rc, rc);
+#endif
+
+ return rc;
+}
+
+
+/**
+ * Sets up the initial exception bitmap in the VMCS based on static conditions.
+ *
+ * We shall setup those exception intercepts that don't change during the
+ * lifetime of the VM here. The rest are done dynamically while loading the
+ * guest state.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int hmR0VmxInitXcptBitmap(PVMCPU pVCpu)
+{
+ AssertPtr(pVCpu);
+
+ uint32_t uXcptBitmap;
+
+ /* Must always intercept #AC to prevent the guest from hanging the CPU. */
+ uXcptBitmap = RT_BIT_32(X86_XCPT_AC);
+
+ /* Because we need to maintain the DR6 state even when intercepting DRx reads
+ and writes, and because recursive #DBs can cause the CPU to hang, we must always
+ intercept #DB. */
+ uXcptBitmap |= RT_BIT_32(X86_XCPT_DB);
+
+ /* Without Nested Paging, #PF must cause a VM-exit so we can sync our shadow page tables. */
+ if (!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging)
+ uXcptBitmap |= RT_BIT(X86_XCPT_PF);
+
+ /* Commit it to the VMCS. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
+ AssertRCReturn(rc, rc);
+
+ /* Update our cache of the exception bitmap. */
+ pVCpu->hm.s.vmx.u32XcptBitmap = uXcptBitmap;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does per-VM VT-x initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) VMXR0InitVM(PVM pVM)
+{
+ LogFlowFunc(("pVM=%p\n", pVM));
+
+ int rc = hmR0VmxStructsAlloc(pVM);
+ if (RT_FAILURE(rc))
+ {
+ LogRelFunc(("hmR0VmxStructsAlloc failed! rc=%Rrc\n", rc));
+ return rc;
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Does per-VM VT-x termination.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) VMXR0TermVM(PVM pVM)
+{
+ LogFlowFunc(("pVM=%p\n", pVM));
+
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ if (pVM->hm.s.vmx.hMemObjScratch != NIL_RTR0MEMOBJ)
+ ASMMemZero32(pVM->hm.s.vmx.pvScratch, PAGE_SIZE);
+#endif
+ hmR0VmxStructsFree(pVM);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets up the VM for execution under VT-x.
+ * This function is only called once per-VM during initialization.
+ *
+ * @returns VBox status code.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
+{
+ AssertPtrReturn(pVM, VERR_INVALID_PARAMETER);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ LogFlowFunc(("pVM=%p\n", pVM));
+
+ /*
+ * Without UnrestrictedGuest, pRealModeTSS and pNonPagingModeEPTPageTable *must* always be
+ * allocated. We no longer support the highly unlikely case of UnrestrictedGuest without
+ * pRealModeTSS, see hmR3InitFinalizeR0Intel().
+ */
+ if ( !pVM->hm.s.vmx.fUnrestrictedGuest
+ && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
+ || !pVM->hm.s.vmx.pRealModeTSS))
+ {
+ LogRelFunc(("Invalid real-on-v86 state.\n"));
+ return VERR_INTERNAL_ERROR;
+ }
+
+ /* Initialize these always, see hmR3InitFinalizeR0(). */
+ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
+ pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
+
+ /* Setup the tagged-TLB flush handlers. */
+ int rc = hmR0VmxSetupTaggedTlb(pVM);
+ if (RT_FAILURE(rc))
+ {
+ LogRelFunc(("hmR0VmxSetupTaggedTlb failed! rc=%Rrc\n", rc));
+ return rc;
+ }
+
+ /* Check if we can use the VMCS controls for swapping the EFER MSR. */
+ Assert(!pVM->hm.s.vmx.fSupportsVmcsEfer);
+#if HC_ARCH_BITS == 64
+ if ( (pVM->hm.s.vmx.Msrs.EntryCtls.n.allowed1 & VMX_ENTRY_CTLS_LOAD_EFER_MSR)
+ && (pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_LOAD_EFER_MSR)
+ && (pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_SAVE_EFER_MSR))
+ {
+ pVM->hm.s.vmx.fSupportsVmcsEfer = true;
+ }
+#endif
+
+ /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
+ RTCCUINTREG const uHostCR4 = ASMGetCR4();
+ if (RT_UNLIKELY(!(uHostCR4 & X86_CR4_VMXE)))
+ return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
+
+ for (VMCPUID i = 0; i < pVM->cCpus; i++)
+ {
+ PVMCPU pVCpu = &pVM->aCpus[i];
+ AssertPtr(pVCpu);
+ AssertPtr(pVCpu->hm.s.vmx.pvVmcs);
+
+ /* Log the VCPU pointers, useful for debugging SMP VMs. */
+ Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
+
+ /* Set revision dword at the beginning of the VMCS structure. */
+ *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs = RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_ID);
+
+ /* Set the VMCS launch state to "clear", see Intel spec. 31.6 "Preparation and launch a virtual machine". */
+ rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
+ AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXClearVmcs failed! rc=%Rrc\n", rc),
+ hmR0VmxUpdateErrorRecord(pVCpu, rc), rc);
+
+ /* Load this VMCS as the current VMCS. */
+ rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
+ AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXActivateVmcs failed! rc=%Rrc\n", rc),
+ hmR0VmxUpdateErrorRecord(pVCpu, rc), rc);
+
+ rc = hmR0VmxSetupPinCtls(pVCpu);
+ AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupPinCtls failed! rc=%Rrc\n", rc),
+ hmR0VmxUpdateErrorRecord(pVCpu, rc), rc);
+
+ rc = hmR0VmxSetupProcCtls(pVCpu);
+ AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupProcCtls failed! rc=%Rrc\n", rc),
+ hmR0VmxUpdateErrorRecord(pVCpu, rc), rc);
+
+ rc = hmR0VmxSetupMiscCtls(pVCpu);
+ AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupMiscCtls failed! rc=%Rrc\n", rc),
+ hmR0VmxUpdateErrorRecord(pVCpu, rc), rc);
+
+ rc = hmR0VmxInitXcptBitmap(pVCpu);
+ AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitXcptBitmap failed! rc=%Rrc\n", rc),
+ hmR0VmxUpdateErrorRecord(pVCpu, rc), rc);
+
+#if HC_ARCH_BITS == 32
+ rc = hmR0VmxInitVmcsReadCache(pVCpu);
+ AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitVmcsReadCache failed! rc=%Rrc\n", rc),
+ hmR0VmxUpdateErrorRecord(pVCpu, rc), rc);
+#endif
+
+ /* Sync any CPU internal VMCS data back into our VMCS in memory. */
+ rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
+ AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXClearVmcs(2) failed! rc=%Rrc\n", rc),
+ hmR0VmxUpdateErrorRecord(pVCpu, rc), rc);
+
+ pVCpu->hm.s.vmx.fVmcsState = HMVMX_VMCS_STATE_CLEAR;
+
+ hmR0VmxUpdateErrorRecord(pVCpu, rc);
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
+ * the VMCS.
+ *
+ * @returns VBox status code.
+ */
+static int hmR0VmxExportHostControlRegs(void)
+{
+ RTCCUINTREG uReg = ASMGetCR0();
+ int rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR0, uReg);
+ AssertRCReturn(rc, rc);
+
+ uReg = ASMGetCR3();
+ rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR3, uReg);
+ AssertRCReturn(rc, rc);
+
+ uReg = ASMGetCR4();
+ rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR4, uReg);
+ AssertRCReturn(rc, rc);
+ return rc;
+}
+
+
+/**
+ * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
+ * the host-state area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int hmR0VmxExportHostSegmentRegs(PVMCPU pVCpu)
+{
+#if HC_ARCH_BITS == 64
+/**
+ * Macro for adjusting host segment selectors to satisfy VT-x's VM-entry
+ * requirements. See hmR0VmxExportHostSegmentRegs().
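+ * A selector with a non-zero RPL or the TI (LDT) bit set fails the VM-entry host-state checks,
+ * so it is zeroed here and, when it still refers to a usable descriptor, recorded for
+ * restoration via VMXRestoreHostState().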
+ */
+# define VMXLOCAL_ADJUST_HOST_SEG(seg, selValue) \
+ if ((selValue) & (X86_SEL_RPL | X86_SEL_LDT)) \
+ { \
+ bool fValidSelector = true; \
+ if ((selValue) & X86_SEL_LDT) \
+ { \
+ uint32_t uAttr = ASMGetSegAttr((selValue)); \
+ fValidSelector = RT_BOOL(uAttr != UINT32_MAX && (uAttr & X86_DESC_P)); \
+ } \
+ if (fValidSelector) \
+ { \
+ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##seg; \
+ pVCpu->hm.s.vmx.RestoreHost.uHostSel##seg = (selValue); \
+ } \
+ (selValue) = 0; \
+ }
+
+ /*
+ * If we've executed guest code using VT-x, the host-state bits will be messed up. We
+ * should -not- save the messed up state without restoring the original host-state,
+ * see @bugref{7240}.
+ *
+ * This apparently can happen (most likely the FPU changes), deal with it rather than
+ * asserting. Was observed booting Solaris 10u10 32-bit guest.
+ */
+ if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED)
+ && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED))
+ {
+ Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hm.s.vmx.fRestoreHostFlags,
+ pVCpu->idCpu));
+ VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost);
+ }
+ pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
+#else
+ RT_NOREF(pVCpu);
+#endif
+
+ /*
+ * Host DS, ES, FS and GS segment registers.
+ */
+#if HC_ARCH_BITS == 64
+ RTSEL uSelDS = ASMGetDS();
+ RTSEL uSelES = ASMGetES();
+ RTSEL uSelFS = ASMGetFS();
+ RTSEL uSelGS = ASMGetGS();
+#else
+ RTSEL uSelDS = 0;
+ RTSEL uSelES = 0;
+ RTSEL uSelFS = 0;
+ RTSEL uSelGS = 0;
+#endif
+
+ /*
+ * Host CS and SS segment registers.
+ */
+ RTSEL uSelCS = ASMGetCS();
+ RTSEL uSelSS = ASMGetSS();
+
+ /*
+ * Host TR segment register.
+ */
+ RTSEL uSelTR = ASMGetTR();
+
+#if HC_ARCH_BITS == 64
+ /*
+ * Determine if the host segment registers are suitable for VT-x. If not, load zero
+ * selectors so that VM-entry succeeds, and restore the originals before we get preempted.
+ *
+ * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
+ */
+ VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
+ VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
+ VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
+ VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
+# undef VMXLOCAL_ADJUST_HOST_SEG
+#endif
+
+ /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
+ Assert(!(uSelCS & X86_SEL_RPL)); Assert(!(uSelCS & X86_SEL_LDT));
+ Assert(!(uSelSS & X86_SEL_RPL)); Assert(!(uSelSS & X86_SEL_LDT));
+ Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
+ Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
+ Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
+ Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
+ Assert(!(uSelTR & X86_SEL_RPL)); Assert(!(uSelTR & X86_SEL_LDT));
+ Assert(uSelCS);
+ Assert(uSelTR);
+
+ /* Assertion is right but we would not have updated u32ExitCtls yet. */
+#if 0
+ if (!(pVCpu->hm.s.vmx.u32ExitCtls & VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE))
+ Assert(uSelSS != 0);
+#endif
+
+ /* Write these host selector fields into the host-state area in the VMCS. */
+ int rc = VMXWriteVmcs32(VMX_VMCS16_HOST_CS_SEL, uSelCS);
+ rc |= VMXWriteVmcs32(VMX_VMCS16_HOST_SS_SEL, uSelSS);
+#if HC_ARCH_BITS == 64
+ rc |= VMXWriteVmcs32(VMX_VMCS16_HOST_DS_SEL, uSelDS);
+ rc |= VMXWriteVmcs32(VMX_VMCS16_HOST_ES_SEL, uSelES);
+ rc |= VMXWriteVmcs32(VMX_VMCS16_HOST_FS_SEL, uSelFS);
+ rc |= VMXWriteVmcs32(VMX_VMCS16_HOST_GS_SEL, uSelGS);
+#else
+ NOREF(uSelDS);
+ NOREF(uSelES);
+ NOREF(uSelFS);
+ NOREF(uSelGS);
+#endif
+ rc |= VMXWriteVmcs32(VMX_VMCS16_HOST_TR_SEL, uSelTR);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * Host GDTR and IDTR.
+ */
+ RTGDTR Gdtr;
+ RTIDTR Idtr;
+ RT_ZERO(Gdtr);
+ RT_ZERO(Idtr);
+ ASMGetGDTR(&Gdtr);
+ ASMGetIDTR(&Idtr);
+ rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_GDTR_BASE, Gdtr.pGdt);
+ rc |= VMXWriteVmcsHstN(VMX_VMCS_HOST_IDTR_BASE, Idtr.pIdt);
+ AssertRCReturn(rc, rc);
+
+#if HC_ARCH_BITS == 64
+ /*
+ * Determine if we need to manually restore the GDTR and IDTR limits as VT-x zaps
+ * them to the maximum limit (0xffff) on every VM-exit.
+ */
+ if (Gdtr.cbGdt != 0xffff)
+ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
+
+ /*
+ * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
+ * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
+ * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
+ * However, several hosts either insist on 0xfff being the limit (Windows Patch Guard) or
+ * use the limit for other purposes (darwin puts the CPU ID in there but botches sidt
+ * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
+ * at 0xffff on hosts where we are sure it won't cause trouble.
+ */
+# if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
+ if (Idtr.cbIdt < 0x0fff)
+# else
+ if (Idtr.cbIdt != 0xffff)
+# endif
+ {
+ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
+ AssertCompile(sizeof(Idtr) == sizeof(X86XDTR64));
+ memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostIdtr, &Idtr, sizeof(X86XDTR64));
+ }
+#endif
+
+ /*
+ * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
+ * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
+ * RPL should be too in most cases.
+ */
+ AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= Gdtr.cbGdt,
+ ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, Gdtr.cbGdt), VERR_VMX_INVALID_HOST_STATE);
+
+ PCX86DESCHC pDesc = (PCX86DESCHC)(Gdtr.pGdt + (uSelTR & X86_SEL_MASK));
+#if HC_ARCH_BITS == 64
+ uintptr_t uTRBase = X86DESC64_BASE(pDesc);
+
+ /*
+ * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
+ * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
+ * restoration if the host has something else. Task switching is not supported in 64-bit
+ * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
+ * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
+ *
+ * [1] See Intel spec. 3.5 "System Descriptor Types".
+ * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ Assert(pDesc->System.u4Type == 11);
+ if ( pDesc->System.u16LimitLow != 0x67
+ || pDesc->System.u4LimitHigh)
+ {
+ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
+ /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
+ if (pVM->hm.s.fHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
+ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
+ pVCpu->hm.s.vmx.RestoreHost.uHostSelTR = uSelTR;
+ }
+
+ /*
+ * Store the GDTR as we need it when restoring the GDT and while restoring the TR.
+ */
+ if (pVCpu->hm.s.vmx.fRestoreHostFlags & (VMX_RESTORE_HOST_GDTR | VMX_RESTORE_HOST_SEL_TR))
+ {
+ AssertCompile(sizeof(Gdtr) == sizeof(X86XDTR64));
+ memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostGdtr, &Gdtr, sizeof(X86XDTR64));
+ if (pVM->hm.s.fHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
+ {
+ /* The GDT is read-only but the writable GDT is available. */
+ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
+ pVCpu->hm.s.vmx.RestoreHost.HostGdtrRw.cb = Gdtr.cbGdt;
+ rc = SUPR0GetCurrentGdtRw(&pVCpu->hm.s.vmx.RestoreHost.HostGdtrRw.uAddr);
+ AssertRCReturn(rc, rc);
+ }
+ }
+#else
+ uintptr_t uTRBase = X86DESC_BASE(pDesc);
+#endif
+ rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_TR_BASE, uTRBase);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * Host FS base and GS base.
+ */
+#if HC_ARCH_BITS == 64
+ uint64_t u64FSBase = ASMRdMsr(MSR_K8_FS_BASE);
+ uint64_t u64GSBase = ASMRdMsr(MSR_K8_GS_BASE);
+ rc = VMXWriteVmcs64(VMX_VMCS_HOST_FS_BASE, u64FSBase);
+ rc |= VMXWriteVmcs64(VMX_VMCS_HOST_GS_BASE, u64GSBase);
+ AssertRCReturn(rc, rc);
+
+ /* Store the base if we have to restore FS or GS manually as we need to restore the base as well. */
+ if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_FS)
+ pVCpu->hm.s.vmx.RestoreHost.uHostFSBase = u64FSBase;
+ if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_GS)
+ pVCpu->hm.s.vmx.RestoreHost.uHostGSBase = u64GSBase;
+#endif
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports certain host MSRs in the VM-exit MSR-load area and some in the
+ * host-state area of the VMCS.
+ *
+ * These MSRs will be automatically restored on the host after every successful
+ * VM-exit.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportHostMsrs(PVMCPU pVCpu)
+{
+ AssertPtr(pVCpu);
+ AssertPtr(pVCpu->hm.s.vmx.pvHostMsr);
+
+ /*
+ * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
+ * rather than swapping them on every VM-entry.
+ */
+ hmR0VmxLazySaveHostMsrs(pVCpu);
+
+ /*
+ * Host Sysenter MSRs.
+ */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
+#if HC_ARCH_BITS == 32
+ rc |= VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
+ rc |= VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
+#else
+ rc |= VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
+ rc |= VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
+#endif
+ AssertRCReturn(rc, rc);
+
+ /*
+ * Host EFER MSR.
+ *
+ * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
+ * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if (pVM->hm.s.vmx.fSupportsVmcsEfer)
+ {
+ rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, pVM->hm.s.vmx.u64HostEfer);
+ AssertRCReturn(rc, rc);
+ }
+
+ /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see hmR0VmxExportGuestExitCtls(). */
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Figures out if we need to swap the EFER MSR which is particularly expensive.
+ *
+ * We check all relevant bits. For now, that's everything besides LMA/LME, as
+ * these two bits are handled by VM-entry, see hmR0VmxExportGuestExitCtls() and
+ * hmR0VmxExportGuestEntryCtls().
+ *
+ * @returns true if we need to load guest EFER, false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Requires EFER, CR4.
+ * @remarks No-long-jump zone!!!
+ */
+static bool hmR0VmxShouldSwapEferMsr(PVMCPU pVCpu)
+{
+#ifdef HMVMX_ALWAYS_SWAP_EFER
+ RT_NOREF(pVCpu);
+ return true;
+#else
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
+ /* For 32-bit hosts running 64-bit guests, we always swap EFER in the world-switcher. Nothing to do here. */
+ if (CPUMIsGuestInLongModeEx(pCtx))
+ return false;
+#endif
+
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ uint64_t const u64HostEfer = pVM->hm.s.vmx.u64HostEfer;
+ uint64_t const u64GuestEfer = pCtx->msrEFER;
+
+ /*
+ * For 64-bit guests, if EFER.SCE bit differs, we need to swap EFER to ensure that the
+ * guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
+ */
+ if ( CPUMIsGuestInLongModeEx(pCtx)
+ && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
+ {
+ return true;
+ }
+
+ /*
+ * If the guest uses PAE and EFER.NXE bit differs, we need to swap EFER as it
+ * affects guest paging. 64-bit paging implies CR4.PAE as well.
+ * See Intel spec. 4.5 "IA-32e Paging" and Intel spec. 4.1.1 "Three Paging Modes".
+ */
+ if ( (pCtx->cr4 & X86_CR4_PAE)
+ && (pCtx->cr0 & X86_CR0_PG)
+ && (u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
+ {
+ /* Assert that host is NX capable. */
+ Assert(pVCpu->CTX_SUFF(pVM)->cpum.ro.HostFeatures.fNoExecute);
+ return true;
+ }
+
+ return false;
+#endif
+}
+
+
+/**
+ * Exports the guest state with appropriate VM-entry controls in the VMCS.
+ *
+ * These controls can affect things done on VM-exit; e.g. "load debug controls",
+ * see Intel spec. 24.8.1 "VM-entry controls".
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Requires EFER.
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestEntryCtls(PVMCPU pVCpu)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_CTLS)
+ {
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ uint32_t fVal = pVM->hm.s.vmx.Msrs.EntryCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
+ uint32_t const fZap = pVM->hm.s.vmx.Msrs.EntryCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
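+        /* Illustrative note (hypothetical MSR values): with must-be-one bits (allowed0) of 0x000011ff
+           and may-be-one bits (allowed1) of 0x000fffff, setting any feature bit above bit 19 in fVal
+           would make (fVal & fZap) != fVal and trip the sanity check further below. */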
+
+        /* Load debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x capable CPUs only supported the 1-setting of this bit. */
+ fVal |= VMX_ENTRY_CTLS_LOAD_DEBUG;
+
+ /* Set if the guest is in long mode. This will set/clear the EFER.LMA bit on VM-entry. */
+ if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx))
+ {
+ fVal |= VMX_ENTRY_CTLS_IA32E_MODE_GUEST;
+ Log4Func(("VMX_ENTRY_CTLS_IA32E_MODE_GUEST\n"));
+ }
+ else
+ Assert(!(fVal & VMX_ENTRY_CTLS_IA32E_MODE_GUEST));
+
+ /* If the CPU supports the newer VMCS controls for managing guest/host EFER, use it. */
+ if ( pVM->hm.s.vmx.fSupportsVmcsEfer
+ && hmR0VmxShouldSwapEferMsr(pVCpu))
+ {
+ fVal |= VMX_ENTRY_CTLS_LOAD_EFER_MSR;
+ Log4Func(("VMX_ENTRY_CTLS_LOAD_EFER_MSR\n"));
+ }
+
+ /*
+ * The following should -not- be set (since we're not in SMM mode):
+ * - VMX_ENTRY_CTLS_ENTRY_TO_SMM
+ * - VMX_ENTRY_CTLS_DEACTIVATE_DUAL_MON
+ */
+
+ /** @todo VMX_ENTRY_CTLS_LOAD_PERF_MSR,
+ * VMX_ENTRY_CTLS_LOAD_PAT_MSR. */
+
+ if ((fVal & fZap) != fVal)
+ {
+ Log4Func(("Invalid VM-entry controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
+ pVM->hm.s.vmx.Msrs.EntryCtls.n.allowed0, fVal, fZap));
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_ENTRY;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Commit it to the VMCS and update our cache. */
+ if (pVCpu->hm.s.vmx.u32EntryCtls != fVal)
+ {
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY, fVal);
+ AssertRCReturn(rc, rc);
+ pVCpu->hm.s.vmx.u32EntryCtls = fVal;
+ }
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_ENTRY_CTLS);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest state with appropriate VM-exit controls in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Requires EFER.
+ */
+static int hmR0VmxExportGuestExitCtls(PVMCPU pVCpu)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_EXIT_CTLS)
+ {
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ uint32_t fVal = pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
+ uint32_t const fZap = pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
+
+ /* Save debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x CPUs only supported the 1-setting of this bit. */
+ fVal |= VMX_EXIT_CTLS_SAVE_DEBUG;
+
+ /*
+ * Set the host long mode active (EFER.LMA) bit (which Intel calls "Host address-space size") if necessary.
+ * On VM-exit, VT-x sets both the host EFER.LMA and EFER.LME bit to this value. See assertion in
+ * hmR0VmxExportHostMsrs().
+ */
+#if HC_ARCH_BITS == 64
+ fVal |= VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE;
+ Log4Func(("VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE\n"));
+#else
+ Assert( pVCpu->hm.s.vmx.pfnStartVM == VMXR0SwitcherStartVM64
+ || pVCpu->hm.s.vmx.pfnStartVM == VMXR0StartVM32);
+ /* Set the host address-space size based on the switcher, not guest state. See @bugref{8432}. */
+ if (pVCpu->hm.s.vmx.pfnStartVM == VMXR0SwitcherStartVM64)
+ {
+ /* The switcher returns to long mode, EFER is managed by the switcher. */
+ fVal |= VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE;
+ Log4Func(("VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE\n"));
+ }
+ else
+ Assert(!(fVal & VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE));
+#endif
+
+        /* If the newer VMCS fields for managing EFER exist, use them. */
+ if ( pVM->hm.s.vmx.fSupportsVmcsEfer
+ && hmR0VmxShouldSwapEferMsr(pVCpu))
+ {
+ fVal |= VMX_EXIT_CTLS_SAVE_EFER_MSR
+ | VMX_EXIT_CTLS_LOAD_EFER_MSR;
+ Log4Func(("VMX_EXIT_CTLS_SAVE_EFER_MSR and VMX_EXIT_CTLS_LOAD_EFER_MSR\n"));
+ }
+
+ /* Don't acknowledge external interrupts on VM-exit. We want to let the host do that. */
+ Assert(!(fVal & VMX_EXIT_CTLS_ACK_EXT_INT));
+
+ /** @todo VMX_EXIT_CTLS_LOAD_PERF_MSR,
+ * VMX_EXIT_CTLS_SAVE_PAT_MSR,
+ * VMX_EXIT_CTLS_LOAD_PAT_MSR. */
+
+ /* Enable saving of the VMX preemption timer value on VM-exit. */
+ if ( pVM->hm.s.vmx.fUsePreemptTimer
+ && (pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_SAVE_PREEMPT_TIMER))
+ fVal |= VMX_EXIT_CTLS_SAVE_PREEMPT_TIMER;
+
+ if ((fVal & fZap) != fVal)
+ {
+            LogRelFunc(("Invalid VM-exit controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
+ pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed0, fVal, fZap));
+ pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_EXIT;
+ return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
+ }
+
+ /* Commit it to the VMCS and update our cache. */
+ if (pVCpu->hm.s.vmx.u32ExitCtls != fVal)
+ {
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT, fVal);
+ AssertRCReturn(rc, rc);
+ pVCpu->hm.s.vmx.u32ExitCtls = fVal;
+ }
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_EXIT_CTLS);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets the TPR threshold in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param u32TprThreshold The TPR threshold (task-priority class only).
+ */
+DECLINLINE(int) hmR0VmxApicSetTprThreshold(PVMCPU pVCpu, uint32_t u32TprThreshold)
+{
+ Assert(!(u32TprThreshold & ~VMX_TPR_THRESHOLD_MASK)); /* Bits 31:4 MBZ. */
+ Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW); RT_NOREF_PV(pVCpu);
+ return VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
+}
+
+
+/**
+ * Exports the guest APIC TPR state into the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestApicTpr(PVMCPU pVCpu)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_APIC_TPR)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_APIC_TPR);
+
+ if ( PDMHasApic(pVCpu->CTX_SUFF(pVM))
+ && APICIsEnabled(pVCpu))
+ {
+ /*
+ * Setup TPR shadowing.
+ */
+ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
+ {
+ Assert(pVCpu->hm.s.vmx.HCPhysVirtApic);
+
+ bool fPendingIntr = false;
+ uint8_t u8Tpr = 0;
+ uint8_t u8PendingIntr = 0;
+ int rc = APICGetTpr(pVCpu, &u8Tpr, &fPendingIntr, &u8PendingIntr);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * If there are interrupts pending but masked by the TPR, instruct VT-x to
+ * cause a TPR-below-threshold VM-exit when the guest lowers its TPR below the
+ * priority of the pending interrupt so we can deliver the interrupt. If there
+ * are no interrupts pending, set threshold to 0 to not cause any
+ * TPR-below-threshold VM-exits.
+ */
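+                /* Worked example (illustrative values only): a pending vector of 0x51 has priority
+                   class 5 (0x51 >> 4) and a guest TPR of 0x80 has class 8; since 5 <= 8 the threshold
+                   becomes 5, so the guest lowering its TPR below 0x50 causes a TPR-below-threshold
+                   VM-exit and lets us deliver the pending interrupt. */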
+ pVCpu->hm.s.vmx.pbVirtApic[XAPIC_OFF_TPR] = u8Tpr;
+ uint32_t u32TprThreshold = 0;
+ if (fPendingIntr)
+ {
+ /* Bits 3:0 of the TPR threshold field correspond to bits 7:4 of the TPR (which is the Task-Priority Class). */
+ const uint8_t u8PendingPriority = u8PendingIntr >> 4;
+ const uint8_t u8TprPriority = u8Tpr >> 4;
+ if (u8PendingPriority <= u8TprPriority)
+ u32TprThreshold = u8PendingPriority;
+ }
+
+ rc = hmR0VmxApicSetTprThreshold(pVCpu, u32TprThreshold);
+ AssertRCReturn(rc, rc);
+ }
+ }
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_APIC_TPR);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Gets the guest's interruptibility-state ("interrupt shadow" as AMD calls it).
+ *
+ * @returns Guest's interruptibility-state.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static uint32_t hmR0VmxGetGuestIntrState(PVMCPU pVCpu)
+{
+ /*
+ * Check if we should inhibit interrupt delivery due to instructions like STI and MOV SS.
+ */
+ uint32_t fIntrState = 0;
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ {
+ /* If inhibition is active, RIP & RFLAGS should've been accessed
+ (i.e. read previously from the VMCS or from ring-3). */
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+#ifdef VBOX_STRICT
+ uint64_t const fExtrn = ASMAtomicUoReadU64(&pCtx->fExtrn);
+ AssertMsg(!(fExtrn & (CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS)), ("%#x\n", fExtrn));
+#endif
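+        /* Heuristic: an STI-induced shadow implies EFLAGS.IF has just been set, so report STI
+           blocking when IF is 1; with IF clear the inhibition can only stem from MOV SS / POP SS. */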
+ if (pCtx->rip == EMGetInhibitInterruptsPC(pVCpu))
+ {
+ if (pCtx->eflags.Bits.u1IF)
+ fIntrState = VMX_VMCS_GUEST_INT_STATE_BLOCK_STI;
+ else
+ fIntrState = VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS;
+ }
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ {
+ /*
+ * We can clear the inhibit force flag as even if we go back to the recompiler
+ * without executing guest code in VT-x, the flag's condition to be cleared is
+ * met and thus the cleared state is correct.
+ */
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+ }
+ }
+
+ /*
+ * NMIs to the guest are blocked after an NMI is injected until the guest executes an IRET. We only
+     * bother with virtual-NMI blocking when the CPU supports virtual NMIs; otherwise
+     * setting this would block host NMIs and IRET would not clear the blocking.
+ *
+ * See Intel spec. 26.6.1 "Interruptibility state". See @bugref{7445}.
+ */
+ if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS)
+ && (pVCpu->hm.s.vmx.u32PinCtls & VMX_PIN_CTLS_VIRT_NMI))
+ {
+ fIntrState |= VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI;
+ }
+
+ return fIntrState;
+}
+
+
+/**
+ * Exports the exception intercepts required for guest execution in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestXcptIntercepts(PVMCPU pVCpu)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_XCPT_INTERCEPTS)
+ {
+ uint32_t uXcptBitmap = pVCpu->hm.s.vmx.u32XcptBitmap;
+
+ /* The remaining exception intercepts are handled elsewhere, e.g. in hmR0VmxExportGuestCR0(). */
+ if (pVCpu->hm.s.fGIMTrapXcptUD)
+ uXcptBitmap |= RT_BIT(X86_XCPT_UD);
+#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
+ else
+ uXcptBitmap &= ~RT_BIT(X86_XCPT_UD);
+#endif
+
+ Assert(uXcptBitmap & RT_BIT_32(X86_XCPT_AC));
+ Assert(uXcptBitmap & RT_BIT_32(X86_XCPT_DB));
+
+ if (uXcptBitmap != pVCpu->hm.s.vmx.u32XcptBitmap)
+ {
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
+ AssertRCReturn(rc, rc);
+ pVCpu->hm.s.vmx.u32XcptBitmap = uXcptBitmap;
+ }
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_XCPT_INTERCEPTS);
+ Log4Func(("VMX_VMCS32_CTRL_EXCEPTION_BITMAP=%#RX64\n", uXcptBitmap));
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest's RIP into the guest-state area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestRip(PVMCPU pVCpu)
+{
+ int rc = VINF_SUCCESS;
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RIP)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RIP);
+
+ rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_RIP, pVCpu->cpum.GstCtx.rip);
+ AssertRCReturn(rc, rc);
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RIP);
+ Log4Func(("RIP=%#RX64\n", pVCpu->cpum.GstCtx.rip));
+ }
+ return rc;
+}
+
+
+/**
+ * Exports the guest's RSP into the guest-state area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestRsp(PVMCPU pVCpu)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
+
+ int rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
+ AssertRCReturn(rc, rc);
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest's RFLAGS into the guest-state area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestRflags(PVMCPU pVCpu)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RFLAGS)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RFLAGS);
+
+ /* Intel spec. 2.3.1 "System Flags and Fields in IA-32e Mode" claims the upper 32-bits of RFLAGS are reserved (MBZ).
+ Let us assert it as such and use 32-bit VMWRITE. */
+ Assert(!RT_HI_U32(pVCpu->cpum.GstCtx.rflags.u64));
+ X86EFLAGS fEFlags = pVCpu->cpum.GstCtx.eflags;
+ Assert(fEFlags.u32 & X86_EFL_RA1_MASK);
+ Assert(!(fEFlags.u32 & ~(X86_EFL_1 | X86_EFL_LIVE_MASK)));
+
+ /*
+ * If we're emulating real-mode using Virtual 8086 mode, save the real-mode eflags so
+ * we can restore them on VM-exit. Modify the real-mode guest's eflags so that VT-x
+ * can run the real-mode guest code under Virtual 8086 mode.
+ */
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ {
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
+ Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
+ pVCpu->hm.s.vmx.RealMode.Eflags.u32 = fEFlags.u32; /* Save the original eflags of the real-mode guest. */
+ fEFlags.Bits.u1VM = 1; /* Set the Virtual 8086 mode bit. */
+ fEFlags.Bits.u2IOPL = 0; /* Change IOPL to 0, otherwise certain instructions won't fault. */
+ }
+
+ int rc = VMXWriteVmcs32(VMX_VMCS_GUEST_RFLAGS, fEFlags.u32);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * Setup pending debug exceptions if the guest is single-stepping using EFLAGS.TF.
+ *
+ * We must avoid setting any automatic debug exceptions delivery when single-stepping
+ * through the hypervisor debugger using EFLAGS.TF.
+ */
+ if ( !pVCpu->hm.s.fSingleInstruction
+ && fEFlags.Bits.u1TF)
+ {
+            /** @todo r=ramshankar: Warning! We ASSUME EFLAGS.TF will not be cleared on
+             *        premature trips to ring-3, especially since IEM does not yet handle it. */
+ rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, VMX_VMCS_GUEST_PENDING_DEBUG_XCPT_BS);
+ AssertRCReturn(rc, rc);
+ }
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RFLAGS);
+ Log4Func(("EFlags=%#RX32\n", fEFlags.u32));
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest CR0 control register into the guest-state area in the VMCS.
+ *
+ * The guest FPU state is always pre-loaded, hence we don't need to bother about
+ * sharing FPU-related CR0 bits between the guest and host.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestCR0(PVMCPU pVCpu)
+{
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CR0)
+ {
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR0);
+ Assert(!RT_HI_U32(pVCpu->cpum.GstCtx.cr0));
+
+ uint32_t const u32ShadowCr0 = pVCpu->cpum.GstCtx.cr0;
+ uint32_t u32GuestCr0 = pVCpu->cpum.GstCtx.cr0;
+
+ /*
+ * Setup VT-x's view of the guest CR0.
+ * Minimize VM-exits due to CR3 changes when we have NestedPaging.
+ */
+ uint32_t uProcCtls = pVCpu->hm.s.vmx.u32ProcCtls;
+ if (pVM->hm.s.fNestedPaging)
+ {
+ if (CPUMIsGuestPagingEnabled(pVCpu))
+ {
+ /* The guest has paging enabled, let it access CR3 without causing a VM-exit if supported. */
+ uProcCtls &= ~( VMX_PROC_CTLS_CR3_LOAD_EXIT
+ | VMX_PROC_CTLS_CR3_STORE_EXIT);
+ }
+ else
+ {
+ /* The guest doesn't have paging enabled, make CR3 access cause a VM-exit to update our shadow. */
+ uProcCtls |= VMX_PROC_CTLS_CR3_LOAD_EXIT
+ | VMX_PROC_CTLS_CR3_STORE_EXIT;
+ }
+
+ /* If we have unrestricted guest execution, we never have to intercept CR3 reads. */
+ if (pVM->hm.s.vmx.fUnrestrictedGuest)
+ uProcCtls &= ~VMX_PROC_CTLS_CR3_STORE_EXIT;
+ }
+ else
+ {
+ /* Guest CPL 0 writes to its read-only pages should cause a #PF VM-exit. */
+ u32GuestCr0 |= X86_CR0_WP;
+ }
+
+ /*
+ * Guest FPU bits.
+ *
+         * Since we always pre-load the guest FPU before VM-entry, there is no need to track lazy
+         * state using CR0.TS.
+         *
+         * Intel spec. 23.8 "Restrictions on VMX operation" mentions that the CR0.NE bit must always
+         * be set on the first CPUs to support VT-x; the VM-entry checks make no exception for
+         * unrestricted guests (UX).
+ */
+ u32GuestCr0 |= X86_CR0_NE;
+
+ /* If CR0.NE isn't set, we need to intercept #MF exceptions and report them to the guest differently. */
+ bool const fInterceptMF = !(u32ShadowCr0 & X86_CR0_NE);
+
+ /*
+ * Update exception intercepts.
+ */
+ uint32_t uXcptBitmap = pVCpu->hm.s.vmx.u32XcptBitmap;
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ {
+ Assert(PDMVmmDevHeapIsEnabled(pVM));
+ Assert(pVM->hm.s.vmx.pRealModeTSS);
+ uXcptBitmap |= HMVMX_REAL_MODE_XCPT_MASK;
+ }
+ else
+ {
+ /* For now, cleared here as mode-switches can happen outside HM/VT-x. See @bugref{7626#c11}. */
+ uXcptBitmap &= ~HMVMX_REAL_MODE_XCPT_MASK;
+ if (fInterceptMF)
+ uXcptBitmap |= RT_BIT(X86_XCPT_MF);
+ }
+
+ /* Additional intercepts for debugging, define these yourself explicitly. */
+#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
+ uXcptBitmap |= 0
+ | RT_BIT(X86_XCPT_BP)
+ | RT_BIT(X86_XCPT_DE)
+ | RT_BIT(X86_XCPT_NM)
+ | RT_BIT(X86_XCPT_TS)
+ | RT_BIT(X86_XCPT_UD)
+ | RT_BIT(X86_XCPT_NP)
+ | RT_BIT(X86_XCPT_SS)
+ | RT_BIT(X86_XCPT_GP)
+ | RT_BIT(X86_XCPT_PF)
+ | RT_BIT(X86_XCPT_MF)
+ ;
+#elif defined(HMVMX_ALWAYS_TRAP_PF)
+ uXcptBitmap |= RT_BIT(X86_XCPT_PF);
+#endif
+ if (pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv)
+ uXcptBitmap |= RT_BIT(X86_XCPT_GP);
+ Assert(pVM->hm.s.fNestedPaging || (uXcptBitmap & RT_BIT(X86_XCPT_PF)));
+
+ /*
+ * Set/clear the CR0 specific bits along with their exceptions (PE, PG, CD, NW).
+ */
+ uint32_t fSetCr0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
+ uint32_t fZapCr0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
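+        /* Illustrative example (typical values on many VT-x CPUs, not guaranteed): with
+           Cr0Fixed0=0x80000021 and Cr0Fixed1=0xffffffff this yields fSetCr0=0x80000021
+           (PE, NE and PG forced to 1) and fZapCr0=0xffffffff (no bits forced to 0). */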
+ if (pVM->hm.s.vmx.fUnrestrictedGuest) /* Exceptions for unrestricted-guests for fixed CR0 bits (PE, PG). */
+ fSetCr0 &= ~(X86_CR0_PE | X86_CR0_PG);
+ else
+ Assert((fSetCr0 & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG));
+
+ u32GuestCr0 |= fSetCr0;
+ u32GuestCr0 &= fZapCr0;
+ u32GuestCr0 &= ~(X86_CR0_CD | X86_CR0_NW); /* Always enable caching. */
+
+ /*
+ * CR0 is shared between host and guest along with a CR0 read shadow. Therefore, certain bits must not be changed
+ * by the guest because VT-x ignores saving/restoring them (namely CD, ET, NW) and for certain other bits
+ * we want to be notified immediately of guest CR0 changes (e.g. PG to update our shadow page tables).
+ */
+ uint32_t u32Cr0Mask = X86_CR0_PE
+ | X86_CR0_NE
+ | (pVM->hm.s.fNestedPaging ? 0 : X86_CR0_WP)
+ | X86_CR0_PG
+ | X86_CR0_ET /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.ET */
+ | X86_CR0_CD /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.CD */
+ | X86_CR0_NW; /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.NW */
+
+ /** @todo Avoid intercepting CR0.PE with unrestricted guests. Fix PGM
+ * enmGuestMode to be in-sync with the current mode. See @bugref{6398}
+ * and @bugref{6944}. */
+#if 0
+ if (pVM->hm.s.vmx.fUnrestrictedGuest)
+ u32Cr0Mask &= ~X86_CR0_PE;
+#endif
+ /*
+ * Finally, update VMCS fields with the CR0 values and the exception bitmap.
+ */
+ int rc = VMXWriteVmcs32(VMX_VMCS_GUEST_CR0, u32GuestCr0);
+ rc |= VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_READ_SHADOW, u32ShadowCr0);
+ if (u32Cr0Mask != pVCpu->hm.s.vmx.u32Cr0Mask)
+ rc |= VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_MASK, u32Cr0Mask);
+ if (uProcCtls != pVCpu->hm.s.vmx.u32ProcCtls)
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
+ if (uXcptBitmap != pVCpu->hm.s.vmx.u32XcptBitmap)
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
+ AssertRCReturn(rc, rc);
+
+ /* Update our caches. */
+ pVCpu->hm.s.vmx.u32Cr0Mask = u32Cr0Mask;
+ pVCpu->hm.s.vmx.u32ProcCtls = uProcCtls;
+ pVCpu->hm.s.vmx.u32XcptBitmap = uXcptBitmap;
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CR0);
+
+ Log4Func(("u32Cr0Mask=%#RX32 u32ShadowCr0=%#RX32 u32GuestCr0=%#RX32 (fSetCr0=%#RX32 fZapCr0=%#RX32\n", u32Cr0Mask,
+ u32ShadowCr0, u32GuestCr0, fSetCr0, fZapCr0));
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports the guest control registers (CR3, CR4) into the guest-state area
+ * in the VMCS.
+ *
+ * @returns VBox strict status code.
+ * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
+ * without unrestricted guest access and the VMMDev is not presently
+ * mapped (e.g. EFI32).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static VBOXSTRICTRC hmR0VmxExportGuestCR3AndCR4(PVMCPU pVCpu)
+{
+ int rc = VINF_SUCCESS;
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+
+ /*
+ * Guest CR2.
+ * It's always loaded in the assembler code. Nothing to do here.
+ */
+
+ /*
+ * Guest CR3.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CR3)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR3);
+
+ RTGCPHYS GCPhysGuestCR3 = NIL_RTGCPHYS;
+ if (pVM->hm.s.fNestedPaging)
+ {
+ pVCpu->hm.s.vmx.HCPhysEPTP = PGMGetHyperCR3(pVCpu);
+
+ /* Validate. See Intel spec. 28.2.2 "EPT Translation Mechanism" and 24.6.11 "Extended-Page-Table Pointer (EPTP)" */
+ Assert(pVCpu->hm.s.vmx.HCPhysEPTP);
+ Assert(!(pVCpu->hm.s.vmx.HCPhysEPTP & UINT64_C(0xfff0000000000000)));
+ Assert(!(pVCpu->hm.s.vmx.HCPhysEPTP & 0xfff));
+
+ /* VMX_EPT_MEMTYPE_WB support is already checked in hmR0VmxSetupTaggedTlb(). */
+ pVCpu->hm.s.vmx.HCPhysEPTP |= VMX_EPT_MEMTYPE_WB
+ | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
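+            /* Illustrative layout (hypothetical address, architectural EPTP format): with an
+               HCPhysEPTP of 0x0000000012345000, WB memory type (6) and a page-walk length of 4
+               (encoded as 3 in bits 5:3), the resulting EPTP value is 0x000000001234501e. */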
+
+ /* Validate. See Intel spec. 26.2.1 "Checks on VMX Controls" */
+ AssertMsg( ((pVCpu->hm.s.vmx.HCPhysEPTP >> 3) & 0x07) == 3 /* Bits 3:5 (EPT page walk length - 1) must be 3. */
+ && ((pVCpu->hm.s.vmx.HCPhysEPTP >> 7) & 0x1f) == 0, /* Bits 7:11 MBZ. */
+ ("EPTP %#RX64\n", pVCpu->hm.s.vmx.HCPhysEPTP));
+ AssertMsg( !((pVCpu->hm.s.vmx.HCPhysEPTP >> 6) & 0x01) /* Bit 6 (EPT accessed & dirty bit). */
+ || (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_EPT_ACCESS_DIRTY),
+ ("EPTP accessed/dirty bit not supported by CPU but set %#RX64\n", pVCpu->hm.s.vmx.HCPhysEPTP));
+
+ rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, pVCpu->hm.s.vmx.HCPhysEPTP);
+ AssertRCReturn(rc, rc);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if ( pVM->hm.s.vmx.fUnrestrictedGuest
+ || CPUMIsGuestPagingEnabledEx(pCtx))
+ {
+ /* If the guest is in PAE mode, pass the PDPEs to VT-x using the VMCS fields. */
+ if (CPUMIsGuestInPAEModeEx(pCtx))
+ {
+ rc = PGMGstGetPaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
+ AssertRCReturn(rc, rc);
+ rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, pVCpu->hm.s.aPdpes[0].u);
+ rc |= VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, pVCpu->hm.s.aPdpes[1].u);
+ rc |= VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, pVCpu->hm.s.aPdpes[2].u);
+ rc |= VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, pVCpu->hm.s.aPdpes[3].u);
+ AssertRCReturn(rc, rc);
+ }
+
+ /*
+                 * With nested paging, the guest's view of its CR3 is unblemished: we use the guest's
+                 * own CR3 when it has paging enabled, or rely on unrestricted guest execution to
+                 * handle the guest while it's not using paging.
+ */
+ GCPhysGuestCR3 = pCtx->cr3;
+ }
+ else
+ {
+ /*
+ * The guest is not using paging, but the CPU (VT-x) has to. While the guest
+ * thinks it accesses physical memory directly, we use our identity-mapped
+ * page table to map guest-linear to guest-physical addresses. EPT takes care
+ * of translating it to host-physical addresses.
+ */
+ RTGCPHYS GCPhys;
+ Assert(pVM->hm.s.vmx.pNonPagingModeEPTPageTable);
+
+ /* We obtain it here every time as the guest could have relocated this PCI region. */
+ rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
+ if (RT_SUCCESS(rc))
+ { /* likely */ }
+ else if (rc == VERR_PDM_DEV_HEAP_R3_TO_GCPHYS)
+ {
+ Log4Func(("VERR_PDM_DEV_HEAP_R3_TO_GCPHYS -> VINF_EM_RESCHEDULE_REM\n"));
+ return VINF_EM_RESCHEDULE_REM; /* We cannot execute now, switch to REM/IEM till the guest maps in VMMDev. */
+ }
+ else
+ AssertMsgFailedReturn(("%Rrc\n", rc), rc);
+
+ GCPhysGuestCR3 = GCPhys;
+ }
+
+ Log4Func(("u32GuestCr3=%#RGp (GstN)\n", GCPhysGuestCR3));
+ rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_CR3, GCPhysGuestCR3);
+ AssertRCReturn(rc, rc);
+ }
+ else
+ {
+ /* Non-nested paging case, just use the hypervisor's CR3. */
+ RTHCPHYS HCPhysGuestCR3 = PGMGetHyperCR3(pVCpu);
+
+ Log4Func(("u32GuestCr3=%#RHv (HstN)\n", HCPhysGuestCR3));
+ rc = VMXWriteVmcsHstN(VMX_VMCS_GUEST_CR3, HCPhysGuestCR3);
+ AssertRCReturn(rc, rc);
+ }
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CR3);
+ }
+
+ /*
+ * Guest CR4.
+     * ASSUMES this is done every time we get in from ring-3! (XCR0)
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CR4)
+ {
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR4);
+ Assert(!RT_HI_U32(pCtx->cr4));
+
+ uint32_t u32GuestCr4 = pCtx->cr4;
+ uint32_t const u32ShadowCr4 = pCtx->cr4;
+
+ /*
+ * Setup VT-x's view of the guest CR4.
+ *
+ * If we're emulating real-mode using virtual-8086 mode, we want to redirect software
+ * interrupts to the 8086 program interrupt handler. Clear the VME bit (the interrupt
+ * redirection bitmap is already all 0, see hmR3InitFinalizeR0())
+ *
+ * See Intel spec. 20.2 "Software Interrupt Handling Methods While in Virtual-8086 Mode".
+ */
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ {
+ Assert(pVM->hm.s.vmx.pRealModeTSS);
+ Assert(PDMVmmDevHeapIsEnabled(pVM));
+ u32GuestCr4 &= ~X86_CR4_VME;
+ }
+
+ if (pVM->hm.s.fNestedPaging)
+ {
+ if ( !CPUMIsGuestPagingEnabledEx(pCtx)
+ && !pVM->hm.s.vmx.fUnrestrictedGuest)
+ {
+ /* We use 4 MB pages in our identity mapping page table when the guest doesn't have paging. */
+ u32GuestCr4 |= X86_CR4_PSE;
+ /* Our identity mapping is a 32-bit page directory. */
+ u32GuestCr4 &= ~X86_CR4_PAE;
+ }
+ /* else use guest CR4.*/
+ }
+ else
+ {
+ /*
+ * The shadow paging modes and guest paging modes are different, the shadow is in accordance with the host
+ * paging mode and thus we need to adjust VT-x's view of CR4 depending on our shadow page tables.
+ */
+ switch (pVCpu->hm.s.enmShadowMode)
+ {
+ case PGMMODE_REAL: /* Real-mode. */
+ case PGMMODE_PROTECTED: /* Protected mode without paging. */
+ case PGMMODE_32_BIT: /* 32-bit paging. */
+ {
+ u32GuestCr4 &= ~X86_CR4_PAE;
+ break;
+ }
+
+ case PGMMODE_PAE: /* PAE paging. */
+ case PGMMODE_PAE_NX: /* PAE paging with NX. */
+ {
+ u32GuestCr4 |= X86_CR4_PAE;
+ break;
+ }
+
+ case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
+ case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
+#ifdef VBOX_ENABLE_64_BITS_GUESTS
+ break;
+#endif
+ default:
+ AssertFailed();
+ return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
+ }
+ }
+
+ /* We need to set and clear the CR4 specific bits here (mainly the X86_CR4_VMXE bit). */
+ uint64_t const fSetCr4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
+ uint64_t const fZapCr4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
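+        /* Illustrative example (common value, not guaranteed): with Cr4Fixed0=0x00002000 only
+           X86_CR4_VMXE is forced to 1 via fSetCr4, while any CR4 bit clear in Cr4Fixed1 (an
+           unsupported feature) is masked off by the fZapCr4 AND just below. */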
+ u32GuestCr4 |= fSetCr4;
+ u32GuestCr4 &= fZapCr4;
+
+        /* Set up the CR4 mask. These CR4 flags are owned by the host; if the guest attempts to
+           change them, a VM-exit occurs. */
+ uint32_t u32Cr4Mask = X86_CR4_VME
+ | X86_CR4_PAE
+ | X86_CR4_PGE
+ | X86_CR4_PSE
+ | X86_CR4_VMXE;
+ if (pVM->cpum.ro.HostFeatures.fXSaveRstor)
+ u32Cr4Mask |= X86_CR4_OSXSAVE;
+ if (pVM->cpum.ro.GuestFeatures.fPcid)
+ u32Cr4Mask |= X86_CR4_PCIDE;
+
+ /* Write VT-x's view of the guest CR4, the CR4 modify mask and the read-only CR4 shadow
+ into the VMCS and update our cache. */
+ rc = VMXWriteVmcs32(VMX_VMCS_GUEST_CR4, u32GuestCr4);
+ rc |= VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_READ_SHADOW, u32ShadowCr4);
+ if (pVCpu->hm.s.vmx.u32Cr4Mask != u32Cr4Mask)
+ rc |= VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_MASK, u32Cr4Mask);
+ AssertRCReturn(rc, rc);
+ pVCpu->hm.s.vmx.u32Cr4Mask = u32Cr4Mask;
+
+ /* Whether to save/load/restore XCR0 during world switch depends on CR4.OSXSAVE and host+guest XCR0. */
+ pVCpu->hm.s.fLoadSaveGuestXcr0 = (pCtx->cr4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0();
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CR4);
+
+ Log4Func(("u32GuestCr4=%#RX32 u32ShadowCr4=%#RX32 (fSetCr4=%#RX32 fZapCr4=%#RX32)\n", u32GuestCr4, u32ShadowCr4, fSetCr4,
+ fZapCr4));
+ }
+ return rc;
+}
+
+
+/**
+ * Exports the guest debug registers into the guest-state area in the VMCS.
+ * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
+ *
+ * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportSharedDebugState(PVMCPU pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+#ifdef VBOX_STRICT
+ /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
+ if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
+ {
+ /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
+ Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
+ Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
+ }
+#endif
+
+ bool fSteppingDB = false;
+ bool fInterceptMovDRx = false;
+ uint32_t uProcCtls = pVCpu->hm.s.vmx.u32ProcCtls;
+ if (pVCpu->hm.s.fSingleInstruction)
+ {
+ /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
+ {
+ uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
+ Assert(fSteppingDB == false);
+ }
+ else
+ {
+ pVCpu->cpum.GstCtx.eflags.u32 |= X86_EFL_TF;
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
+ pVCpu->hm.s.fClearTrapFlag = true;
+ fSteppingDB = true;
+ }
+ }
+
+ uint32_t u32GuestDr7;
+ if ( fSteppingDB
+ || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
+ {
+ /*
+ * Use the combined guest and host DRx values found in the hypervisor register set
+ * because the debugger has breakpoints active or someone is single stepping on the
+ * host side without a monitor trap flag.
+ *
+ * Note! DBGF expects a clean DR6 state before executing guest code.
+ */
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ if ( CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx)
+ && !CPUMIsHyperDebugStateActivePending(pVCpu))
+ {
+ CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
+ Assert(CPUMIsHyperDebugStateActivePending(pVCpu));
+ Assert(!CPUMIsGuestDebugStateActivePending(pVCpu));
+ }
+ else
+#endif
+ if (!CPUMIsHyperDebugStateActive(pVCpu))
+ {
+ CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
+ Assert(CPUMIsHyperDebugStateActive(pVCpu));
+ Assert(!CPUMIsGuestDebugStateActive(pVCpu));
+ }
+
+ /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
+ u32GuestDr7 = (uint32_t)CPUMGetHyperDR7(pVCpu);
+ pVCpu->hm.s.fUsingHyperDR7 = true;
+ fInterceptMovDRx = true;
+ }
+ else
+ {
+ /*
+ * If the guest has enabled debug registers, we need to load them prior to
+ * executing guest code so they'll trigger at the right time.
+ */
+ if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
+ {
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ if ( CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx)
+ && !CPUMIsGuestDebugStateActivePending(pVCpu))
+ {
+ CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
+ Assert(CPUMIsGuestDebugStateActivePending(pVCpu));
+ Assert(!CPUMIsHyperDebugStateActivePending(pVCpu));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
+ }
+ else
+#endif
+ if (!CPUMIsGuestDebugStateActive(pVCpu))
+ {
+ CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
+ Assert(CPUMIsGuestDebugStateActive(pVCpu));
+ Assert(!CPUMIsHyperDebugStateActive(pVCpu));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
+ }
+ Assert(!fInterceptMovDRx);
+ }
+ /*
+         * If no debugging is enabled, we'll lazily load DR0-3. Unlike on AMD-V, we
+ * must intercept #DB in order to maintain a correct DR6 guest value, and
+ * because we need to intercept it to prevent nested #DBs from hanging the
+ * CPU, we end up always having to intercept it. See hmR0VmxInitXcptBitmap.
+ */
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ else if ( !CPUMIsGuestDebugStateActivePending(pVCpu)
+ && !CPUMIsGuestDebugStateActive(pVCpu))
+#else
+ else if (!CPUMIsGuestDebugStateActive(pVCpu))
+#endif
+ {
+ fInterceptMovDRx = true;
+ }
+
+ /* Update DR7 with the actual guest value. */
+ u32GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
+ pVCpu->hm.s.fUsingHyperDR7 = false;
+ }
+
+ if (fInterceptMovDRx)
+ uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
+ else
+ uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
+
+ /*
+ * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
+ * monitor-trap flag and update our cache.
+ */
+ if (uProcCtls != pVCpu->hm.s.vmx.u32ProcCtls)
+ {
+ int rc2 = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
+ AssertRCReturn(rc2, rc2);
+ pVCpu->hm.s.vmx.u32ProcCtls = uProcCtls;
+ }
+
+ /*
+ * Update guest DR7.
+ */
+ int rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, u32GuestDr7);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
+ * we need to clear interrupt inhibition if any as otherwise it causes a VM-entry failure.
+ *
+ * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
+ */
+ if (fSteppingDB)
+ {
+ Assert(pVCpu->hm.s.fSingleInstruction);
+ Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
+
+ uint32_t fIntrState = 0;
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
+ AssertRCReturn(rc, rc);
+
+ if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
+ {
+ fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
+ rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
+ AssertRCReturn(rc, rc);
+ }
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+#ifdef VBOX_STRICT
+/**
+ * Strict function to validate segment registers.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Will import guest CR0 on strict builds during validation of
+ * segments.
+ */
+static void hmR0VmxValidateSegmentRegs(PVMCPU pVCpu)
+{
+ /*
+ * Validate segment registers. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
+ *
+     * The reason we check for attribute value 0 in this function, and not just the unusable bit, is
+     * that hmR0VmxExportGuestSegmentReg() only updates the VMCS' copy of the value with the unusable bit
+     * and doesn't change the guest-context value.
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_CR0);
+ if ( !pVM->hm.s.vmx.fUnrestrictedGuest
+ && ( !CPUMIsGuestInRealModeEx(pCtx)
+ && !CPUMIsGuestInV86ModeEx(pCtx)))
+ {
+ /* Protected mode checks */
+ /* CS */
+ Assert(pCtx->cs.Attr.n.u1Present);
+ Assert(!(pCtx->cs.Attr.u & 0xf00));
+ Assert(!(pCtx->cs.Attr.u & 0xfffe0000));
+ Assert( (pCtx->cs.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->cs.Attr.n.u1Granularity));
+ Assert( !(pCtx->cs.u32Limit & 0xfff00000)
+ || (pCtx->cs.Attr.n.u1Granularity));
+ /* CS cannot be loaded with NULL in protected mode. */
+ Assert(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE)); /** @todo is this really true even for 64-bit CS? */
+ if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11)
+ Assert(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl);
+ else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15)
+ Assert(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl);
+ else
+            AssertMsgFailed(("Invalid CS Type %#x\n", pCtx->cs.Attr.n.u4Type));
+ /* SS */
+ Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL));
+ Assert(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL));
+ if ( !(pCtx->cr0 & X86_CR0_PE)
+ || pCtx->cs.Attr.n.u4Type == 3)
+ {
+ Assert(!pCtx->ss.Attr.n.u2Dpl);
+ }
+ if (pCtx->ss.Attr.u && !(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL));
+ Assert(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7);
+ Assert(pCtx->ss.Attr.n.u1Present);
+ Assert(!(pCtx->ss.Attr.u & 0xf00));
+ Assert(!(pCtx->ss.Attr.u & 0xfffe0000));
+ Assert( (pCtx->ss.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->ss.Attr.n.u1Granularity));
+ Assert( !(pCtx->ss.u32Limit & 0xfff00000)
+ || (pCtx->ss.Attr.n.u1Granularity));
+ }
+ /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxExportGuestSegmentReg(). */
+ if (pCtx->ds.Attr.u && !(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ Assert(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
+ Assert(pCtx->ds.Attr.n.u1Present);
+ Assert(pCtx->ds.Attr.n.u4Type > 11 || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL));
+ Assert(!(pCtx->ds.Attr.u & 0xf00));
+ Assert(!(pCtx->ds.Attr.u & 0xfffe0000));
+ Assert( (pCtx->ds.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->ds.Attr.n.u1Granularity));
+ Assert( !(pCtx->ds.u32Limit & 0xfff00000)
+ || (pCtx->ds.Attr.n.u1Granularity));
+ Assert( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ));
+ }
+ if (pCtx->es.Attr.u && !(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ Assert(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
+ Assert(pCtx->es.Attr.n.u1Present);
+ Assert(pCtx->es.Attr.n.u4Type > 11 || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL));
+ Assert(!(pCtx->es.Attr.u & 0xf00));
+ Assert(!(pCtx->es.Attr.u & 0xfffe0000));
+ Assert( (pCtx->es.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->es.Attr.n.u1Granularity));
+ Assert( !(pCtx->es.u32Limit & 0xfff00000)
+ || (pCtx->es.Attr.n.u1Granularity));
+ Assert( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ));
+ }
+ if (pCtx->fs.Attr.u && !(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ Assert(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
+ Assert(pCtx->fs.Attr.n.u1Present);
+ Assert(pCtx->fs.Attr.n.u4Type > 11 || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL));
+ Assert(!(pCtx->fs.Attr.u & 0xf00));
+ Assert(!(pCtx->fs.Attr.u & 0xfffe0000));
+ Assert( (pCtx->fs.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->fs.Attr.n.u1Granularity));
+ Assert( !(pCtx->fs.u32Limit & 0xfff00000)
+ || (pCtx->fs.Attr.n.u1Granularity));
+ Assert( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ));
+ }
+ if (pCtx->gs.Attr.u && !(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ Assert(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
+ Assert(pCtx->gs.Attr.n.u1Present);
+ Assert(pCtx->gs.Attr.n.u4Type > 11 || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL));
+ Assert(!(pCtx->gs.Attr.u & 0xf00));
+ Assert(!(pCtx->gs.Attr.u & 0xfffe0000));
+ Assert( (pCtx->gs.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->gs.Attr.n.u1Granularity));
+ Assert( !(pCtx->gs.u32Limit & 0xfff00000)
+ || (pCtx->gs.Attr.n.u1Granularity));
+ Assert( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ));
+ }
+ /* 64-bit capable CPUs. */
+# if HC_ARCH_BITS == 64
+ Assert(!RT_HI_U32(pCtx->cs.u64Base));
+ Assert(!pCtx->ss.Attr.u || !RT_HI_U32(pCtx->ss.u64Base));
+ Assert(!pCtx->ds.Attr.u || !RT_HI_U32(pCtx->ds.u64Base));
+ Assert(!pCtx->es.Attr.u || !RT_HI_U32(pCtx->es.u64Base));
+# endif
+ }
+ else if ( CPUMIsGuestInV86ModeEx(pCtx)
+ || ( CPUMIsGuestInRealModeEx(pCtx)
+ && !pVM->hm.s.vmx.fUnrestrictedGuest))
+ {
+ /* Real and v86 mode checks. */
+        /* hmR0VmxExportGuestSegmentReg() writes the modified attributes into the VMCS. We want what we're actually feeding to VT-x. */
+ uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr;
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ {
+ u32CSAttr = 0xf3; u32SSAttr = 0xf3; u32DSAttr = 0xf3; u32ESAttr = 0xf3; u32FSAttr = 0xf3; u32GSAttr = 0xf3;
+ }
+ else
+ {
+ u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u; u32DSAttr = pCtx->ds.Attr.u;
+ u32ESAttr = pCtx->es.Attr.u; u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u;
+ }
+
+ /* CS */
+ AssertMsg((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), ("CS base %#x %#x\n", pCtx->cs.u64Base, pCtx->cs.Sel));
+ Assert(pCtx->cs.u32Limit == 0xffff);
+ Assert(u32CSAttr == 0xf3);
+ /* SS */
+ Assert(pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4);
+ Assert(pCtx->ss.u32Limit == 0xffff);
+ Assert(u32SSAttr == 0xf3);
+ /* DS */
+ Assert(pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4);
+ Assert(pCtx->ds.u32Limit == 0xffff);
+ Assert(u32DSAttr == 0xf3);
+ /* ES */
+ Assert(pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4);
+ Assert(pCtx->es.u32Limit == 0xffff);
+ Assert(u32ESAttr == 0xf3);
+ /* FS */
+ Assert(pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4);
+ Assert(pCtx->fs.u32Limit == 0xffff);
+ Assert(u32FSAttr == 0xf3);
+ /* GS */
+ Assert(pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4);
+ Assert(pCtx->gs.u32Limit == 0xffff);
+ Assert(u32GSAttr == 0xf3);
+ /* 64-bit capable CPUs. */
+# if HC_ARCH_BITS == 64
+ Assert(!RT_HI_U32(pCtx->cs.u64Base));
+ Assert(!u32SSAttr || !RT_HI_U32(pCtx->ss.u64Base));
+ Assert(!u32DSAttr || !RT_HI_U32(pCtx->ds.u64Base));
+ Assert(!u32ESAttr || !RT_HI_U32(pCtx->es.u64Base));
+# endif
+ }
+}
+#endif /* VBOX_STRICT */
+
+
+/**
+ * Exports a guest segment register into the guest-state area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param idxSel Index of the selector in the VMCS.
+ * @param idxLimit Index of the segment limit in the VMCS.
+ * @param idxBase Index of the segment base in the VMCS.
+ * @param idxAccess Index of the access rights of the segment in the VMCS.
+ * @param pSelReg Pointer to the segment selector.
+ *
+ * @remarks No-long-jump zone!!!
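+ *
+ * A typical invocation, via the HMVMX_EXPORT_SREG convenience macro used in
+ * hmR0VmxExportGuestSegmentRegs() below (shown purely as an illustration):
+ * @code
+ *     rc = HMVMX_EXPORT_SREG(CS, &pCtx->cs);
+ * @endcode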
+ */
+static int hmR0VmxExportGuestSegmentReg(PVMCPU pVCpu, uint32_t idxSel, uint32_t idxLimit, uint32_t idxBase, uint32_t idxAccess,
+ PCCPUMSELREG pSelReg)
+{
+ int rc = VMXWriteVmcs32(idxSel, pSelReg->Sel); /* 16-bit guest selector field. */
+ rc |= VMXWriteVmcs32(idxLimit, pSelReg->u32Limit); /* 32-bit guest segment limit field. */
+ rc |= VMXWriteVmcsGstN(idxBase, pSelReg->u64Base); /* Natural width guest segment base field.*/
+ AssertRCReturn(rc, rc);
+
+ uint32_t u32Access = pSelReg->Attr.u;
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ {
+        /* VT-x requires our real-mode-using-v86 hack to override the segment access-right bits. */
+ u32Access = 0xf3;
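+        /* 0xf3 decodes to: accessed read/write data segment (type 3), code/data descriptor (S=1),
+           DPL=3, present; exactly what the real-mode-on-virtual-8086 emulation expects. */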
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
+ Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
+ }
+ else
+ {
+ /*
+ * The way to differentiate between whether this is really a null selector or was just
+ * a selector loaded with 0 in real-mode is using the segment attributes. A selector
+ * loaded in real-mode with the value 0 is valid and usable in protected-mode and we
+ * should -not- mark it as an unusable segment. Both the recompiler & VT-x ensures
+ * NULL selectors loaded in protected-mode have their attribute as 0.
+ */
+ if (!u32Access)
+ u32Access = X86DESCATTR_UNUSABLE;
+ }
+
+ /* Validate segment access rights. Refer to Intel spec. "26.3.1.2 Checks on Guest Segment Registers". */
+ AssertMsg((u32Access & X86DESCATTR_UNUSABLE) || (u32Access & X86_SEL_TYPE_ACCESSED),
+              ("Access bit not set for usable segment. idx=%#x sel=%#x attr %#x\n", idxBase, pSelReg->Sel, pSelReg->Attr.u));
+
+ rc = VMXWriteVmcs32(idxAccess, u32Access); /* 32-bit guest segment access-rights field. */
+ AssertRCReturn(rc, rc);
+ return rc;
+}
+
+
+/**
+ * Exports the guest segment registers, GDTR, IDTR, LDTR, (TR, FS and GS bases)
+ * into the guest-state area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Will import guest CR0 on strict builds during validation of
+ * segments.
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestSegmentRegs(PVMCPU pVCpu)
+{
+ int rc = VERR_INTERNAL_ERROR_5;
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+
+ /*
+ * Guest Segment registers: CS, SS, DS, ES, FS, GS.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SREG_MASK)
+ {
+#ifdef VBOX_WITH_REM
+ if (!pVM->hm.s.vmx.fUnrestrictedGuest)
+ {
+ Assert(pVM->hm.s.vmx.pRealModeTSS);
+ AssertCompile(PGMMODE_REAL < PGMMODE_PROTECTED);
+ if ( pVCpu->hm.s.vmx.fWasInRealMode
+ && PGMGetGuestMode(pVCpu) >= PGMMODE_PROTECTED)
+ {
+ /* Signal that the recompiler must flush its code-cache as the guest -may- rewrite code it will later execute
+ in real-mode (e.g. OpenBSD 4.0) */
+ REMFlushTBs(pVM);
+ Log4Func(("Switch to protected mode detected!\n"));
+ pVCpu->hm.s.vmx.fWasInRealMode = false;
+ }
+ }
+#endif
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CS)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CS);
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ pVCpu->hm.s.vmx.RealMode.AttrCS.u = pCtx->cs.Attr.u;
+ rc = HMVMX_EXPORT_SREG(CS, &pCtx->cs);
+ AssertRCReturn(rc, rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CS);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SS)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SS);
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ pVCpu->hm.s.vmx.RealMode.AttrSS.u = pCtx->ss.Attr.u;
+ rc = HMVMX_EXPORT_SREG(SS, &pCtx->ss);
+ AssertRCReturn(rc, rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SS);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_DS)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DS);
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ pVCpu->hm.s.vmx.RealMode.AttrDS.u = pCtx->ds.Attr.u;
+ rc = HMVMX_EXPORT_SREG(DS, &pCtx->ds);
+ AssertRCReturn(rc, rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_DS);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_ES)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_ES);
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ pVCpu->hm.s.vmx.RealMode.AttrES.u = pCtx->es.Attr.u;
+ rc = HMVMX_EXPORT_SREG(ES, &pCtx->es);
+ AssertRCReturn(rc, rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_ES);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_FS)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_FS);
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ pVCpu->hm.s.vmx.RealMode.AttrFS.u = pCtx->fs.Attr.u;
+ rc = HMVMX_EXPORT_SREG(FS, &pCtx->fs);
+ AssertRCReturn(rc, rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_FS);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_GS)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_GS);
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ pVCpu->hm.s.vmx.RealMode.AttrGS.u = pCtx->gs.Attr.u;
+ rc = HMVMX_EXPORT_SREG(GS, &pCtx->gs);
+ AssertRCReturn(rc, rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_GS);
+ }
+
+#ifdef VBOX_STRICT
+ hmR0VmxValidateSegmentRegs(pVCpu);
+#endif
+
+ Log4Func(("CS=%#RX16 Base=%#RX64 Limit=%#RX32 Attr=%#RX32\n", pCtx->cs.Sel, pCtx->cs.u64Base,
+ pCtx->cs.u32Limit, pCtx->cs.Attr.u));
+ }
+
+ /*
+ * Guest TR.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_TR)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_TR);
+
+ /*
+ * Real-mode emulation using virtual-8086 mode with CR4.VME. Interrupt redirection is
+ * achieved using the interrupt redirection bitmap (all bits cleared to let the guest
+ * handle INT-n's) in the TSS. See hmR3InitFinalizeR0() to see how pRealModeTSS is setup.
+ */
+ uint16_t u16Sel = 0;
+ uint32_t u32Limit = 0;
+ uint64_t u64Base = 0;
+ uint32_t u32AccessRights = 0;
+
+ if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ {
+ u16Sel = pCtx->tr.Sel;
+ u32Limit = pCtx->tr.u32Limit;
+ u64Base = pCtx->tr.u64Base;
+ u32AccessRights = pCtx->tr.Attr.u;
+ }
+ else
+ {
+ Assert(pVM->hm.s.vmx.pRealModeTSS);
+ Assert(PDMVmmDevHeapIsEnabled(pVM)); /* Guaranteed by HMCanExecuteGuest() -XXX- what about inner loop changes? */
+
+ /* We obtain it here every time as PCI regions could be reconfigured in the guest, changing the VMMDev base. */
+ RTGCPHYS GCPhys;
+ rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pRealModeTSS, &GCPhys);
+ AssertRCReturn(rc, rc);
+
+ X86DESCATTR DescAttr;
+ DescAttr.u = 0;
+ DescAttr.n.u1Present = 1;
+ DescAttr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
+
+ u16Sel = 0;
+ u32Limit = HM_VTX_TSS_SIZE;
+ u64Base = GCPhys; /* in real-mode phys = virt. */
+ u32AccessRights = DescAttr.u;
+ }
+
+ /* Validate. */
+ Assert(!(u16Sel & RT_BIT(2)));
+ AssertMsg( (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY
+ || (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY, ("TSS is not busy!? %#x\n", u32AccessRights));
+ AssertMsg(!(u32AccessRights & X86DESCATTR_UNUSABLE), ("TR unusable bit is not clear!? %#x\n", u32AccessRights));
+ Assert(!(u32AccessRights & RT_BIT(4))); /* System MBZ.*/
+ Assert(u32AccessRights & RT_BIT(7)); /* Present MB1.*/
+ Assert(!(u32AccessRights & 0xf00)); /* 11:8 MBZ. */
+ Assert(!(u32AccessRights & 0xfffe0000)); /* 31:17 MBZ. */
+ Assert( (u32Limit & 0xfff) == 0xfff
+ || !(u32AccessRights & RT_BIT(15))); /* Granularity MBZ. */
+ Assert( !(pCtx->tr.u32Limit & 0xfff00000)
+ || (u32AccessRights & RT_BIT(15))); /* Granularity MB1. */
+
+ rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_TR_SEL, u16Sel);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_LIMIT, u32Limit);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, u32AccessRights);
+ rc |= VMXWriteVmcsGstN(VMX_VMCS_GUEST_TR_BASE, u64Base);
+ AssertRCReturn(rc, rc);
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_TR);
+ Log4Func(("TR base=%#RX64\n", pCtx->tr.u64Base));
+ }
+
+ /*
+ * Guest GDTR.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_GDTR)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_GDTR);
+
+ rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
+ rc |= VMXWriteVmcsGstN(VMX_VMCS_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
+ AssertRCReturn(rc, rc);
+
+ /* Validate. */
+ Assert(!(pCtx->gdtr.cbGdt & 0xffff0000)); /* Bits 31:16 MBZ. */
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_GDTR);
+ Log4Func(("GDTR base=%#RX64\n", pCtx->gdtr.pGdt));
+ }
+
+ /*
+ * Guest LDTR.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_LDTR)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_LDTR);
+
+        /* The unusable bit is specific to VT-x; if it's a null selector, mark it as an unusable segment. */
+ uint32_t u32Access = 0;
+ if (!pCtx->ldtr.Attr.u)
+ u32Access = X86DESCATTR_UNUSABLE;
+ else
+ u32Access = pCtx->ldtr.Attr.u;
+
+ rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_LDTR_SEL, pCtx->ldtr.Sel);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtr.u32Limit);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, u32Access);
+ rc |= VMXWriteVmcsGstN(VMX_VMCS_GUEST_LDTR_BASE, pCtx->ldtr.u64Base);
+ AssertRCReturn(rc, rc);
+
+ /* Validate. */
+ if (!(u32Access & X86DESCATTR_UNUSABLE))
+ {
+ Assert(!(pCtx->ldtr.Sel & RT_BIT(2))); /* TI MBZ. */
+ Assert(pCtx->ldtr.Attr.n.u4Type == 2); /* Type MB2 (LDT). */
+ Assert(!pCtx->ldtr.Attr.n.u1DescType); /* System MBZ. */
+ Assert(pCtx->ldtr.Attr.n.u1Present == 1); /* Present MB1. */
+ Assert(!pCtx->ldtr.Attr.n.u4LimitHigh); /* 11:8 MBZ. */
+ Assert(!(pCtx->ldtr.Attr.u & 0xfffe0000)); /* 31:17 MBZ. */
+ Assert( (pCtx->ldtr.u32Limit & 0xfff) == 0xfff
+ || !pCtx->ldtr.Attr.n.u1Granularity); /* Granularity MBZ. */
+ Assert( !(pCtx->ldtr.u32Limit & 0xfff00000)
+ || pCtx->ldtr.Attr.n.u1Granularity); /* Granularity MB1. */
+ }
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_LDTR);
+ Log4Func(("LDTR base=%#RX64\n", pCtx->ldtr.u64Base));
+ }
+
+ /*
+ * Guest IDTR.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_IDTR)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_IDTR);
+
+ rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
+ rc |= VMXWriteVmcsGstN(VMX_VMCS_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
+ AssertRCReturn(rc, rc);
+
+ /* Validate. */
+ Assert(!(pCtx->idtr.cbIdt & 0xffff0000)); /* Bits 31:16 MBZ. */
+
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_IDTR);
+ Log4Func(("IDTR base=%#RX64\n", pCtx->idtr.pIdt));
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
+ * areas.
+ *
+ * These MSRs will automatically be loaded to the host CPU on every successful
+ * VM-entry and stored from the host CPU on every successful VM-exit. This also
+ * creates/updates MSR slots for the host MSRs. The actual host MSR values are
+ * -not- updated here for performance reasons. See hmR0VmxExportHostMsrs().
+ *
+ * Also exports the guest sysenter MSRs into the guest-state area in the VMCS.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportGuestMsrs(PVMCPU pVCpu)
+{
+ AssertPtr(pVCpu);
+ AssertPtr(pVCpu->hm.s.vmx.pvGuestMsr);
+
+ /*
+     * MSRs for which we use the auto-load/store MSR area in the VMCS.
+ * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs().
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
+ {
+ if (pVM->hm.s.fAllow64BitGuests)
+ {
+#if HC_ARCH_BITS == 32
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSCALL_MSRS | CPUMCTX_EXTRN_KERNEL_GS_BASE);
+
+ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K8_LSTAR, pCtx->msrLSTAR, false, NULL);
+ rc |= hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K6_STAR, pCtx->msrSTAR, false, NULL);
+ rc |= hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K8_SF_MASK, pCtx->msrSFMASK, false, NULL);
+ rc |= hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE, false, NULL);
+ AssertRCReturn(rc, rc);
+# ifdef LOG_ENABLED
+ PCVMXAUTOMSR pMsr = (PCVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
+ for (uint32_t i = 0; i < pVCpu->hm.s.vmx.cMsrs; i++, pMsr++)
+ Log4Func(("MSR[%RU32]: u32Msr=%#RX32 u64Value=%#RX64\n", i, pMsr->u32Msr, pMsr->u64Value));
+# endif
+#endif
+ }
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
+ }
+
+ /*
+ * Guest Sysenter MSRs.
+ * These flags are only set when MSR-bitmaps are not supported by the CPU and we cause
+ * VM-exits on WRMSRs for these MSRs.
+ */
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
+ {
+ int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
+ AssertRCReturn(rc, rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
+ {
+ int rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
+ AssertRCReturn(rc, rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
+ {
+ int rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
+ AssertRCReturn(rc, rc);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
+ }
+ }
+
+ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
+ {
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
+
+ if (hmR0VmxShouldSwapEferMsr(pVCpu))
+ {
+ /*
+ * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
+ * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
+ */
+ if (pVM->hm.s.vmx.fSupportsVmcsEfer)
+ {
+ int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, pCtx->msrEFER);
+                AssertRCReturn(rc, rc);
+ Log4Func(("EFER=%#RX64\n", pCtx->msrEFER));
+ }
+ else
+ {
+ int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K6_EFER, pCtx->msrEFER, false /* fUpdateHostMsr */,
+ NULL /* pfAddedAndUpdated */);
+ AssertRCReturn(rc, rc);
+
+ /* We need to intercept reads too, see @bugref{7386#c16}. */
+ if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ hmR0VmxSetMsrPermission(pVCpu, MSR_K6_EFER, VMXMSREXIT_INTERCEPT_READ, VMXMSREXIT_INTERCEPT_WRITE);
+ Log4Func(("MSR[--]: u32Msr=%#RX32 u64Value=%#RX64 cMsrs=%u\n", MSR_K6_EFER, pCtx->msrEFER,
+ pVCpu->hm.s.vmx.cMsrs));
+ }
+ }
+ else if (!pVM->hm.s.vmx.fSupportsVmcsEfer)
+ hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, MSR_K6_EFER);
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
+/**
+ * Checks whether the guest state allows safe use of the 32-bit switcher again.
+ *
+ * Segment bases and protected mode structures must be 32-bit addressable
+ * because the 32-bit switcher will ignore the high dword when writing these VMCS
+ * fields. See @bugref{8432} for details.
+ *
+ * @returns true if safe, false if we must continue to use the 64-bit switcher.
+ * @param pCtx Pointer to the guest-CPU context.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static bool hmR0VmxIs32BitSwitcherSafe(PCCPUMCTX pCtx)
+{
+ if (pCtx->gdtr.pGdt & UINT64_C(0xffffffff00000000)) return false;
+ if (pCtx->idtr.pIdt & UINT64_C(0xffffffff00000000)) return false;
+ if (pCtx->ldtr.u64Base & UINT64_C(0xffffffff00000000)) return false;
+ if (pCtx->tr.u64Base & UINT64_C(0xffffffff00000000)) return false;
+ if (pCtx->es.u64Base & UINT64_C(0xffffffff00000000)) return false;
+ if (pCtx->cs.u64Base & UINT64_C(0xffffffff00000000)) return false;
+ if (pCtx->ss.u64Base & UINT64_C(0xffffffff00000000)) return false;
+ if (pCtx->ds.u64Base & UINT64_C(0xffffffff00000000)) return false;
+ if (pCtx->fs.u64Base & UINT64_C(0xffffffff00000000)) return false;
+ if (pCtx->gs.u64Base & UINT64_C(0xffffffff00000000)) return false;
+
+ /* All good, bases are 32-bit. */
+ return true;
+}
+#endif
+
+
+/**
+ * Selects the appropriate function to run guest code.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxSelectVMRunHandler(PVMCPU pVCpu)
+{
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (CPUMIsGuestInLongModeEx(pCtx))
+ {
+#ifndef VBOX_ENABLE_64_BITS_GUESTS
+ return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
+#endif
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests); /* Guaranteed by hmR3InitFinalizeR0(). */
+#if HC_ARCH_BITS == 32
+ /* 32-bit host. We need to switch to 64-bit before running the 64-bit guest. */
+ if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0SwitcherStartVM64)
+ {
+#ifdef VBOX_STRICT
+ if (pVCpu->hm.s.vmx.pfnStartVM != NULL) /* Very first entry would have saved host-state already, ignore it. */
+ {
+                /* Currently, all mode changes send us back to ring-3, so these should be set. See @bugref{6944}. */
+ uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
+ RT_UNTRUSTED_NONVOLATILE_COPY_FENCE();
+ AssertMsg(fCtxChanged & ( HM_CHANGED_VMX_EXIT_CTLS
+ | HM_CHANGED_VMX_ENTRY_CTLS
+ | HM_CHANGED_GUEST_EFER_MSR), ("fCtxChanged=%#RX64\n", fCtxChanged));
+ }
+#endif
+ pVCpu->hm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
+
+            /* Mark that we've switched to the 64-bit handler; we can't safely switch back to 32-bit for
+ the rest of the VM run (until VM reset). See @bugref{8432#c7}. */
+ pVCpu->hm.s.vmx.fSwitchedTo64on32 = true;
+ Log4Func(("Selected 64-bit switcher\n"));
+ }
+#else
+ /* 64-bit host. */
+ pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM64;
+#endif
+ }
+ else
+ {
+ /* Guest is not in long mode, use the 32-bit handler. */
+#if HC_ARCH_BITS == 32
+ if ( pVCpu->hm.s.vmx.pfnStartVM != VMXR0StartVM32
+ && !pVCpu->hm.s.vmx.fSwitchedTo64on32 /* If set, guest mode change does not imply switcher change. */
+ && pVCpu->hm.s.vmx.pfnStartVM != NULL) /* Very first entry would have saved host-state already, ignore it. */
+ {
+# ifdef VBOX_STRICT
+            /* Currently, all mode changes send us back to ring-3, so these should be set. See @bugref{6944}. */
+ uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
+ RT_UNTRUSTED_NONVOLATILE_COPY_FENCE();
+ AssertMsg(fCtxChanged & ( HM_CHANGED_VMX_EXIT_CTLS
+ | HM_CHANGED_VMX_ENTRY_CTLS
+ | HM_CHANGED_GUEST_EFER_MSR), ("fCtxChanged=%#RX64\n", fCtxChanged));
+# endif
+ }
+# ifdef VBOX_ENABLE_64_BITS_GUESTS
+ /*
+ * Keep using the 64-bit switcher even though we're in 32-bit because of bad Intel
+ * design, see @bugref{8432#c7}. If real-on-v86 mode is active, clear the 64-bit
+ * switcher flag because now we know the guest is in a sane state where it's safe
+     * to use the 32-bit switcher. Otherwise, check the guest state to see whether it's
+     * safe to use the much faster 32-bit switcher again.
+ */
+ if (!pVCpu->hm.s.vmx.fSwitchedTo64on32)
+ {
+ if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0StartVM32)
+ Log4Func(("Selected 32-bit switcher\n"));
+ pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32;
+ }
+ else
+ {
+ Assert(pVCpu->hm.s.vmx.pfnStartVM == VMXR0SwitcherStartVM64);
+ if ( pVCpu->hm.s.vmx.RealMode.fRealOnV86Active
+ || hmR0VmxIs32BitSwitcherSafe(pCtx))
+ {
+ pVCpu->hm.s.vmx.fSwitchedTo64on32 = false;
+ pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_EFER_MSR
+ | HM_CHANGED_VMX_ENTRY_CTLS
+ | HM_CHANGED_VMX_EXIT_CTLS
+ | HM_CHANGED_HOST_CONTEXT);
+ Log4Func(("Selected 32-bit switcher (safe)\n"));
+ }
+ }
+# else
+ pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32;
+# endif
+#else
+ pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32;
+#endif
+ }
+ Assert(pVCpu->hm.s.vmx.pfnStartVM);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Wrapper for running the guest code in VT-x.
+ *
+ * @returns VBox status code, no informational status codes.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+DECLINLINE(int) hmR0VmxRunGuest(PVMCPU pVCpu)
+{
+ /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ pCtx->fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
+
+ /*
+ * 64-bit Windows uses XMM registers in the kernel as the Microsoft compiler expresses
+ * floating-point operations using SSE instructions. Some XMM registers (XMM6-XMM15) are
+ * callee-saved and thus the need for this XMM wrapper.
+ *
+ * See MSDN "Configuring Programs for 64-bit/x64 Software Conventions / Register Usage".
+ */
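+    /* A VMCS in the 'launched' state must be executed with VMRESUME, while VMLAUNCH is
+       only valid for a VMCS in the 'clear' (not-launched) state. */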
+ bool const fResumeVM = RT_BOOL(pVCpu->hm.s.vmx.fVmcsState & HMVMX_VMCS_STATE_LAUNCHED);
+ /** @todo Add stats for resume vs launch. */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+#ifdef VBOX_WITH_KERNEL_USING_XMM
+ int rc = hmR0VMXStartVMWrapXMM(fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hm.s.vmx.pfnStartVM);
+#else
+ int rc = pVCpu->hm.s.vmx.pfnStartVM(fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu);
+#endif
+ AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Reports world-switch error and dumps some useful debug info.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
+ * @param pVmxTransient Pointer to the VMX transient structure (only
+ * exitReason updated).
+ */
+static void hmR0VmxReportWorldSwitchError(PVMCPU pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
+{
+ Assert(pVCpu);
+ Assert(pVmxTransient);
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
+ switch (rcVMRun)
+ {
+ case VERR_VMX_INVALID_VMXON_PTR:
+ AssertFailed();
+ break;
+ case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
+ case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
+ rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
+ rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ AssertRC(rc);
+
+ pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu;
+ /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
+ Cannot do it here as we may have been long preempted. */
+
+#ifdef VBOX_STRICT
+ Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
+ pVmxTransient->uExitReason));
+ Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
+ Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
+ if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
+ Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
+ else
+ Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
+ Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
+ Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
+
+ /* VMX control bits. */
+ uint32_t u32Val;
+ uint64_t u64Val;
+ RTHCUINTREG uHCReg;
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_PIN_EXEC %#RX32\n", u32Val));
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_PROC_EXEC %#RX32\n", u32Val));
+ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
+ {
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_PROC_EXEC2 %#RX32\n", u32Val));
+ }
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_ENTRY %#RX32\n", u32Val));
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_EXIT %#RX32\n", u32Val));
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_CR3_TARGET_COUNT %#RX32\n", u32Val));
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", u32Val));
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", u32Val));
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %u\n", u32Val));
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_TPR_THRESHOLD %u\n", u32Val));
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT %u (guest MSRs)\n", u32Val));
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT %u (host MSRs)\n", u32Val));
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT %u (guest MSRs)\n", u32Val));
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_EXCEPTION_BITMAP %#RX32\n", u32Val));
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK %#RX32\n", u32Val));
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH %#RX32\n", u32Val));
+ rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_MASK, &uHCReg); AssertRC(rc);
+ Log4(("VMX_VMCS_CTRL_CR0_MASK %#RHr\n", uHCReg));
+ rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uHCReg); AssertRC(rc);
+            Log4(("VMX_VMCS_CTRL_CR0_READ_SHADOW %#RHr\n", uHCReg));
+ rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_MASK, &uHCReg); AssertRC(rc);
+ Log4(("VMX_VMCS_CTRL_CR4_MASK %#RHr\n", uHCReg));
+ rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uHCReg); AssertRC(rc);
+ Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg));
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging)
+ {
+ rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc);
+ Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val));
+ }
+
+ /* Guest bits. */
+ rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val); AssertRC(rc);
+ Log4(("Old Guest Rip %#RX64 New %#RX64\n", pVCpu->cpum.GstCtx.rip, u64Val));
+ rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RSP, &u64Val); AssertRC(rc);
+ Log4(("Old Guest Rsp %#RX64 New %#RX64\n", pVCpu->cpum.GstCtx.rsp, u64Val));
+ rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Val); AssertRC(rc);
+ Log4(("Old Guest Rflags %#RX32 New %#RX32\n", pVCpu->cpum.GstCtx.eflags.u32, u32Val));
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fVpid)
+ {
+ rc = VMXReadVmcs32(VMX_VMCS16_VPID, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS16_VPID %u\n", u32Val));
+ }
+
+ /* Host bits. */
+ rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR0, &uHCReg); AssertRC(rc);
+ Log4(("Host CR0 %#RHr\n", uHCReg));
+ rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR3, &uHCReg); AssertRC(rc);
+ Log4(("Host CR3 %#RHr\n", uHCReg));
+ rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR4, &uHCReg); AssertRC(rc);
+ Log4(("Host CR4 %#RHr\n", uHCReg));
+
+ RTGDTR HostGdtr;
+ PCX86DESCHC pDesc;
+ ASMGetGDTR(&HostGdtr);
+ rc = VMXReadVmcs32(VMX_VMCS16_HOST_CS_SEL, &u32Val); AssertRC(rc);
+ Log4(("Host CS %#08x\n", u32Val));
+ if (u32Val < HostGdtr.cbGdt)
+ {
+ pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
+ hmR0DumpDescriptor(pDesc, u32Val, "CS: ");
+ }
+
+ rc = VMXReadVmcs32(VMX_VMCS16_HOST_DS_SEL, &u32Val); AssertRC(rc);
+ Log4(("Host DS %#08x\n", u32Val));
+ if (u32Val < HostGdtr.cbGdt)
+ {
+ pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
+ hmR0DumpDescriptor(pDesc, u32Val, "DS: ");
+ }
+
+ rc = VMXReadVmcs32(VMX_VMCS16_HOST_ES_SEL, &u32Val); AssertRC(rc);
+ Log4(("Host ES %#08x\n", u32Val));
+ if (u32Val < HostGdtr.cbGdt)
+ {
+ pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
+ hmR0DumpDescriptor(pDesc, u32Val, "ES: ");
+ }
+
+ rc = VMXReadVmcs32(VMX_VMCS16_HOST_FS_SEL, &u32Val); AssertRC(rc);
+ Log4(("Host FS %#08x\n", u32Val));
+ if (u32Val < HostGdtr.cbGdt)
+ {
+ pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
+ hmR0DumpDescriptor(pDesc, u32Val, "FS: ");
+ }
+
+ rc = VMXReadVmcs32(VMX_VMCS16_HOST_GS_SEL, &u32Val); AssertRC(rc);
+ Log4(("Host GS %#08x\n", u32Val));
+ if (u32Val < HostGdtr.cbGdt)
+ {
+ pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
+ hmR0DumpDescriptor(pDesc, u32Val, "GS: ");
+ }
+
+ rc = VMXReadVmcs32(VMX_VMCS16_HOST_SS_SEL, &u32Val); AssertRC(rc);
+ Log4(("Host SS %#08x\n", u32Val));
+ if (u32Val < HostGdtr.cbGdt)
+ {
+ pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
+ hmR0DumpDescriptor(pDesc, u32Val, "SS: ");
+ }
+
+ rc = VMXReadVmcs32(VMX_VMCS16_HOST_TR_SEL, &u32Val); AssertRC(rc);
+ Log4(("Host TR %#08x\n", u32Val));
+ if (u32Val < HostGdtr.cbGdt)
+ {
+ pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
+ hmR0DumpDescriptor(pDesc, u32Val, "TR: ");
+ }
+
+ rc = VMXReadVmcsHstN(VMX_VMCS_HOST_TR_BASE, &uHCReg); AssertRC(rc);
+ Log4(("Host TR Base %#RHv\n", uHCReg));
+ rc = VMXReadVmcsHstN(VMX_VMCS_HOST_GDTR_BASE, &uHCReg); AssertRC(rc);
+ Log4(("Host GDTR Base %#RHv\n", uHCReg));
+ rc = VMXReadVmcsHstN(VMX_VMCS_HOST_IDTR_BASE, &uHCReg); AssertRC(rc);
+ Log4(("Host IDTR Base %#RHv\n", uHCReg));
+ rc = VMXReadVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, &u32Val); AssertRC(rc);
+ Log4(("Host SYSENTER CS %#08x\n", u32Val));
+ rc = VMXReadVmcsHstN(VMX_VMCS_HOST_SYSENTER_EIP, &uHCReg); AssertRC(rc);
+ Log4(("Host SYSENTER EIP %#RHv\n", uHCReg));
+ rc = VMXReadVmcsHstN(VMX_VMCS_HOST_SYSENTER_ESP, &uHCReg); AssertRC(rc);
+ Log4(("Host SYSENTER ESP %#RHv\n", uHCReg));
+ rc = VMXReadVmcsHstN(VMX_VMCS_HOST_RSP, &uHCReg); AssertRC(rc);
+ Log4(("Host RSP %#RHv\n", uHCReg));
+ rc = VMXReadVmcsHstN(VMX_VMCS_HOST_RIP, &uHCReg); AssertRC(rc);
+ Log4(("Host RIP %#RHv\n", uHCReg));
+# if HC_ARCH_BITS == 64
+ Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
+ Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
+ Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
+ Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
+ Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
+ Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
+# endif
+#endif /* VBOX_STRICT */
+ break;
+ }
+
+ default:
+ /* Impossible */
+ AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
+ break;
+ }
+}
+
+
+#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
+#ifndef VMX_USE_CACHED_VMCS_ACCESSES
+# error "VMX_USE_CACHED_VMCS_ACCESSES not defined when it should be!"
+#endif
+#ifdef VBOX_STRICT
+static bool hmR0VmxIsValidWriteField(uint32_t idxField)
+{
+ switch (idxField)
+ {
+ case VMX_VMCS_GUEST_RIP:
+ case VMX_VMCS_GUEST_RSP:
+ case VMX_VMCS_GUEST_SYSENTER_EIP:
+ case VMX_VMCS_GUEST_SYSENTER_ESP:
+ case VMX_VMCS_GUEST_GDTR_BASE:
+ case VMX_VMCS_GUEST_IDTR_BASE:
+ case VMX_VMCS_GUEST_CS_BASE:
+ case VMX_VMCS_GUEST_DS_BASE:
+ case VMX_VMCS_GUEST_ES_BASE:
+ case VMX_VMCS_GUEST_FS_BASE:
+ case VMX_VMCS_GUEST_GS_BASE:
+ case VMX_VMCS_GUEST_SS_BASE:
+ case VMX_VMCS_GUEST_LDTR_BASE:
+ case VMX_VMCS_GUEST_TR_BASE:
+ case VMX_VMCS_GUEST_CR3:
+ return true;
+ }
+ return false;
+}
+
+static bool hmR0VmxIsValidReadField(uint32_t idxField)
+{
+ switch (idxField)
+ {
+ /* Read-only fields. */
+ case VMX_VMCS_RO_EXIT_QUALIFICATION:
+ return true;
+ }
+ /* Remaining readable fields should also be writable. */
+ return hmR0VmxIsValidWriteField(idxField);
+}
+#endif /* VBOX_STRICT */
+
+
+/**
+ * Executes the specified handler in 64-bit mode.
+ *
+ * @returns VBox status code (no informational status codes).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param enmOp The operation to perform.
+ * @param cParams Number of parameters.
+ * @param paParam Array of 32-bit parameters.
+ */
+VMMR0DECL(int) VMXR0Execute64BitsHandler(PVMCPU pVCpu, HM64ON32OP enmOp, uint32_t cParams, uint32_t *paParam)
+{
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ AssertReturn(pVM->hm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
+ Assert(enmOp > HM64ON32OP_INVALID && enmOp < HM64ON32OP_END);
+ Assert(pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Write.aField));
+ Assert(pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Read.aField));
+
+#ifdef VBOX_STRICT
+ for (uint32_t i = 0; i < pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries; i++)
+ Assert(hmR0VmxIsValidWriteField(pVCpu->hm.s.vmx.VMCSCache.Write.aField[i]));
+
+    for (uint32_t i = 0; i < pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries; i++)
+ Assert(hmR0VmxIsValidReadField(pVCpu->hm.s.vmx.VMCSCache.Read.aField[i]));
+#endif
+
+ /* Disable interrupts. */
+ RTCCUINTREG fOldEFlags = ASMIntDisableFlags();
+
+#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+ RTCPUID idHostCpu = RTMpCpuId();
+ CPUMR0SetLApic(pVCpu, idHostCpu);
+#endif
+
+ PCHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
+ RTHCPHYS HCPhysCpuPage = pHostCpu->HCPhysMemObj;
+
+    /* Clear the VMCS. This marks it inactive, clears implementation-specific data and writes the VMCS data back to memory. */
+ VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
+ pVCpu->hm.s.vmx.fVmcsState = HMVMX_VMCS_STATE_CLEAR;
+
+ /* Leave VMX Root Mode. */
+ VMXDisable();
+
+ SUPR0ChangeCR4(0, ~X86_CR4_VMXE);
+
+ CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
+ CPUMSetHyperEIP(pVCpu, enmOp);
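+    /* Push the parameters onto the hypervisor stack in reverse order (last parameter first). */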
+ for (int i = (int)cParams - 1; i >= 0; i--)
+ CPUMPushHyper(pVCpu, paParam[i]);
+
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z);
+
+ /* Call the switcher. */
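+    /* The second argument is the offset of this VCPU's CPUM state relative to VM::cpum. */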
+ int rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_UOFFSETOF_DYN(VM, aCpus[pVCpu->idCpu].cpum) - RT_UOFFSETOF(VM, cpum));
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z);
+
+ /** @todo replace with hmR0VmxEnterRootMode() and hmR0VmxLeaveRootMode(). */
+ /* Make sure the VMX instructions don't cause #UD faults. */
+ SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
+
+ /* Re-enter VMX Root Mode */
+ int rc2 = VMXEnable(HCPhysCpuPage);
+ if (RT_FAILURE(rc2))
+ {
+ SUPR0ChangeCR4(0, ~X86_CR4_VMXE);
+ ASMSetFlags(fOldEFlags);
+ pVM->hm.s.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
+ return rc2;
+ }
+
+ rc2 = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
+ AssertRC(rc2);
+ pVCpu->hm.s.vmx.fVmcsState = HMVMX_VMCS_STATE_ACTIVE;
+ Assert(!(ASMGetFlags() & X86_EFL_IF));
+ ASMSetFlags(fOldEFlags);
+ return rc;
+}
+
+
+/**
+ * Prepares for and executes VMLAUNCH (64-bit guests) for 32-bit hosts
+ * supporting 64-bit guests.
+ *
+ * @returns VBox status code.
+ * @param fResume Whether to VMLAUNCH or VMRESUME.
+ * @param pCtx Pointer to the guest-CPU context.
+ * @param pCache Pointer to the VMCS cache.
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
+{
+ NOREF(fResume);
+
+ PCHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
+ RTHCPHYS const HCPhysCpuPage = pHostCpu->HCPhysMemObj;
+
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ pCache->uPos = 1;
+ pCache->interPD = PGMGetInterPaeCR3(pVM);
+ pCache->pSwitcher = (uint64_t)pVM->hm.s.pfnHost32ToGuest64R0;
+#endif
+
+#if defined(DEBUG) && defined(VMX_USE_CACHED_VMCS_ACCESSES)
+ pCache->TestIn.HCPhysCpuPage = 0;
+ pCache->TestIn.HCPhysVmcs = 0;
+ pCache->TestIn.pCache = 0;
+ pCache->TestOut.HCPhysVmcs = 0;
+ pCache->TestOut.pCache = 0;
+ pCache->TestOut.pCtx = 0;
+ pCache->TestOut.eflags = 0;
+#else
+ NOREF(pCache);
+#endif
+
+ uint32_t aParam[10];
+ aParam[0] = RT_LO_U32(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */
+ aParam[1] = RT_HI_U32(HCPhysCpuPage); /* Param 1: VMXON physical address - Hi. */
+ aParam[2] = RT_LO_U32(pVCpu->hm.s.vmx.HCPhysVmcs); /* Param 2: VMCS physical address - Lo. */
+ aParam[3] = RT_HI_U32(pVCpu->hm.s.vmx.HCPhysVmcs); /* Param 2: VMCS physical address - Hi. */
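+    /* Params 3-5: raw-mode context addresses of the VMCS cache, the VM and the VMCPU structures.
+       Like params 1 and 2, each is passed as a lo/hi pair; the high dwords are zero. */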
+ aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache);
+ aParam[5] = 0;
+ aParam[6] = VM_RC_ADDR(pVM, pVM);
+ aParam[7] = 0;
+ aParam[8] = VM_RC_ADDR(pVM, pVCpu);
+ aParam[9] = 0;
+
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ pCtx->dr[4] = pVM->hm.s.vmx.pScratchPhys + 16 + 8;
+ *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 1;
+#endif
+ int rc = VMXR0Execute64BitsHandler(pVCpu, HM64ON32OP_VMXRCStartVM64, RT_ELEMENTS(aParam), &aParam[0]);
+
+#ifdef VBOX_WITH_CRASHDUMP_MAGIC
+ Assert(*(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) == 5);
+ Assert(pCtx->dr[4] == 10);
+ *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 0xff;
+#endif
+
+#if defined(DEBUG) && defined(VMX_USE_CACHED_VMCS_ACCESSES)
+ AssertMsg(pCache->TestIn.HCPhysCpuPage == HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage));
+ AssertMsg(pCache->TestIn.HCPhysVmcs == pVCpu->hm.s.vmx.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs,
+ pVCpu->hm.s.vmx.HCPhysVmcs));
+ AssertMsg(pCache->TestIn.HCPhysVmcs == pCache->TestOut.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs,
+ pCache->TestOut.HCPhysVmcs));
+ AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache,
+ pCache->TestOut.pCache));
+ AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache),
+ ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache)));
+ AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx,
+ pCache->TestOut.pCtx));
+ Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
+#endif
+ NOREF(pCtx);
+ return rc;
+}
+
+
+/**
+ * Initializes the VMCS read cache.
+ *
+ * The VMCS cache is used for 32-bit hosts running 64-bit guests (except 32-bit
+ * Darwin which runs with 64-bit paging in 32-bit mode) for 64-bit fields that
+ * cannot be accessed in 32-bit mode. Some 64-bit fields -can- be accessed
+ * (those that have a 32-bit FULL & HIGH part).
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static int hmR0VmxInitVmcsReadCache(PVMCPU pVCpu)
+{
+#define VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, idxField) \
+ do { \
+ Assert(pCache->Read.aField[idxField##_CACHE_IDX] == 0); \
+ pCache->Read.aField[idxField##_CACHE_IDX] = idxField; \
+ pCache->Read.aFieldVal[idxField##_CACHE_IDX] = 0; \
+ ++cReadFields; \
+ } while (0)
+
+ PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
+ uint32_t cReadFields = 0;
+
+ /*
+ * Don't remove the #if 0'd fields in this code. They're listed here for consistency
+ * and serve to indicate exceptions to the rules.
+ */
+
+ /* Guest-natural selector base fields. */
+#if 0
+ /* These are 32-bit in practice. See Intel spec. 2.5 "Control Registers". */
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR0);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR4);
+#endif
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_ES_BASE);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CS_BASE);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SS_BASE);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_DS_BASE);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_FS_BASE);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_GS_BASE);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_LDTR_BASE);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_TR_BASE);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_GDTR_BASE);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_IDTR_BASE);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_RSP);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_RIP);
+#if 0
+ /* Unused natural width guest-state fields. */
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR3); /* Handled in Nested Paging case */
+#endif
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SYSENTER_ESP);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SYSENTER_EIP);
+
+ /* 64-bit guest-state fields; unused as we use two 32-bit VMREADs for
+ these 64-bit fields (using "FULL" and "HIGH" fields). */
+#if 0
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_DEBUGCTL_FULL);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PAT_FULL);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_EFER_FULL);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE0_FULL);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE1_FULL);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE2_FULL);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE3_FULL);
+#endif
+
+ /* Natural width guest-state fields. */
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_RO_GUEST_LINEAR_ADDR);
+
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging)
+ {
+ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR3);
+ AssertMsg(cReadFields == VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX, ("cReadFields=%u expected %u\n", cReadFields,
+ VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX));
+ pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
+ }
+ else
+ {
+ AssertMsg(cReadFields == VMX_VMCS_MAX_CACHE_IDX, ("cReadFields=%u expected %u\n", cReadFields, VMX_VMCS_MAX_CACHE_IDX));
+ pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
+ }
+
+#undef VMXLOCAL_INIT_READ_CACHE_FIELD
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes a field into the VMCS. This can either directly invoke a VMWRITE or
+ * queue up the VMWRITE by using the VMCS write cache (on 32-bit hosts, except
+ * darwin, running 64-bit guests).
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param idxField The VMCS field encoding.
+ * @param u64Val 16, 32 or 64-bit value.
+ */
+VMMR0DECL(int) VMXWriteVmcs64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
+{
+ int rc;
+ switch (idxField)
+ {
+ /*
+         * These fields consist of a "FULL" and a "HIGH" part which can be written to individually.
+ */
+ /* 64-bit Control fields. */
+ case VMX_VMCS64_CTRL_IO_BITMAP_A_FULL:
+ case VMX_VMCS64_CTRL_IO_BITMAP_B_FULL:
+ case VMX_VMCS64_CTRL_MSR_BITMAP_FULL:
+ case VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL:
+ case VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL:
+ case VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL:
+ case VMX_VMCS64_CTRL_EXEC_VMCS_PTR_FULL:
+ case VMX_VMCS64_CTRL_TSC_OFFSET_FULL:
+ case VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL:
+ case VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL:
+ case VMX_VMCS64_CTRL_VMFUNC_CTRLS_FULL:
+ case VMX_VMCS64_CTRL_EPTP_FULL:
+ case VMX_VMCS64_CTRL_EPTP_LIST_FULL:
+ /* 64-bit Guest-state fields. */
+ case VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL:
+ case VMX_VMCS64_GUEST_DEBUGCTL_FULL:
+ case VMX_VMCS64_GUEST_PAT_FULL:
+ case VMX_VMCS64_GUEST_EFER_FULL:
+ case VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL:
+ case VMX_VMCS64_GUEST_PDPTE0_FULL:
+ case VMX_VMCS64_GUEST_PDPTE1_FULL:
+ case VMX_VMCS64_GUEST_PDPTE2_FULL:
+ case VMX_VMCS64_GUEST_PDPTE3_FULL:
+ /* 64-bit Host-state fields. */
+ case VMX_VMCS64_HOST_PAT_FULL:
+ case VMX_VMCS64_HOST_EFER_FULL:
+ case VMX_VMCS64_HOST_PERF_GLOBAL_CTRL_FULL:
+ {
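+            /* The 'HIGH' half of a 64-bit VMCS field has the encoding of the 'FULL' field plus one. */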
+ rc = VMXWriteVmcs32(idxField, RT_LO_U32(u64Val));
+ rc |= VMXWriteVmcs32(idxField + 1, RT_HI_U32(u64Val));
+ break;
+ }
+
+ /*
+ * These fields do not have high and low parts. Queue up the VMWRITE by using the VMCS write-cache (for 64-bit
+         * values). These VMWRITEs are executed when we switch the host to 64-bit mode for running 64-bit guests.
+ */
+ /* Natural-width Guest-state fields. */
+ case VMX_VMCS_GUEST_CR3:
+ case VMX_VMCS_GUEST_ES_BASE:
+ case VMX_VMCS_GUEST_CS_BASE:
+ case VMX_VMCS_GUEST_SS_BASE:
+ case VMX_VMCS_GUEST_DS_BASE:
+ case VMX_VMCS_GUEST_FS_BASE:
+ case VMX_VMCS_GUEST_GS_BASE:
+ case VMX_VMCS_GUEST_LDTR_BASE:
+ case VMX_VMCS_GUEST_TR_BASE:
+ case VMX_VMCS_GUEST_GDTR_BASE:
+ case VMX_VMCS_GUEST_IDTR_BASE:
+ case VMX_VMCS_GUEST_RSP:
+ case VMX_VMCS_GUEST_RIP:
+ case VMX_VMCS_GUEST_SYSENTER_ESP:
+ case VMX_VMCS_GUEST_SYSENTER_EIP:
+ {
+ if (!(RT_HI_U32(u64Val)))
+ {
+ /* If this field is 64-bit, VT-x will zero out the top bits. */
+ rc = VMXWriteVmcs32(idxField, RT_LO_U32(u64Val));
+ }
+ else
+ {
+ /* Assert that only the 32->64 switcher case should ever come here. */
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests);
+ rc = VMXWriteCachedVmcsEx(pVCpu, idxField, u64Val);
+ }
+ break;
+ }
+
+ default:
+ {
+ AssertMsgFailed(("VMXWriteVmcs64Ex: Invalid field %#RX32 (pVCpu=%p u64Val=%#RX64)\n", idxField, pVCpu, u64Val));
+ rc = VERR_INVALID_PARAMETER;
+ break;
+ }
+ }
+ AssertRCReturn(rc, rc);
+ return rc;
+}
+
+
+/**
+ * Queues up a VMWRITE by using the VMCS write cache.
+ * This is only used on 32-bit hosts (except darwin) for 64-bit guests.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param idxField The VMCS field encoding.
+ * @param u64Val 16, 32 or 64-bit value.
+ */
+VMMR0DECL(int) VMXWriteCachedVmcsEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
+{
+ AssertPtr(pVCpu);
+ PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
+
+ AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1,
+ ("entries=%u\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
+
+ /* Make sure there are no duplicates. */
+ for (uint32_t i = 0; i < pCache->Write.cValidEntries; i++)
+ {
+ if (pCache->Write.aField[i] == idxField)
+ {
+ pCache->Write.aFieldVal[i] = u64Val;
+ return VINF_SUCCESS;
+ }
+ }
+
+ pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
+ pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
+ pCache->Write.cValidEntries++;
+ return VINF_SUCCESS;
+}
+#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) */
+
+
+/**
+ * Sets up the usage of TSC-offsetting and updates the VMCS.
+ *
+ * If offsetting is not possible, RDTSC(P) instructions cause VM-exits. Also sets up
+ * the VMX preemption timer.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPU pVCpu)
+{
+ bool fOffsettedTsc;
+ bool fParavirtTsc;
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ uint64_t uTscOffset;
+ if (pVM->hm.s.vmx.fUsePreemptTimer)
+ {
+ uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc);
+
+ /* Make sure the returned values have sane upper and lower boundaries. */
+ uint64_t u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
+ cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second */
+ cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
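+        /* The VMX preemption timer counts down at the TSC rate divided by 2^cPreemptTimerShift,
+           so convert the TSC-tick deadline into preemption-timer ticks. */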
+ cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
+
+ uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
+ int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
+ AssertRC(rc);
+ }
+ else
+ fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
+
+ if (fParavirtTsc)
+ {
+        /* Currently neither Hyper-V nor KVM needs its paravirt. TSC information updated
+           before every VM-entry, hence this is disabled for performance reasons. */
+#if 0
+ int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
+ AssertRC(rc);
+#endif
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
+ }
+
+ uint32_t uProcCtls = pVCpu->hm.s.vmx.u32ProcCtls;
+ if ( fOffsettedTsc
+ && RT_LIKELY(!pVCpu->hm.s.fDebugWantRdTscExit))
+ {
+ if (pVCpu->hm.s.vmx.u64TscOffset != uTscOffset)
+ {
+ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
+ AssertRC(rc);
+ pVCpu->hm.s.vmx.u64TscOffset = uTscOffset;
+ }
+
+ if (uProcCtls & VMX_PROC_CTLS_RDTSC_EXIT)
+ {
+ uProcCtls &= ~VMX_PROC_CTLS_RDTSC_EXIT;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
+ AssertRC(rc);
+ pVCpu->hm.s.vmx.u32ProcCtls = uProcCtls;
+ }
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
+ }
+ else
+ {
+ /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
+ if (!(uProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
+ {
+ uProcCtls |= VMX_PROC_CTLS_RDTSC_EXIT;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
+ AssertRC(rc);
+ pVCpu->hm.s.vmx.u32ProcCtls = uProcCtls;
+ }
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
+ }
+}
+
+
+/**
+ * Gets the IEM exception flags for the specified vector and IDT vectoring /
+ * VM-exit interruption info type.
+ *
+ * @returns The IEM exception flags.
+ * @param uVector The event vector.
+ * @param uVmxVectorType The VMX event type.
+ *
+ * @remarks This function currently only constructs flags required for
+ * IEMEvaluateRecursiveXcpt and not the complete flags (e.g, error-code
+ * and CR2 aspects of an exception are not included).
+ */
+static uint32_t hmR0VmxGetIemXcptFlags(uint8_t uVector, uint32_t uVmxVectorType)
+{
+ uint32_t fIemXcptFlags;
+ switch (uVmxVectorType)
+ {
+ case VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT:
+ case VMX_IDT_VECTORING_INFO_TYPE_NMI:
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_CPU_XCPT;
+ break;
+
+ case VMX_IDT_VECTORING_INFO_TYPE_EXT_INT:
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_EXT_INT;
+ break;
+
+ case VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT:
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT | IEM_XCPT_FLAGS_ICEBP_INSTR;
+ break;
+
+ case VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT:
+ {
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT;
+ if (uVector == X86_XCPT_BP)
+ fIemXcptFlags |= IEM_XCPT_FLAGS_BP_INSTR;
+ else if (uVector == X86_XCPT_OF)
+ fIemXcptFlags |= IEM_XCPT_FLAGS_OF_INSTR;
+ else
+ {
+ fIemXcptFlags = 0;
+ AssertMsgFailed(("Unexpected vector for software int. uVector=%#x", uVector));
+ }
+ break;
+ }
+
+ case VMX_IDT_VECTORING_INFO_TYPE_SW_INT:
+ fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT;
+ break;
+
+ default:
+ fIemXcptFlags = 0;
+ AssertMsgFailed(("Unexpected vector type! uVmxVectorType=%#x uVector=%#x", uVmxVectorType, uVector));
+ break;
+ }
+ return fIemXcptFlags;
+}
+
+
+/**
+ * Sets an event as a pending event to be injected into the guest.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param u32IntInfo The VM-entry interruption-information field.
+ * @param cbInstr The VM-entry instruction length in bytes (for software
+ * interrupts, exceptions and privileged software
+ * exceptions).
+ * @param u32ErrCode The VM-entry exception error code.
+ * @param GCPtrFaultAddress The fault-address (CR2) in case it's a
+ * page-fault.
+ *
+ * @remarks Statistics counter assumes this is a guest event being injected or
+ * re-injected into the guest, i.e. 'StatInjectPendingReflect' is
+ * always incremented.
+ */
+DECLINLINE(void) hmR0VmxSetPendingEvent(PVMCPU pVCpu, uint32_t u32IntInfo, uint32_t cbInstr, uint32_t u32ErrCode,
+ RTGCUINTPTR GCPtrFaultAddress)
+{
+ Assert(!pVCpu->hm.s.Event.fPending);
+ pVCpu->hm.s.Event.fPending = true;
+ pVCpu->hm.s.Event.u64IntInfo = u32IntInfo;
+ pVCpu->hm.s.Event.u32ErrCode = u32ErrCode;
+ pVCpu->hm.s.Event.cbInstr = cbInstr;
+ pVCpu->hm.s.Event.GCPtrFaultAddress = GCPtrFaultAddress;
+}
+
+
+/**
+ * Sets a double-fault (\#DF) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetPendingXcptDF(PVMCPU pVCpu)
+{
+ uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_DF)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 1)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Sets an invalid-opcode (\#UD) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetPendingXcptUD(PVMCPU pVCpu)
+{
+ uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_UD)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Sets a debug (\#DB) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetPendingXcptDB(PVMCPU pVCpu)
+{
+ uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_DB)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
+}
+
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/**
+ * Sets a general-protection (\#GP) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param u32ErrCode The error code for the general-protection exception.
+ */
+DECLINLINE(void) hmR0VmxSetPendingXcptGP(PVMCPU pVCpu, uint32_t u32ErrCode)
+{
+ uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_GP)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 1)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, u32ErrCode, 0 /* GCPtrFaultAddress */);
+}
+
+
+/**
+ * Sets a stack (\#SS) exception as pending-for-injection into the VM.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param u32ErrCode The error code for the stack exception.
+ */
+DECLINLINE(void) hmR0VmxSetPendingXcptSS(PVMCPU pVCpu, uint32_t u32ErrCode)
+{
+ uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_SS)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 1)
+ | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, u32ErrCode, 0 /* GCPtrFaultAddress */);
+}
+
+
+# ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+/**
+ * Decodes the memory operand of an instruction that caused a VM-exit.
+ *
+ * The VM-exit qualification field provides the displacement field for memory
+ * operand instructions, if any.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_SUCCESS if the operand was successfully decoded.
+ * @retval VINF_HM_PENDING_XCPT if an exception was raised while decoding the
+ * operand.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uExitInstrInfo The VM-exit instruction information field.
+ * @param enmMemAccess The memory operand's access type (read or write).
+ * @param GCPtrDisp The instruction displacement field, if any. For
+ * RIP-relative addressing pass RIP + displacement here.
+ * @param pGCPtrMem Where to store the effective destination memory address.
+ */
+static VBOXSTRICTRC hmR0VmxDecodeMemOperand(PVMCPU pVCpu, uint32_t uExitInstrInfo, RTGCPTR GCPtrDisp, VMXMEMACCESS enmMemAccess,
+ PRTGCPTR pGCPtrMem)
+{
+ Assert(pGCPtrMem);
+ Assert(!CPUMIsGuestInRealOrV86Mode(pVCpu));
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_EFER
+ | CPUMCTX_EXTRN_CR0);
+
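+    /* Indices 0, 1 and 2 correspond to the 16-bit, 32-bit and 64-bit address sizes
+       encoded in the VM-exit instruction-information field. */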
+ static uint64_t const s_auAddrSizeMasks[] = { UINT64_C(0xffff), UINT64_C(0xffffffff), UINT64_C(0xffffffffffffffff) };
+ static uint64_t const s_auAccessSizeMasks[] = { sizeof(uint16_t), sizeof(uint32_t), sizeof(uint64_t) };
+ AssertCompile(RT_ELEMENTS(s_auAccessSizeMasks) == RT_ELEMENTS(s_auAddrSizeMasks));
+
+ VMXEXITINSTRINFO ExitInstrInfo;
+ ExitInstrInfo.u = uExitInstrInfo;
+ uint8_t const uAddrSize = ExitInstrInfo.All.u3AddrSize;
+ uint8_t const iSegReg = ExitInstrInfo.All.iSegReg;
+ bool const fIdxRegValid = !ExitInstrInfo.All.fIdxRegInvalid;
+ uint8_t const iIdxReg = ExitInstrInfo.All.iIdxReg;
+ uint8_t const uScale = ExitInstrInfo.All.u2Scaling;
+ bool const fBaseRegValid = !ExitInstrInfo.All.fBaseRegInvalid;
+ uint8_t const iBaseReg = ExitInstrInfo.All.iBaseReg;
+ bool const fIsMemOperand = !ExitInstrInfo.All.fIsRegOperand;
+ bool const fIsLongMode = CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx);
+
+ /*
+ * Validate instruction information.
+     * Invalid information shouldn't happen on real hardware, but the checks are useful while testing our nested hardware-virtualization code.
+ */
+ AssertLogRelMsgReturn(uAddrSize < RT_ELEMENTS(s_auAddrSizeMasks),
+ ("Invalid address size. ExitInstrInfo=%#RX32\n", ExitInstrInfo.u), VERR_VMX_IPE_1);
+ AssertLogRelMsgReturn(iSegReg < X86_SREG_COUNT,
+ ("Invalid segment register. ExitInstrInfo=%#RX32\n", ExitInstrInfo.u), VERR_VMX_IPE_2);
+ AssertLogRelMsgReturn(fIsMemOperand,
+ ("Expected memory operand. ExitInstrInfo=%#RX32\n", ExitInstrInfo.u), VERR_VMX_IPE_3);
+
+ /*
+ * Compute the complete effective address.
+ *
+ * See AMD instruction spec. 1.4.2 "SIB Byte Format"
+ * See AMD spec. 4.5.2 "Segment Registers".
+ */
+ RTGCPTR GCPtrMem = GCPtrDisp;
+ if (fBaseRegValid)
+ GCPtrMem += pVCpu->cpum.GstCtx.aGRegs[iBaseReg].u64;
+ if (fIdxRegValid)
+ GCPtrMem += pVCpu->cpum.GstCtx.aGRegs[iIdxReg].u64 << uScale;
+
+ RTGCPTR const GCPtrOff = GCPtrMem;
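+    /* In 64-bit mode only the FS and GS segment bases are applied; the CS, DS, ES and SS bases are treated as zero. */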
+ if ( !fIsLongMode
+ || iSegReg >= X86_SREG_FS)
+ GCPtrMem += pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
+ GCPtrMem &= s_auAddrSizeMasks[uAddrSize];
+
+ /*
+ * Validate effective address.
+ * See AMD spec. 4.5.3 "Segment Registers in 64-Bit Mode".
+ */
+ uint8_t const cbAccess = s_auAccessSizeMasks[uAddrSize];
+ Assert(cbAccess > 0);
+ if (fIsLongMode)
+ {
+ if (X86_IS_CANONICAL(GCPtrMem))
+ {
+ *pGCPtrMem = GCPtrMem;
+ return VINF_SUCCESS;
+ }
+
+ /** @todo r=ramshankar: We should probably raise \#SS or \#GP. See AMD spec. 4.12.2
+ * "Data Limit Checks in 64-bit Mode". */
+ Log4Func(("Long mode effective address is not canonical GCPtrMem=%#RX64\n", GCPtrMem));
+ hmR0VmxSetPendingXcptGP(pVCpu, 0);
+ return VINF_HM_PENDING_XCPT;
+ }
+
+ /*
+ * This is a watered down version of iemMemApplySegment().
+ * Parts that are not applicable for VMX instructions like real-or-v8086 mode
+ * and segment CPL/DPL checks are skipped.
+ */
+ RTGCPTR32 const GCPtrFirst32 = (RTGCPTR32)GCPtrOff;
+ RTGCPTR32 const GCPtrLast32 = GCPtrFirst32 + cbAccess - 1;
+ PCCPUMSELREG pSel = &pVCpu->cpum.GstCtx.aSRegs[iSegReg];
+
+ /* Check if the segment is present and usable. */
+ if ( pSel->Attr.n.u1Present
+ && !pSel->Attr.n.u1Unusable)
+ {
+ Assert(pSel->Attr.n.u1DescType);
+ if (!(pSel->Attr.n.u4Type & X86_SEL_TYPE_CODE))
+ {
+ /* Check permissions for the data segment. */
+ if ( enmMemAccess == VMXMEMACCESS_WRITE
+ && !(pSel->Attr.n.u4Type & X86_SEL_TYPE_WRITE))
+ {
+ Log4Func(("Data segment access invalid. iSegReg=%#x Attr=%#RX32\n", iSegReg, pSel->Attr.u));
+ hmR0VmxSetPendingXcptGP(pVCpu, iSegReg);
+ return VINF_HM_PENDING_XCPT;
+ }
+
+ /* Check limits if it's a normal data segment. */
+ if (!(pSel->Attr.n.u4Type & X86_SEL_TYPE_DOWN))
+ {
+ if ( GCPtrFirst32 > pSel->u32Limit
+ || GCPtrLast32 > pSel->u32Limit)
+ {
+                    Log4Func(("Data segment limit exceeded. "
+ "iSegReg=%#x GCPtrFirst32=%#RX32 GCPtrLast32=%#RX32 u32Limit=%#RX32\n", iSegReg, GCPtrFirst32,
+ GCPtrLast32, pSel->u32Limit));
+ if (iSegReg == X86_SREG_SS)
+ hmR0VmxSetPendingXcptSS(pVCpu, 0);
+ else
+ hmR0VmxSetPendingXcptGP(pVCpu, 0);
+ return VINF_HM_PENDING_XCPT;
+ }
+ }
+ else
+ {
+ /* Check limits if it's an expand-down data segment.
+ Note! The upper boundary is defined by the B bit, not the G bit! */
+ if ( GCPtrFirst32 < pSel->u32Limit + UINT32_C(1)
+ || GCPtrLast32 > (pSel->Attr.n.u1DefBig ? UINT32_MAX : UINT32_C(0xffff)))
+ {
+                    Log4Func(("Expand-down data segment limit exceeded. "
+ "iSegReg=%#x GCPtrFirst32=%#RX32 GCPtrLast32=%#RX32 u32Limit=%#RX32\n", iSegReg, GCPtrFirst32,
+ GCPtrLast32, pSel->u32Limit));
+ if (iSegReg == X86_SREG_SS)
+ hmR0VmxSetPendingXcptSS(pVCpu, 0);
+ else
+ hmR0VmxSetPendingXcptGP(pVCpu, 0);
+ return VINF_HM_PENDING_XCPT;
+ }
+ }
+ }
+ else
+ {
+ /* Check permissions for the code segment. */
+ if ( enmMemAccess == VMXMEMACCESS_WRITE
+ || ( enmMemAccess == VMXMEMACCESS_READ
+ && !(pSel->Attr.n.u4Type & X86_SEL_TYPE_READ)))
+ {
+ Log4Func(("Code segment access invalid. Attr=%#RX32\n", pSel->Attr.u));
+ Assert(!CPUMIsGuestInRealOrV86ModeEx(&pVCpu->cpum.GstCtx));
+ hmR0VmxSetPendingXcptGP(pVCpu, 0);
+ return VINF_HM_PENDING_XCPT;
+ }
+
+ /* Check limits for the code segment (normal/expand-down not applicable for code segments). */
+ if ( GCPtrFirst32 > pSel->u32Limit
+ || GCPtrLast32 > pSel->u32Limit)
+ {
+ Log4Func(("Code segment limit exceeded. GCPtrFirst32=%#RX32 GCPtrLast32=%#RX32 u32Limit=%#RX32\n",
+ GCPtrFirst32, GCPtrLast32, pSel->u32Limit));
+ if (iSegReg == X86_SREG_SS)
+ hmR0VmxSetPendingXcptSS(pVCpu, 0);
+ else
+ hmR0VmxSetPendingXcptGP(pVCpu, 0);
+ return VINF_HM_PENDING_XCPT;
+ }
+ }
+ }
+ else
+ {
+ Log4Func(("Not present or unusable segment. iSegReg=%#x Attr=%#RX32\n", iSegReg, pSel->Attr.u));
+ hmR0VmxSetPendingXcptGP(pVCpu, 0);
+ return VINF_HM_PENDING_XCPT;
+ }
+
+ *pGCPtrMem = GCPtrMem;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Performs the relevant VMX instruction checks for VM-exits that occurred due to the
+ * guest attempting to execute a VMX instruction.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_SUCCESS if we should continue handling the VM-exit.
+ * @retval VINF_HM_PENDING_XCPT if an exception was raised.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uExitReason The VM-exit reason.
+ *
+ * @todo NstVmx: Document other error codes when VM-exit is implemented.
+ * @remarks No-long-jump zone!!!
+ */
+static VBOXSTRICTRC hmR0VmxCheckExitDueToVmxInstr(PVMCPU pVCpu, uint32_t uExitReason)
+{
+ HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_SS
+ | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_EFER);
+
+ if ( CPUMIsGuestInRealOrV86ModeEx(&pVCpu->cpum.GstCtx)
+ || ( CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx)
+ && !CPUMIsGuestIn64BitCodeEx(&pVCpu->cpum.GstCtx)))
+ {
+ Log4Func(("In real/v86-mode or long-mode outside 64-bit code segment -> #UD\n"));
+ hmR0VmxSetPendingXcptUD(pVCpu);
+ return VINF_HM_PENDING_XCPT;
+ }
+
+ if (uExitReason == VMX_EXIT_VMXON)
+ {
+ /*
+ * We check CR4.VMXE because it is required to be always set while in VMX operation
+ * by physical CPUs and our CR4 read shadow is only consulted when executing specific
+ * instructions (CLTS, LMSW, MOV CR, and SMSW) and thus doesn't affect CPU operation
+ * otherwise (i.e. physical CPU won't automatically #UD if Cr4Shadow.VMXE is 0).
+ */
+ if (!CPUMIsGuestVmxEnabled(&pVCpu->cpum.GstCtx))
+ {
+ Log4Func(("CR4.VMXE is not set -> #UD\n"));
+ hmR0VmxSetPendingXcptUD(pVCpu);
+ return VINF_HM_PENDING_XCPT;
+ }
+ }
+ else if (!CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx))
+ {
+ /*
+ * The guest has not entered VMX operation but attempted to execute a VMX instruction
+ * (other than VMXON), we need to raise a #UD.
+ */
+ Log4Func(("Not in VMX root mode -> #UD\n"));
+ hmR0VmxSetPendingXcptUD(pVCpu);
+ return VINF_HM_PENDING_XCPT;
+ }
+
+ if (CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
+ {
+ /*
+ * The nested-guest attempted to execute a VMX instruction, cause a VM-exit and let
+ * the guest hypervisor deal with it.
+ */
+ /** @todo NSTVMX: Trigger a VM-exit */
+ }
+
+ /*
+ * VMX instructions require CPL 0 except in VMX non-root mode where the VM-exit intercept
+     * (above) takes precedence over the CPL check.
+ */
+ if (CPUMGetGuestCPL(pVCpu) > 0)
+ {
+ Log4Func(("CPL > 0 -> #GP(0)\n"));
+ hmR0VmxSetPendingXcptGP(pVCpu, 0);
+ return VINF_HM_PENDING_XCPT;
+ }
+
+ return VINF_SUCCESS;
+}
+# endif /* !VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM */
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
+
+/**
+ * Handles a condition that occurred while delivering an event through the guest
+ * IDT.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_SUCCESS if we should continue handling the VM-exit.
+ * @retval VINF_HM_DOUBLE_FAULT if a \#DF condition was detected and we ought
+ *         to continue execution of the guest which will deliver the \#DF.
+ * @retval VINF_EM_RESET if we detected a triple-fault condition.
+ * @retval VERR_EM_GUEST_CPU_HANG if we detected a guest CPU hang.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static VBOXSTRICTRC hmR0VmxCheckExitDueToEventDelivery(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ uint32_t const uExitVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo);
+
+ int rc2 = hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
+ rc2 |= hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
+ AssertRCReturn(rc2, rc2);
+
+ VBOXSTRICTRC rcStrict = VINF_SUCCESS;
+ if (VMX_IDT_VECTORING_INFO_IS_VALID(pVmxTransient->uIdtVectoringInfo))
+ {
+ uint32_t const uIdtVectorType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo);
+ uint32_t const uIdtVector = VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo);
+
+ /*
+ * If the event was a software interrupt (generated with INT n) or a software exception
+ * (generated by INT3/INTO) or a privileged software exception (generated by INT1), we
+ * can handle the VM-exit and continue guest execution which will re-execute the
+ * instruction rather than re-injecting the exception, as that can cause premature
+ * trips to ring-3 before injection and involve TRPM which currently has no way of
+ * storing that these exceptions were caused by these instructions (ICEBP's #DB poses
+ * the problem).
+ */
+ IEMXCPTRAISE enmRaise;
+ IEMXCPTRAISEINFO fRaiseInfo;
+ if ( uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT
+ || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT
+ || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT)
+ {
+ enmRaise = IEMXCPTRAISE_REEXEC_INSTR;
+ fRaiseInfo = IEMXCPTRAISEINFO_NONE;
+ }
+ else if (VMX_EXIT_INT_INFO_IS_VALID(pVmxTransient->uExitIntInfo))
+ {
+ uint32_t const uExitVectorType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uExitIntInfo);
+ uint32_t const fIdtVectorFlags = hmR0VmxGetIemXcptFlags(uIdtVector, uIdtVectorType);
+ uint32_t const fExitVectorFlags = hmR0VmxGetIemXcptFlags(uExitVector, uExitVectorType);
+            /** @todo Change AssertMsgReturn to just AssertMsg later. */
+ AssertMsgReturn(uExitVectorType == VMX_EXIT_INT_INFO_TYPE_HW_XCPT,
+ ("hmR0VmxCheckExitDueToEventDelivery: Unexpected VM-exit interruption info. %#x!\n",
+ uExitVectorType), VERR_VMX_IPE_5);
+
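+            /* Ask IEM how the original (IDT-vectoring) event and the new event interact: re-inject
+               the previous event, re-execute the instruction, raise a #DF, triple fault, etc. */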
+ enmRaise = IEMEvaluateRecursiveXcpt(pVCpu, fIdtVectorFlags, uIdtVector, fExitVectorFlags, uExitVector, &fRaiseInfo);
+
+ /* Determine a vectoring #PF condition, see comment in hmR0VmxExitXcptPF(). */
+ if (fRaiseInfo & (IEMXCPTRAISEINFO_EXT_INT_PF | IEMXCPTRAISEINFO_NMI_PF))
+ {
+ pVmxTransient->fVectoringPF = true;
+ enmRaise = IEMXCPTRAISE_PREV_EVENT;
+ }
+ }
+ else
+ {
+ /*
+ * If an exception or hardware interrupt delivery caused an EPT violation/misconfig or APIC access
+ * VM-exit, then the VM-exit interruption-information will not be valid and we end up here.
+ * It is sufficient to reflect the original event to the guest after handling the VM-exit.
+ */
+ Assert( uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
+ || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_NMI
+ || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_EXT_INT);
+ enmRaise = IEMXCPTRAISE_PREV_EVENT;
+ fRaiseInfo = IEMXCPTRAISEINFO_NONE;
+ }
+
+ /*
+ * On CPUs that support Virtual NMIs, if this VM-exit (be it an exception or EPT violation/misconfig
+ * etc.) occurred while delivering the NMI, we need to clear the block-by-NMI field in the guest
+ * interruptibility-state before re-delivering the NMI after handling the VM-exit. Otherwise the
+ * subsequent VM-entry would fail.
+ *
+ * See Intel spec. 30.7.1.2 "Resuming Guest Software after Handling an Exception". See @bugref{7445}.
+ */
+ if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS)
+ && uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_NMI
+ && ( enmRaise == IEMXCPTRAISE_PREV_EVENT
+ || (fRaiseInfo & IEMXCPTRAISEINFO_NMI_PF))
+ && (pVCpu->hm.s.vmx.u32PinCtls & VMX_PIN_CTLS_VIRT_NMI))
+ {
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS);
+ }
+
+ switch (enmRaise)
+ {
+ case IEMXCPTRAISE_CURRENT_XCPT:
+ {
+ Log4Func(("IDT: Pending secondary Xcpt: uIdtVectoringInfo=%#RX64 uExitIntInfo=%#RX64\n",
+ pVmxTransient->uIdtVectoringInfo, pVmxTransient->uExitIntInfo));
+ Assert(rcStrict == VINF_SUCCESS);
+ break;
+ }
+
+ case IEMXCPTRAISE_PREV_EVENT:
+ {
+ uint32_t u32ErrCode;
+ if (VMX_IDT_VECTORING_INFO_IS_ERROR_CODE_VALID(pVmxTransient->uIdtVectoringInfo))
+ {
+ rc2 = hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
+ AssertRCReturn(rc2, rc2);
+ u32ErrCode = pVmxTransient->uIdtVectoringErrorCode;
+ }
+ else
+ u32ErrCode = 0;
+
+ /* If uExitVector is #PF, CR2 value will be updated from the VMCS if it's a guest #PF, see hmR0VmxExitXcptPF(). */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingReflect);
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_IDT_INFO(pVmxTransient->uIdtVectoringInfo),
+ 0 /* cbInstr */, u32ErrCode, pVCpu->cpum.GstCtx.cr2);
+
+ Log4Func(("IDT: Pending vectoring event %#RX64 Err=%#RX32\n", pVCpu->hm.s.Event.u64IntInfo,
+ pVCpu->hm.s.Event.u32ErrCode));
+ Assert(rcStrict == VINF_SUCCESS);
+ break;
+ }
+
+ case IEMXCPTRAISE_REEXEC_INSTR:
+ Assert(rcStrict == VINF_SUCCESS);
+ break;
+
+ case IEMXCPTRAISE_DOUBLE_FAULT:
+ {
+ /*
+                 * Determine a vectoring double #PF condition. Used later, when PGM evaluates the
+ * second #PF as a guest #PF (and not a shadow #PF) and needs to be converted into a #DF.
+ */
+ if (fRaiseInfo & IEMXCPTRAISEINFO_PF_PF)
+ {
+ pVmxTransient->fVectoringDoublePF = true;
+ Log4Func(("IDT: Vectoring double #PF %#RX64 cr2=%#RX64\n", pVCpu->hm.s.Event.u64IntInfo,
+ pVCpu->cpum.GstCtx.cr2));
+ rcStrict = VINF_SUCCESS;
+ }
+ else
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingReflect);
+ hmR0VmxSetPendingXcptDF(pVCpu);
+ Log4Func(("IDT: Pending vectoring #DF %#RX64 uIdtVector=%#x uExitVector=%#x\n", pVCpu->hm.s.Event.u64IntInfo,
+ uIdtVector, uExitVector));
+ rcStrict = VINF_HM_DOUBLE_FAULT;
+ }
+ break;
+ }
+
+ case IEMXCPTRAISE_TRIPLE_FAULT:
+ {
+ Log4Func(("IDT: Pending vectoring triple-fault uIdt=%#x uExit=%#x\n", uIdtVector, uExitVector));
+ rcStrict = VINF_EM_RESET;
+ break;
+ }
+
+ case IEMXCPTRAISE_CPU_HANG:
+ {
+ Log4Func(("IDT: Bad guest! Entering CPU hang. fRaiseInfo=%#x\n", fRaiseInfo));
+ rcStrict = VERR_EM_GUEST_CPU_HANG;
+ break;
+ }
+
+ default:
+ {
+ AssertMsgFailed(("IDT: vcpu[%RU32] Unexpected/invalid value! enmRaise=%#x\n", pVCpu->idCpu, enmRaise));
+ rcStrict = VERR_VMX_IPE_2;
+ break;
+ }
+ }
+ }
+ else if ( VMX_EXIT_INT_INFO_IS_VALID(pVmxTransient->uExitIntInfo)
+ && VMX_EXIT_INT_INFO_IS_NMI_UNBLOCK_IRET(pVmxTransient->uExitIntInfo)
+ && uExitVector != X86_XCPT_DF
+ && (pVCpu->hm.s.vmx.u32PinCtls & VMX_PIN_CTLS_VIRT_NMI))
+ {
+ /*
+ * Execution of IRET caused this fault when NMI blocking was in effect (i.e. we're in the guest NMI handler).
+ * We need to set the block-by-NMI field so that NMIs remain blocked until the IRET execution is restarted.
+ * See Intel spec. 30.7.1.2 "Resuming guest software after handling an exception".
+ */
+ if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ {
+ Log4Func(("Setting VMCPU_FF_BLOCK_NMIS. fValid=%RTbool uExitReason=%u\n",
+ VMX_EXIT_INT_INFO_IS_VALID(pVmxTransient->uExitIntInfo), pVmxTransient->uExitReason));
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_BLOCK_NMIS);
+ }
+ }
+
+ Assert( rcStrict == VINF_SUCCESS || rcStrict == VINF_HM_DOUBLE_FAULT
+ || rcStrict == VINF_EM_RESET || rcStrict == VERR_EM_GUEST_CPU_HANG);
+ return rcStrict;
+}
+
+
+/**
+ * Imports a guest segment register from the current VMCS into
+ * the guest-CPU context.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param idxSel Index of the selector in the VMCS.
+ * @param idxLimit Index of the segment limit in the VMCS.
+ * @param idxBase Index of the segment base in the VMCS.
+ * @param idxAccess Index of the access rights of the segment in the VMCS.
+ * @param pSelReg Pointer to the segment selector.
+ *
+ * @remarks Called with interrupts and/or preemption disabled, try not to assert and
+ * do not log!
+ *
+ * @remarks Never call this function directly!!! Use the
+ * HMVMX_IMPORT_SREG() macro as that takes care
+ * of whether to read from the VMCS cache or not.
+ */
+static int hmR0VmxImportGuestSegmentReg(PVMCPU pVCpu, uint32_t idxSel, uint32_t idxLimit, uint32_t idxBase, uint32_t idxAccess,
+ PCPUMSELREG pSelReg)
+{
+ NOREF(pVCpu);
+
+ uint32_t u32Sel;
+ uint32_t u32Limit;
+ uint32_t u32Attr;
+ uint64_t u64Base;
+ int rc = VMXReadVmcs32(idxSel, &u32Sel);
+ rc |= VMXReadVmcs32(idxLimit, &u32Limit);
+ rc |= VMXReadVmcs32(idxAccess, &u32Attr);
+ rc |= VMXReadVmcsGstNByIdxVal(idxBase, &u64Base);
+ AssertRCReturn(rc, rc);
+
+ pSelReg->Sel = (uint16_t)u32Sel;
+ pSelReg->ValidSel = (uint16_t)u32Sel;
+ pSelReg->fFlags = CPUMSELREG_FLAGS_VALID;
+ pSelReg->u32Limit = u32Limit;
+ pSelReg->u64Base = u64Base;
+ pSelReg->Attr.u = u32Attr;
+
+ /*
+ * If VT-x marks the segment as unusable, most other bits remain undefined:
+ * - For CS the L, D and G bits have meaning.
+ * - For SS the DPL has meaning (it -is- the CPL for Intel and VBox).
+ * - For the remaining data segments no bits are defined.
+ *
+ * The present bit and the unusable bit have been observed to be set at the
+ * same time (the selector was supposed to be invalid as we started executing
+ * a V8086 interrupt in ring-0).
+ *
+ * What should be important for the rest of the VBox code, is that the P bit is
+ * cleared. Some of the other VBox code recognizes the unusable bit, but
+ * AMD-V certainly doesn't, and REM doesn't really either. So, to be on the
+ * safe side here, we'll strip off P and other bits we don't care about. If
+ * any code breaks because Attr.u != 0 when Sel < 4, it should be fixed.
+ *
+ * See Intel spec. 27.3.2 "Saving Segment Registers and Descriptor-Table Registers".
+ */
+ if (pSelReg->Attr.u & X86DESCATTR_UNUSABLE)
+ {
+ Assert(idxSel != VMX_VMCS16_GUEST_TR_SEL); /* TR is the only selector that can never be unusable. */
+
+ /* Masking off: X86DESCATTR_P, X86DESCATTR_LIMIT_HIGH, and X86DESCATTR_AVL. The latter two are really irrelevant. */
+ pSelReg->Attr.u &= X86DESCATTR_UNUSABLE | X86DESCATTR_L | X86DESCATTR_D | X86DESCATTR_G
+ | X86DESCATTR_DPL | X86DESCATTR_TYPE | X86DESCATTR_DT;
+#ifdef VBOX_STRICT
+ VMMRZCallRing3Disable(pVCpu);
+ Log4Func(("Unusable idxSel=%#x attr=%#x -> %#x\n", idxSel, u32Sel, pSelReg->Attr.u));
+# ifdef DEBUG_bird
+ AssertMsg((u32Attr & ~X86DESCATTR_P) == pSelReg->Attr.u,
+ ("%#x: %#x != %#x (sel=%#x base=%#llx limit=%#x)\n",
+ idxSel, u32Sel, pSelReg->Attr.u, pSelReg->Sel, pSelReg->u64Base, pSelReg->u32Limit));
+# endif
+ VMMRZCallRing3Enable(pVCpu);
+#endif
+ }
+ return VINF_SUCCESS;
+}
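+
+/*
+ * Illustrative note: callers in this file go through the HMVMX_IMPORT_SREG() wrapper
+ * rather than calling the function above directly, e.g. as done later in
+ * hmR0VmxImportGuestState():
+ *
+ *     rc = HMVMX_IMPORT_SREG(CS, &pCtx->cs);
+ *
+ * Per the remarks above, the macro takes care of whether to read from the VMCS
+ * cache or not.
+ */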
+
+
+/**
+ * Imports the guest RIP from the VMCS back into the guest-CPU context.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Called with interrupts and/or preemption disabled, should not assert!
+ * @remarks Do -not- call this function directly, use hmR0VmxImportGuestState()
+ * instead!!!
+ */
+DECLINLINE(int) hmR0VmxImportGuestRip(PVMCPU pVCpu)
+{
+ uint64_t u64Val;
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (pCtx->fExtrn & CPUMCTX_EXTRN_RIP)
+ {
+ int rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val);
+ if (RT_SUCCESS(rc))
+ {
+ pCtx->rip = u64Val;
+ EMR0HistoryUpdatePC(pVCpu, pCtx->rip, false);
+ pCtx->fExtrn &= ~CPUMCTX_EXTRN_RIP;
+ }
+ return rc;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Imports the guest RFLAGS from the VMCS back into the guest-CPU context.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Called with interrupts and/or preemption disabled, should not assert!
+ * @remarks Do -not- call this function directly, use hmR0VmxImportGuestState()
+ * instead!!!
+ */
+DECLINLINE(int) hmR0VmxImportGuestRFlags(PVMCPU pVCpu)
+{
+ uint32_t u32Val;
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (pCtx->fExtrn & CPUMCTX_EXTRN_RFLAGS)
+ {
+ int rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Val);
+ if (RT_SUCCESS(rc))
+ {
+ pCtx->eflags.u32 = u32Val;
+
+ /* Restore eflags for real-on-v86-mode hack. */
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ {
+ pCtx->eflags.Bits.u1VM = 0;
+ pCtx->eflags.Bits.u2IOPL = pVCpu->hm.s.vmx.RealMode.Eflags.Bits.u2IOPL;
+ }
+ }
+ pCtx->fExtrn &= ~CPUMCTX_EXTRN_RFLAGS;
+ return rc;
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Imports the guest interruptibility-state from the VMCS back into the guest-CPU
+ * context.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Called with interrupts and/or preemption disabled, try not to assert and
+ * do not log!
+ * @remarks Do -not- call this function directly, use hmR0VmxImportGuestState()
+ * instead!!!
+ */
+DECLINLINE(int) hmR0VmxImportGuestIntrState(PVMCPU pVCpu)
+{
+ uint32_t u32Val;
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ int rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &u32Val);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * In addition, we may need to import RIP and RFLAGS here, depending on whether
+ * hmR0VmxEvaluatePendingEvent() will need them.
+ */
+ if (!u32Val)
+ {
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ {
+ rc = hmR0VmxImportGuestRip(pVCpu);
+ rc |= hmR0VmxImportGuestRFlags(pVCpu);
+ AssertRCReturn(rc, rc);
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+ }
+
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS);
+ }
+ else
+ {
+ rc = hmR0VmxImportGuestRip(pVCpu);
+ rc |= hmR0VmxImportGuestRFlags(pVCpu);
+ AssertRCReturn(rc, rc);
+
+ if (u32Val & ( VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS
+ | VMX_VMCS_GUEST_INT_STATE_BLOCK_STI))
+ {
+ EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
+ }
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+
+ if (u32Val & VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI)
+ {
+ if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_BLOCK_NMIS);
+ }
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS);
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Worker for VMXR0ImportStateOnDemand.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
+ */
+static int hmR0VmxImportGuestState(PVMCPU pVCpu, uint64_t fWhat)
+{
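+/* Local convenience macro: bails out of the do-while loop below on the first failure. */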
+#define VMXLOCAL_BREAK_RC(a_rc) \
+ if (RT_FAILURE(a_rc)) \
+ break
+
+ int rc = VINF_SUCCESS;
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint64_t u64Val;
+ uint32_t u32Val;
+
+ Log4Func(("fExtrn=%#RX64 fWhat=%#RX64\n", pCtx->fExtrn, fWhat));
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatImportGuestState, x);
+
+ /*
+ * We disable interrupts to make the updating of the state and in particular
+ * the fExtrn modification atomic with respect to preemption hooks.
+ */
+ RTCCUINTREG const fEFlags = ASMIntDisableFlags();
+
+ fWhat &= pCtx->fExtrn;
+ if (fWhat)
+ {
+ do
+ {
+ if (fWhat & CPUMCTX_EXTRN_RIP)
+ {
+ rc = hmR0VmxImportGuestRip(pVCpu);
+ VMXLOCAL_BREAK_RC(rc);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_RFLAGS)
+ {
+ rc = hmR0VmxImportGuestRFlags(pVCpu);
+ VMXLOCAL_BREAK_RC(rc);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_HM_VMX_INT_STATE)
+ {
+ rc = hmR0VmxImportGuestIntrState(pVCpu);
+ VMXLOCAL_BREAK_RC(rc);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_RSP)
+ {
+ rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RSP, &u64Val);
+ VMXLOCAL_BREAK_RC(rc);
+ pCtx->rsp = u64Val;
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CS)
+ {
+ rc = HMVMX_IMPORT_SREG(CS, &pCtx->cs);
+ rc |= hmR0VmxImportGuestRip(pVCpu);
+ VMXLOCAL_BREAK_RC(rc);
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ pCtx->cs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrCS.u;
+ EMR0HistoryUpdatePC(pVCpu, pCtx->cs.u64Base + pCtx->rip, true);
+ }
+ if (fWhat & CPUMCTX_EXTRN_SS)
+ {
+ rc = HMVMX_IMPORT_SREG(SS, &pCtx->ss);
+ VMXLOCAL_BREAK_RC(rc);
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ pCtx->ss.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrSS.u;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DS)
+ {
+ rc = HMVMX_IMPORT_SREG(DS, &pCtx->ds);
+ VMXLOCAL_BREAK_RC(rc);
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ pCtx->ds.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrDS.u;
+ }
+ if (fWhat & CPUMCTX_EXTRN_ES)
+ {
+ rc = HMVMX_IMPORT_SREG(ES, &pCtx->es);
+ VMXLOCAL_BREAK_RC(rc);
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ pCtx->es.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrES.u;
+ }
+ if (fWhat & CPUMCTX_EXTRN_FS)
+ {
+ rc = HMVMX_IMPORT_SREG(FS, &pCtx->fs);
+ VMXLOCAL_BREAK_RC(rc);
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ pCtx->fs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrFS.u;
+ }
+ if (fWhat & CPUMCTX_EXTRN_GS)
+ {
+ rc = HMVMX_IMPORT_SREG(GS, &pCtx->gs);
+ VMXLOCAL_BREAK_RC(rc);
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ pCtx->gs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrGS.u;
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_LDTR)
+ {
+ rc = HMVMX_IMPORT_SREG(LDTR, &pCtx->ldtr);
+ VMXLOCAL_BREAK_RC(rc);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_GDTR)
+ {
+ rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_GDTR_BASE, &u64Val);
+ rc |= VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val);
+ VMXLOCAL_BREAK_RC(rc);
+ pCtx->gdtr.pGdt = u64Val;
+ pCtx->gdtr.cbGdt = u32Val;
+ }
+
+ /* Guest IDTR. */
+ if (fWhat & CPUMCTX_EXTRN_IDTR)
+ {
+ rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_IDTR_BASE, &u64Val);
+ rc |= VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val);
+ VMXLOCAL_BREAK_RC(rc);
+ pCtx->idtr.pIdt = u64Val;
+ pCtx->idtr.cbIdt = u32Val;
+ }
+
+ /* Guest TR. */
+ if (fWhat & CPUMCTX_EXTRN_TR)
+ {
+ /* Real-mode emulation using virtual-8086 mode has the fake TSS (pRealModeTSS) in TR, don't save that one. */
+ if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ {
+ rc = HMVMX_IMPORT_SREG(TR, &pCtx->tr);
+ VMXLOCAL_BREAK_RC(rc);
+ }
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
+ {
+ rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_SYSENTER_EIP, &pCtx->SysEnter.eip);
+ rc |= VMXReadVmcsGstN(VMX_VMCS_GUEST_SYSENTER_ESP, &pCtx->SysEnter.esp);
+ rc |= VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val);
+ pCtx->SysEnter.cs = u32Val;
+ VMXLOCAL_BREAK_RC(rc);
+ }
+
+#if HC_ARCH_BITS == 64
+ if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
+ {
+ if ( pVM->hm.s.fAllow64BitGuests
+ && (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
+ pCtx->msrKERNELGSBASE = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
+ {
+ if ( pVM->hm.s.fAllow64BitGuests
+ && (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
+ {
+ pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
+ pCtx->msrSTAR = ASMRdMsr(MSR_K6_STAR);
+ pCtx->msrSFMASK = ASMRdMsr(MSR_K8_SF_MASK);
+ }
+ }
+#endif
+
+ if ( (fWhat & (CPUMCTX_EXTRN_TSC_AUX | CPUMCTX_EXTRN_OTHER_MSRS))
+#if HC_ARCH_BITS == 32
+ || (fWhat & (CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS))
+#endif
+ )
+ {
+ PCVMXAUTOMSR pMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
+ uint32_t const cMsrs = pVCpu->hm.s.vmx.cMsrs;
+ for (uint32_t i = 0; i < cMsrs; i++, pMsr++)
+ {
+ switch (pMsr->u32Msr)
+ {
+#if HC_ARCH_BITS == 32
+ case MSR_K8_LSTAR: pCtx->msrLSTAR = pMsr->u64Value; break;
+ case MSR_K6_STAR: pCtx->msrSTAR = pMsr->u64Value; break;
+ case MSR_K8_SF_MASK: pCtx->msrSFMASK = pMsr->u64Value; break;
+ case MSR_K8_KERNEL_GS_BASE: pCtx->msrKERNELGSBASE = pMsr->u64Value; break;
+#endif
+ case MSR_IA32_SPEC_CTRL: CPUMSetGuestSpecCtrl(pVCpu, pMsr->u64Value); break;
+ case MSR_K8_TSC_AUX: CPUMSetGuestTscAux(pVCpu, pMsr->u64Value); break;
+ case MSR_K6_EFER: /* EFER can't be changed without causing a VM-exit */ break;
+ default:
+ {
+ pVCpu->hm.s.u32HMError = pMsr->u32Msr;
+ ASMSetFlags(fEFlags);
+ AssertMsgFailed(("Unexpected MSR in auto-load/store area. uMsr=%#RX32 cMsrs=%u\n", pMsr->u32Msr,
+ cMsrs));
+ return VERR_HM_UNEXPECTED_LD_ST_MSR;
+ }
+ }
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_DR7)
+ {
+ if (!pVCpu->hm.s.fUsingHyperDR7)
+ {
+ /* Upper 32-bits are always zero. See Intel spec. 2.7.3 "Loading and Storing Debug Registers". */
+ rc = VMXReadVmcs32(VMX_VMCS_GUEST_DR7, &u32Val);
+ VMXLOCAL_BREAK_RC(rc);
+ pCtx->dr[7] = u32Val;
+ }
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_CR_MASK)
+ {
+ uint32_t u32Shadow;
+ if (fWhat & CPUMCTX_EXTRN_CR0)
+ {
+ rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32Val);
+ rc |= VMXReadVmcs32(VMX_VMCS_CTRL_CR0_READ_SHADOW, &u32Shadow);
+ VMXLOCAL_BREAK_RC(rc);
+ u32Val = (u32Val & ~pVCpu->hm.s.vmx.u32Cr0Mask)
+ | (u32Shadow & pVCpu->hm.s.vmx.u32Cr0Mask);
+ VMMRZCallRing3Disable(pVCpu); /* Calls into PGM which has Log statements. */
+ CPUMSetGuestCR0(pVCpu, u32Val);
+ VMMRZCallRing3Enable(pVCpu);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_CR4)
+ {
+ rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR4, &u32Val);
+ rc |= VMXReadVmcs32(VMX_VMCS_CTRL_CR4_READ_SHADOW, &u32Shadow);
+ VMXLOCAL_BREAK_RC(rc);
+ u32Val = (u32Val & ~pVCpu->hm.s.vmx.u32Cr4Mask)
+ | (u32Shadow & pVCpu->hm.s.vmx.u32Cr4Mask);
+ CPUMSetGuestCR4(pVCpu, u32Val);
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_CR3)
+ {
+ /* CR0.PG bit changes are always intercepted, so it's up to date. */
+ if ( pVM->hm.s.vmx.fUnrestrictedGuest
+ || ( pVM->hm.s.fNestedPaging
+ && CPUMIsGuestPagingEnabledEx(pCtx)))
+ {
+ rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_CR3, &u64Val);
+ if (pCtx->cr3 != u64Val)
+ {
+ CPUMSetGuestCR3(pVCpu, u64Val);
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
+ }
+
+ /* If the guest is in PAE mode, sync back the PDPE's into the guest state.
+ Note: CR4.PAE, CR0.PG, EFER bit changes are always intercepted, so they're up to date. */
+ if (CPUMIsGuestInPAEModeEx(pCtx))
+ {
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &pVCpu->hm.s.aPdpes[0].u);
+ rc |= VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &pVCpu->hm.s.aPdpes[1].u);
+ rc |= VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &pVCpu->hm.s.aPdpes[2].u);
+ rc |= VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &pVCpu->hm.s.aPdpes[3].u);
+ VMXLOCAL_BREAK_RC(rc);
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES);
+ }
+ }
+ }
+ }
+ } while (0);
+
+ if (RT_SUCCESS(rc))
+ {
+ /* Update fExtrn. */
+ pCtx->fExtrn &= ~fWhat;
+
+ /* If everything has been imported, clear the HM keeper bit. */
+ if (!(pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL))
+ {
+ pCtx->fExtrn &= ~CPUMCTX_EXTRN_KEEPER_HM;
+ Assert(!pCtx->fExtrn);
+ }
+ }
+ }
+ else
+ AssertMsg(!pCtx->fExtrn || (pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL), ("%#RX64\n", pCtx->fExtrn));
+
+ ASMSetFlags(fEFlags);
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatImportGuestState, x);
+
+ /*
+ * Honor any pending CR3 updates.
+ *
+ * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> hmR0VmxCallRing3Callback()
+ * -> VMMRZCallRing3Disable() -> hmR0VmxImportGuestState() -> Sets VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp
+ * -> continue with VM-exit handling -> hmR0VmxImportGuestState() and here we are.
+ *
+ * The reason for such complicated handling is because VM-exits that call into PGM expect CR3 to be up-to-date and thus
+ * if any CR3-saves -before- the VM-exit (longjmp) postponed the CR3 update via the force-flag, any VM-exit handler that
+ * calls into PGM when it re-saves CR3 will end up here and we call PGMUpdateCR3(). This is why the code below should
+ * -NOT- check if CPUMCTX_EXTRN_CR3 is set!
+ *
+ * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here.
+ */
+ if (VMMRZCallRing3IsEnabled(pVCpu))
+ {
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
+ {
+ Assert(!(ASMAtomicUoReadU64(&pCtx->fExtrn) & CPUMCTX_EXTRN_CR3));
+ PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
+ }
+
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES))
+ PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
+
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
+ }
+
+ return VINF_SUCCESS;
+#undef VMXLOCAL_BREAK_RC
+}
+
+
+/**
+ * Saves the guest state from the VMCS into the guest-CPU context.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
+ */
+VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPU pVCpu, uint64_t fWhat)
+{
+ return hmR0VmxImportGuestState(pVCpu, fWhat);
+}
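+
+/*
+ * Illustrative sketch (not part of the code above): a VM-exit handler that only
+ * needs the instruction pointer and flags would import just those bits on demand:
+ *
+ *     int rc = VMXR0ImportStateOnDemand(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
+ *     AssertRCReturn(rc, rc);
+ *
+ * A full sync before leaving to ring-3 passes HMVMX_CPUMCTX_EXTRN_ALL instead, as
+ * hmR0VmxLeave() below does via hmR0VmxImportGuestState().
+ */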
+
+
+/**
+ * Check per-VM and per-VCPU force flag actions that require us to go back to
+ * ring-3 for one reason or another.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too)
+ * @retval VINF_SUCCESS if we don't have any actions that require going back to
+ * ring-3.
+ * @retval VINF_PGM_SYNC_CR3 if we have pending PGM CR3 sync.
+ * @retval VINF_EM_PENDING_REQUEST if we have pending requests (like hardware
+ * interrupts)
+ * @retval VINF_PGM_POOL_FLUSH_PENDING if PGM is doing a pool flush and requires
+ * all EMTs to be in ring-3.
+ * @retval VINF_EM_RAW_TO_R3 if there are pending DMA requests.
+ * @retval VINF_EM_NO_MEMORY PGM is out of memory, we need to return
+ * to the EM loop.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fStepping Running in hmR0VmxRunGuestCodeStep().
+ */
+static VBOXSTRICTRC hmR0VmxCheckForceFlags(PVMCPU pVCpu, bool fStepping)
+{
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+
+ /*
+ * Anything pending? If we're doing a good job, nothing should be pending most of the time.
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if ( !fStepping
+ ? !VM_FF_IS_ANY_SET(pVM, VM_FF_HP_R0_PRE_HM_MASK)
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HP_R0_PRE_HM_MASK)
+ : !VM_FF_IS_ANY_SET(pVM, VM_FF_HP_R0_PRE_HM_STEP_MASK)
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) )
+ return VINF_SUCCESS;
+
+ /* Pending PGM CR3 sync. */
+ if (VMCPU_FF_IS_ANY_SET(pVCpu,VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
+ {
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ Assert(!(ASMAtomicUoReadU64(&pCtx->fExtrn) & (CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR3 | CPUMCTX_EXTRN_CR4)));
+ VBOXSTRICTRC rcStrict2 = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4,
+ VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
+ if (rcStrict2 != VINF_SUCCESS)
+ {
+ AssertRC(VBOXSTRICTRC_VAL(rcStrict2));
+ Log4Func(("PGMSyncCR3 forcing us back to ring-3. rc2=%d\n", VBOXSTRICTRC_VAL(rcStrict2)));
+ return rcStrict2;
+ }
+ }
+
+ /* Pending HM-to-R3 operations (critsects, timers, EMT rendezvous etc.) */
+ if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HM_TO_R3_MASK)
+ || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
+ int rc2 = RT_LIKELY(!VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_RAW_TO_R3 : VINF_EM_NO_MEMORY;
+ Log4Func(("HM_TO_R3 forcing us back to ring-3. rc=%d\n", rc2));
+ return rc2;
+ }
+
+ /* Pending VM request packets, such as hardware interrupts. */
+ if ( VM_FF_IS_SET(pVM, VM_FF_REQUEST)
+ || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_REQUEST))
+ {
+ Log4Func(("Pending VM request forcing us back to ring-3\n"));
+ return VINF_EM_PENDING_REQUEST;
+ }
+
+ /* Pending PGM pool flushes. */
+ if (VM_FF_IS_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
+ {
+ Log4Func(("PGM pool flush pending forcing us back to ring-3\n"));
+ return VINF_PGM_POOL_FLUSH_PENDING;
+ }
+
+ /* Pending DMA requests. */
+ if (VM_FF_IS_SET(pVM, VM_FF_PDM_DMA))
+ {
+ Log4Func(("Pending DMA request forcing us back to ring-3\n"));
+ return VINF_EM_RAW_TO_R3;
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Converts any TRPM trap into a pending HM event. This is typically used when
+ * entering from ring-3 (not longjmp returns).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0VmxTrpmTrapToPendingEvent(PVMCPU pVCpu)
+{
+ Assert(TRPMHasTrap(pVCpu));
+ Assert(!pVCpu->hm.s.Event.fPending);
+
+ uint8_t uVector;
+ TRPMEVENT enmTrpmEvent;
+ RTGCUINT uErrCode;
+ RTGCUINTPTR GCPtrFaultAddress;
+ uint8_t cbInstr;
+
+ int rc = TRPMQueryTrapAll(pVCpu, &uVector, &enmTrpmEvent, &uErrCode, &GCPtrFaultAddress, &cbInstr);
+ AssertRC(rc);
+
+ /* Refer Intel spec. 24.8.3 "VM-entry Controls for Event Injection" for the format of u32IntInfo. */
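+ /* In that format, bits 7:0 hold the vector, bits 10:8 the type, bit 11 the error-code-valid flag and bit 31 the valid flag. */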
+ uint32_t u32IntInfo = uVector | VMX_EXIT_INT_INFO_VALID;
+ if (enmTrpmEvent == TRPM_TRAP)
+ {
+ switch (uVector)
+ {
+ case X86_XCPT_NMI:
+ u32IntInfo |= (VMX_EXIT_INT_INFO_TYPE_NMI << VMX_EXIT_INT_INFO_TYPE_SHIFT);
+ break;
+
+ case X86_XCPT_BP:
+ case X86_XCPT_OF:
+ u32IntInfo |= (VMX_EXIT_INT_INFO_TYPE_SW_XCPT << VMX_EXIT_INT_INFO_TYPE_SHIFT);
+ break;
+
+ case X86_XCPT_PF:
+ case X86_XCPT_DF:
+ case X86_XCPT_TS:
+ case X86_XCPT_NP:
+ case X86_XCPT_SS:
+ case X86_XCPT_GP:
+ case X86_XCPT_AC:
+ u32IntInfo |= VMX_EXIT_INT_INFO_ERROR_CODE_VALID;
+ RT_FALL_THRU();
+ default:
+ u32IntInfo |= (VMX_EXIT_INT_INFO_TYPE_HW_XCPT << VMX_EXIT_INT_INFO_TYPE_SHIFT);
+ break;
+ }
+ }
+ else if (enmTrpmEvent == TRPM_HARDWARE_INT)
+ u32IntInfo |= (VMX_EXIT_INT_INFO_TYPE_EXT_INT << VMX_EXIT_INT_INFO_TYPE_SHIFT);
+ else if (enmTrpmEvent == TRPM_SOFTWARE_INT)
+ u32IntInfo |= (VMX_EXIT_INT_INFO_TYPE_SW_INT << VMX_EXIT_INT_INFO_TYPE_SHIFT);
+ else
+ AssertMsgFailed(("Invalid TRPM event type %d\n", enmTrpmEvent));
+
+ rc = TRPMResetTrap(pVCpu);
+ AssertRC(rc);
+ Log4(("TRPM->HM event: u32IntInfo=%#RX32 enmTrpmEvent=%d cbInstr=%u uErrCode=%#RX32 GCPtrFaultAddress=%#RGv\n",
+ u32IntInfo, enmTrpmEvent, cbInstr, uErrCode, GCPtrFaultAddress));
+
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, uErrCode, GCPtrFaultAddress);
+}
+
+
+/**
+ * Converts the pending HM event into a TRPM trap.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static void hmR0VmxPendingEventToTrpmTrap(PVMCPU pVCpu)
+{
+ Assert(pVCpu->hm.s.Event.fPending);
+
+ uint32_t uVectorType = VMX_IDT_VECTORING_INFO_TYPE(pVCpu->hm.s.Event.u64IntInfo);
+ uint32_t uVector = VMX_IDT_VECTORING_INFO_VECTOR(pVCpu->hm.s.Event.u64IntInfo);
+ bool fErrorCodeValid = VMX_IDT_VECTORING_INFO_IS_ERROR_CODE_VALID(pVCpu->hm.s.Event.u64IntInfo);
+ uint32_t uErrorCode = pVCpu->hm.s.Event.u32ErrCode;
+
+ /* If a trap was already pending, we did something wrong! */
+ Assert(TRPMQueryTrap(pVCpu, NULL /* pu8TrapNo */, NULL /* pEnmType */) == VERR_TRPM_NO_ACTIVE_TRAP);
+
+ TRPMEVENT enmTrapType;
+ switch (uVectorType)
+ {
+ case VMX_IDT_VECTORING_INFO_TYPE_EXT_INT:
+ enmTrapType = TRPM_HARDWARE_INT;
+ break;
+
+ case VMX_IDT_VECTORING_INFO_TYPE_SW_INT:
+ enmTrapType = TRPM_SOFTWARE_INT;
+ break;
+
+ case VMX_IDT_VECTORING_INFO_TYPE_NMI:
+ case VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT:
+ case VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT: /* #BP and #OF */
+ case VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT:
+ enmTrapType = TRPM_TRAP;
+ break;
+
+ default:
+ AssertMsgFailed(("Invalid trap type %#x\n", uVectorType));
+ enmTrapType = TRPM_32BIT_HACK;
+ break;
+ }
+
+ Log4(("HM event->TRPM: uVector=%#x enmTrapType=%d\n", uVector, enmTrapType));
+
+ int rc = TRPMAssertTrap(pVCpu, uVector, enmTrapType);
+ AssertRC(rc);
+
+ if (fErrorCodeValid)
+ TRPMSetErrorCode(pVCpu, uErrorCode);
+
+ if ( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
+ && uVector == X86_XCPT_PF)
+ {
+ TRPMSetFaultAddress(pVCpu, pVCpu->hm.s.Event.GCPtrFaultAddress);
+ }
+ else if ( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT
+ || uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT
+ || uVectorType == VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT)
+ {
+ AssertMsg( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT
+ || (uVector == X86_XCPT_BP || uVector == X86_XCPT_OF),
+ ("Invalid vector: uVector=%#x uVectorType=%#x\n", uVector, uVectorType));
+ TRPMSetInstrLength(pVCpu, pVCpu->hm.s.Event.cbInstr);
+ }
+
+ /* Clear the events from the VMCS. */
+ VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0);
+
+ /* We're now done converting the pending event. */
+ pVCpu->hm.s.Event.fPending = false;
+}
+
+
+/**
+ * Does the necessary state syncing before returning to ring-3 for any reason
+ * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fImportState Whether to import the guest state from the VMCS back
+ * to the guest-CPU context.
+ *
+ * @remarks No-long-jmp zone!!!
+ */
+static int hmR0VmxLeave(PVMCPU pVCpu, bool fImportState)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ RTCPUID idCpu = RTMpCpuId();
+ Log4Func(("HostCpuId=%u\n", idCpu));
+
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, check whether hmR0VmxCallRing3Callback() needs to be updated too.
+ */
+
+ /* Save the guest state if necessary. */
+ if (fImportState)
+ {
+ int rc = hmR0VmxImportGuestState(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+ }
+
+ /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
+ CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
+ Assert(!CPUMIsGuestFPUStateActive(pVCpu));
+
+ /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
+#ifdef VBOX_STRICT
+ if (CPUMIsHyperDebugStateActive(pVCpu))
+ Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT);
+#endif
+ CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
+ Assert(!CPUMIsGuestDebugStateActive(pVCpu) && !CPUMIsGuestDebugStateActivePending(pVCpu));
+ Assert(!CPUMIsHyperDebugStateActive(pVCpu) && !CPUMIsHyperDebugStateActivePending(pVCpu));
+
+#if HC_ARCH_BITS == 64
+ /* Restore host-state bits that VT-x only restores partially. */
+ if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED)
+ && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED))
+ {
+ Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hm.s.vmx.fRestoreHostFlags, idCpu));
+ VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost);
+ }
+ pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
+#endif
+
+ /* Restore the lazy host MSRs as we're leaving VT-x context. */
+ if (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
+ {
+ /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
+ if (!fImportState)
+ {
+ int rc = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
+ AssertRCReturn(rc, rc);
+ }
+ hmR0VmxLazyRestoreHostMsrs(pVCpu);
+ Assert(!pVCpu->hm.s.vmx.fLazyMsrs);
+ }
+ else
+ pVCpu->hm.s.vmx.fLazyMsrs = 0;
+
+ /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
+ pVCpu->hm.s.vmx.fUpdatedHostMsrs = false;
+
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
+ STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
+
+ VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
+
+ /** @todo This partially defeats the purpose of having preemption hooks.
+ * The problem is that deregistering the hooks should be moved to a place that
+ * lasts until the EMT is about to be destroyed, and not be done every time we
+ * leave HM context.
+ */
+ if (pVCpu->hm.s.vmx.fVmcsState & HMVMX_VMCS_STATE_ACTIVE)
+ {
+ int rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
+ AssertRCReturn(rc, rc);
+
+ pVCpu->hm.s.vmx.fVmcsState = HMVMX_VMCS_STATE_CLEAR;
+ Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
+ }
+ Assert(!(pVCpu->hm.s.vmx.fVmcsState & HMVMX_VMCS_STATE_LAUNCHED));
+ NOREF(idCpu);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Leaves the VT-x session.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jmp zone!!!
+ */
+static int hmR0VmxLeaveSession(PVMCPU pVCpu)
+{
+ HM_DISABLE_PREEMPT(pVCpu);
+ HMVMX_ASSERT_CPU_SAFE(pVCpu);
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
+ and done this from the VMXR0ThreadCtxCallback(). */
+ if (!pVCpu->hm.s.fLeaveDone)
+ {
+ int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
+ AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
+ pVCpu->hm.s.fLeaveDone = true;
+ }
+ Assert(!pVCpu->cpum.GstCtx.fExtrn);
+
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, make sure to check whether hmR0VmxCallRing3Callback() needs to be updated too.
+ */
+
+ /* Deregister hook now that we've left HM context before re-enabling preemption. */
+ /** @todo Deregistering here means we need to VMCLEAR always
+ * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate the need
+ * for calling VMMR0ThreadCtxHookDisable here! */
+ VMMR0ThreadCtxHookDisable(pVCpu);
+
+ /* Leave HM context. This takes care of local init (term). */
+ int rc = HMR0LeaveCpu(pVCpu);
+
+ HM_RESTORE_PREEMPT();
+ return rc;
+}
+
+
+/**
+ * Does the necessary state syncing before doing a longjmp to ring-3.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jmp zone!!!
+ */
+DECLINLINE(int) hmR0VmxLongJmpToRing3(PVMCPU pVCpu)
+{
+ return hmR0VmxLeaveSession(pVCpu);
+}
+
+
+/**
+ * Take necessary actions before going back to ring-3.
+ *
+ * An action requires us to go back to ring-3. This function does the necessary
+ * steps before we can safely return to ring-3. This is not the same as longjmps
+ * to ring-3; this is voluntary and prepares the guest so it may continue
+ * executing outside HM (recompiler/IEM).
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param rcExit The reason for exiting to ring-3. Can be
+ * VINF_VMM_UNKNOWN_RING3_CALL.
+ */
+static int hmR0VmxExitToRing3(PVMCPU pVCpu, VBOXSTRICTRC rcExit)
+{
+ Assert(pVCpu);
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
+ {
+ VMXGetActivatedVmcs(&pVCpu->hm.s.vmx.LastError.u64VmcsPhys);
+ pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs;
+ pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu;
+ /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
+ }
+
+ /* Please, no longjumps here (any logging shouldn't flush jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
+ VMMRZCallRing3Disable(pVCpu);
+ Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
+
+ /* We need to do this only while truly exiting the "inner loop" back to ring-3 and -not- for any longjmp to ring3. */
+ if (pVCpu->hm.s.Event.fPending)
+ {
+ hmR0VmxPendingEventToTrpmTrap(pVCpu);
+ Assert(!pVCpu->hm.s.Event.fPending);
+ }
+
+ /* Clear interrupt-window and NMI-window controls as we re-evaluate it when we return from ring-3. */
+ hmR0VmxClearIntNmiWindowsVmcs(pVCpu);
+
+ /* If we're emulating an instruction, we shouldn't have any TRPM traps pending
+ and if we're injecting an event we should have a TRPM trap pending. */
+ AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
+#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
+ AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
+#endif
+
+ /* Save guest state and restore host state bits. */
+ int rc = hmR0VmxLeaveSession(pVCpu);
+ AssertRCReturn(rc, rc);
+ STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
+ /* Thread-context hooks are unregistered at this point!!! */
+
+ /* Sync recompiler state. */
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
+ CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
+ | CPUM_CHANGED_LDTR
+ | CPUM_CHANGED_GDTR
+ | CPUM_CHANGED_IDTR
+ | CPUM_CHANGED_TR
+ | CPUM_CHANGED_HIDDEN_SEL_REGS);
+ if ( pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging
+ && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
+ {
+ CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
+ }
+
+ Assert(!pVCpu->hm.s.fClearTrapFlag);
+
+ /* Update the exit-to-ring 3 reason. */
+ pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
+
+ /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
+ if (rcExit != VINF_EM_RAW_INTERRUPT)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
+
+ /* We do -not- want any longjmp notifications after this! We must return to ring-3 ASAP. */
+ VMMRZCallRing3RemoveNotification(pVCpu);
+ VMMRZCallRing3Enable(pVCpu);
+
+ return rc;
+}
+
+
+/**
+ * VMMRZCallRing3() callback wrapper which saves the guest state before we
+ * longjump to ring-3 and possibly get preempted.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param enmOperation The operation causing the ring-3 longjump.
+ * @param pvUser User argument, currently unused, NULL.
+ */
+static DECLCALLBACK(int) hmR0VmxCallRing3Callback(PVMCPU pVCpu, VMMCALLRING3 enmOperation, void *pvUser)
+{
+ RT_NOREF(pvUser);
+ if (enmOperation == VMMCALLRING3_VM_R0_ASSERTION)
+ {
+ /*
+ * !!! IMPORTANT !!!
+ * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too.
+ * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
+ */
+ VMMRZCallRing3RemoveNotification(pVCpu);
+ VMMRZCallRing3Disable(pVCpu);
+ RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+ RTThreadPreemptDisable(&PreemptState);
+
+ hmR0VmxImportGuestState(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL);
+ CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
+ CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
+
+#if HC_ARCH_BITS == 64
+ /* Restore host-state bits that VT-x only restores partially. */
+ if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED)
+ && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED))
+ VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost);
+ pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
+#endif
+
+ /* Restore the lazy host MSRs as we're leaving VT-x context. */
+ if (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
+ hmR0VmxLazyRestoreHostMsrs(pVCpu);
+
+ /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
+ pVCpu->hm.s.vmx.fUpdatedHostMsrs = false;
+ VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
+ if (pVCpu->hm.s.vmx.fVmcsState & HMVMX_VMCS_STATE_ACTIVE)
+ {
+ VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
+ pVCpu->hm.s.vmx.fVmcsState = HMVMX_VMCS_STATE_CLEAR;
+ }
+
+ /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
+ VMMR0ThreadCtxHookDisable(pVCpu);
+ HMR0LeaveCpu(pVCpu);
+ RTThreadPreemptRestore(&PreemptState);
+ return VINF_SUCCESS;
+ }
+
+ Assert(pVCpu);
+ Assert(pvUser);
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ VMMRZCallRing3Disable(pVCpu);
+ Assert(VMMR0IsLogFlushDisabled(pVCpu));
+
+ Log4Func((" -> hmR0VmxLongJmpToRing3 enmOperation=%d\n", enmOperation));
+
+ int rc = hmR0VmxLongJmpToRing3(pVCpu);
+ AssertRCReturn(rc, rc);
+
+ VMMRZCallRing3Enable(pVCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets the interrupt-window exiting control in the VMCS which instructs VT-x to
+ * cause a VM-exit as soon as the guest is in a state to receive interrupts.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetIntWindowExitVmcs(PVMCPU pVCpu)
+{
+ if (RT_LIKELY(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_INT_WINDOW_EXIT))
+ {
+ if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_INT_WINDOW_EXIT))
+ {
+ pVCpu->hm.s.vmx.u32ProcCtls |= VMX_PROC_CTLS_INT_WINDOW_EXIT;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
+ AssertRC(rc);
+ Log4Func(("Setup interrupt-window exiting\n"));
+ }
+ } /* else we will deliver interrupts whenever the guest exits next and is in a state to receive events. */
+}
+
+
+/**
+ * Clears the interrupt-window exiting control in the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxClearIntWindowExitVmcs(PVMCPU pVCpu)
+{
+ Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_INT_WINDOW_EXIT);
+ pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_PROC_CTLS_INT_WINDOW_EXIT;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
+ AssertRC(rc);
+ Log4Func(("Cleared interrupt-window exiting\n"));
+}
+
+
+/**
+ * Sets the NMI-window exiting control in the VMCS which instructs VT-x to
+ * cause a VM-exit as soon as the guest is in a state to receive NMIs.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxSetNmiWindowExitVmcs(PVMCPU pVCpu)
+{
+ if (RT_LIKELY(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_NMI_WINDOW_EXIT))
+ {
+ if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT))
+ {
+ pVCpu->hm.s.vmx.u32ProcCtls |= VMX_PROC_CTLS_NMI_WINDOW_EXIT;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
+ AssertRC(rc);
+ Log4Func(("Setup NMI-window exiting\n"));
+ }
+ } /* else we will deliver NMIs whenever we VM-exit next, even possibly nesting NMIs. Can't be helped on ancient CPUs. */
+}
+
+
+/**
+ * Clears the NMI-window exiting control in the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(void) hmR0VmxClearNmiWindowExitVmcs(PVMCPU pVCpu)
+{
+ Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT);
+ pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_PROC_CTLS_NMI_WINDOW_EXIT;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
+ AssertRC(rc);
+ Log4Func(("Cleared NMI-window exiting\n"));
+}
+
+
+/**
+ * Evaluates the event to be delivered to the guest and sets it as the pending
+ * event.
+ *
+ * @returns The VT-x guest-interruptibility state.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+static uint32_t hmR0VmxEvaluatePendingEvent(PVMCPU pVCpu)
+{
+ /* Get the current interruptibility-state of the guest and then figure out what can be injected. */
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint32_t const fIntrState = hmR0VmxGetGuestIntrState(pVCpu);
+ bool const fBlockMovSS = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
+ bool const fBlockSti = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI);
+ bool const fBlockNmi = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI);
+
+ Assert(!fBlockSti || !(ASMAtomicUoReadU64(&pCtx->fExtrn) & CPUMCTX_EXTRN_RFLAGS));
+ Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI)); /* We don't support block-by-SMI yet.*/
+ Assert(!fBlockSti || pCtx->eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. */
+ Assert(!TRPMHasTrap(pVCpu));
+
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(pVCpu);
+
+ /*
+ * Toggling of interrupt force-flags here is safe since we update TRPM on premature exits
+ * to ring-3 before executing guest code, see hmR0VmxExitToRing3(). We must NOT restore these force-flags.
+ */
+ /** @todo SMI. SMIs take priority over NMIs. */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI)) /* NMI. NMIs take priority over regular interrupts. */
+ {
+ /* On some CPUs block-by-STI also blocks NMIs. See Intel spec. 26.3.1.5 "Checks On Guest Non-Register State". */
+ if ( !pVCpu->hm.s.Event.fPending
+ && !fBlockNmi
+ && !fBlockSti
+ && !fBlockMovSS)
+ {
+ Log4Func(("Pending NMI\n"));
+ uint32_t u32IntInfo = X86_XCPT_NMI | VMX_EXIT_INT_INFO_VALID;
+ u32IntInfo |= (VMX_EXIT_INT_INFO_TYPE_NMI << VMX_EXIT_INT_INFO_TYPE_SHIFT);
+
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
+ }
+ else
+ hmR0VmxSetNmiWindowExitVmcs(pVCpu);
+ }
+ /*
+ * Check if the guest can receive external interrupts (PIC/APIC). Once PDMGetInterrupt() returns
+ * a valid interrupt we -must- deliver the interrupt. We can no longer re-request it from the APIC.
+ */
+ else if ( VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
+ && !pVCpu->hm.s.fSingleInstruction)
+ {
+ Assert(!DBGFIsStepping(pVCpu));
+ int rc = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_RFLAGS);
+ AssertRCReturn(rc, 0);
+ bool const fBlockInt = !(pCtx->eflags.u32 & X86_EFL_IF);
+ if ( !pVCpu->hm.s.Event.fPending
+ && !fBlockInt
+ && !fBlockSti
+ && !fBlockMovSS)
+ {
+ uint8_t u8Interrupt;
+ rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
+ if (RT_SUCCESS(rc))
+ {
+ Log4Func(("Pending external interrupt u8Interrupt=%#x\n", u8Interrupt));
+ uint32_t u32IntInfo = u8Interrupt
+ | VMX_EXIT_INT_INFO_VALID
+ | (VMX_EXIT_INT_INFO_TYPE_EXT_INT << VMX_EXIT_INT_INFO_TYPE_SHIFT);
+
+ hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
+ }
+ else if (rc == VERR_APIC_INTR_MASKED_BY_TPR)
+ {
+ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
+ hmR0VmxApicSetTprThreshold(pVCpu, u8Interrupt >> 4);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchTprMaskedIrq);
+
+ /*
+ * If the CPU doesn't have TPR shadowing, we will always get a VM-exit on TPR changes and
+ * APICSetTpr() will end up setting the VMCPU_FF_INTERRUPT_APIC if required, so there is no
+ * need to re-set this force-flag here.
+ */
+ }
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
+ }
+ else
+ hmR0VmxSetIntWindowExitVmcs(pVCpu);
+ }
+
+ return fIntrState;
+}
+
+
+/**
+ * Injects any pending events into the guest if the guest is in a state to
+ * receive them.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fIntrState The VT-x guest-interruptibility state.
+ * @param fStepping Running in hmR0VmxRunGuestCodeStep() and we should
+ * return VINF_EM_DBG_STEPPED if the event was
+ * dispatched directly.
+ */
+static VBOXSTRICTRC hmR0VmxInjectPendingEvent(PVMCPU pVCpu, uint32_t fIntrState, bool fStepping)
+{
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+
+ bool const fBlockMovSS = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
+ bool const fBlockSti = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI);
+
+ Assert(!fBlockSti || !(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_RFLAGS));
+ Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI)); /* We don't support block-by-SMI yet.*/
+ Assert(!fBlockSti || pVCpu->cpum.GstCtx.eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. */
+ Assert(!TRPMHasTrap(pVCpu));
+
+ VBOXSTRICTRC rcStrict = VINF_SUCCESS;
+ if (pVCpu->hm.s.Event.fPending)
+ {
+ /*
+ * Do -not- clear any interrupt-window exiting control here. We might have an interrupt
+ * pending even while injecting an event and in this case, we want a VM-exit as soon as
+ * the guest is ready for the next interrupt, see @bugref{6208#c45}.
+ *
+ * See Intel spec. 26.6.5 "Interrupt-Window Exiting and Virtual-Interrupt Delivery".
+ */
+ uint32_t const uIntType = VMX_ENTRY_INT_INFO_TYPE(pVCpu->hm.s.Event.u64IntInfo);
+#ifdef VBOX_STRICT
+ if (uIntType == VMX_ENTRY_INT_INFO_TYPE_EXT_INT)
+ {
+ bool const fBlockInt = !(pVCpu->cpum.GstCtx.eflags.u32 & X86_EFL_IF);
+ Assert(!fBlockInt);
+ Assert(!fBlockSti);
+ Assert(!fBlockMovSS);
+ }
+ else if (uIntType == VMX_ENTRY_INT_INFO_TYPE_NMI)
+ {
+ bool const fBlockNmi = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI);
+ Assert(!fBlockSti);
+ Assert(!fBlockMovSS);
+ Assert(!fBlockNmi);
+ }
+#endif
+ Log4(("Injecting pending event vcpu[%RU32] u64IntInfo=%#RX64 Type=%#RX32\n", pVCpu->idCpu, pVCpu->hm.s.Event.u64IntInfo,
+ uIntType));
+
+ /*
+ * Inject the event and get any changes to the guest-interruptibility state.
+ *
+ * The guest-interruptibility state may need to be updated if we inject the event
+ * into the guest IDT ourselves (for real-on-v86 guests when injecting software interrupts).
+ */
+ rcStrict = hmR0VmxInjectEventVmcs(pVCpu, pVCpu->hm.s.Event.u64IntInfo, pVCpu->hm.s.Event.cbInstr,
+ pVCpu->hm.s.Event.u32ErrCode, pVCpu->hm.s.Event.GCPtrFaultAddress, fStepping,
+ &fIntrState);
+ AssertRCReturn(VBOXSTRICTRC_VAL(rcStrict), rcStrict);
+
+ if (uIntType == VMX_ENTRY_INT_INFO_TYPE_EXT_INT)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterrupt);
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectXcpt);
+ }
+
+ /*
+ * Update the guest-interruptibility state.
+ *
+ * This is required for the real-on-v86 software interrupt injection case above, as well as
+ * updates to the guest state from ring-3 or IEM/REM.
+ */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * There's no need to clear the VM-entry interruption-information field here if we're not
+ * injecting anything. VT-x clears the valid bit on every VM-exit.
+ *
+ * See Intel spec. 24.8.3 "VM-Entry Controls for Event Injection".
+ */
+
+ Assert(rcStrict == VINF_SUCCESS || rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping));
+ NOREF(fBlockMovSS); NOREF(fBlockSti);
+ return rcStrict;
+}
+
+
+/**
+ * Injects a double-fault (\#DF) exception into the VM.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fStepping Whether we're running in hmR0VmxRunGuestCodeStep()
+ * and should return VINF_EM_DBG_STEPPED if the event
+ * is injected directly (register modified by us, not
+ * by hardware on VM-entry).
+ * @param pfIntrState Pointer to the current guest interruptibility-state.
+ * This interruptibility-state will be updated if
+ * necessary. This cannot be NULL.
+ */
+DECLINLINE(VBOXSTRICTRC) hmR0VmxInjectXcptDF(PVMCPU pVCpu, bool fStepping, uint32_t *pfIntrState)
+{
+ uint32_t const u32IntInfo = X86_XCPT_DF | VMX_EXIT_INT_INFO_VALID
+ | (VMX_EXIT_INT_INFO_TYPE_HW_XCPT << VMX_EXIT_INT_INFO_TYPE_SHIFT)
+ | VMX_EXIT_INT_INFO_ERROR_CODE_VALID;
+ return hmR0VmxInjectEventVmcs(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */, fStepping,
+ pfIntrState);
+}
+
+
+/**
+ * Injects a general-protection (\#GP) fault into the VM.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fErrorCodeValid Whether the error code is valid (depends on the CPU
+ * mode, i.e. in real-mode it's not valid).
+ * @param u32ErrorCode The error code associated with the \#GP.
+ * @param fStepping Whether we're running in
+ * hmR0VmxRunGuestCodeStep() and should return
+ * VINF_EM_DBG_STEPPED if the event is injected
+ * directly (register modified by us, not by
+ * hardware on VM-entry).
+ * @param pfIntrState Pointer to the current guest interruptibility-state.
+ * This interruptibility-state will be updated if
+ * necessary. This cannot be NULL.
+ */
+DECLINLINE(VBOXSTRICTRC) hmR0VmxInjectXcptGP(PVMCPU pVCpu, bool fErrorCodeValid, uint32_t u32ErrorCode, bool fStepping,
+ uint32_t *pfIntrState)
+{
+ uint32_t const u32IntInfo = X86_XCPT_GP | VMX_EXIT_INT_INFO_VALID
+ | (VMX_EXIT_INT_INFO_TYPE_HW_XCPT << VMX_EXIT_INT_INFO_TYPE_SHIFT)
+ | (fErrorCodeValid ? VMX_EXIT_INT_INFO_ERROR_CODE_VALID : 0);
+ return hmR0VmxInjectEventVmcs(pVCpu, u32IntInfo, 0 /* cbInstr */, u32ErrorCode, 0 /* GCPtrFaultAddress */, fStepping,
+ pfIntrState);
+}
+
+
+/**
+ * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
+ * stack.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uValue The value to push to the guest stack.
+ */
+static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPU pVCpu, uint16_t uValue)
+{
+ /*
+ * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
+ * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
+ * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
+ */
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (pCtx->sp == 1)
+ return VINF_EM_RESET;
+ pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
+ int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
+ AssertRC(rc);
+ return rc;
+}
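+
+/*
+ * The real-mode injection path in hmR0VmxInjectEventVmcs() below uses three of these
+ * pushes to build the standard IVT dispatch frame (FLAGS, then CS, then IP), mirroring
+ * what the CPU itself does for a real-mode INT n.
+ */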
+
+
+/**
+ * Injects an event into the guest upon VM-entry by updating the relevant fields
+ * in the VM-entry area in the VMCS.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_SUCCESS if the event is successfully injected into the VMCS.
+ * @retval VINF_EM_RESET if event injection resulted in a triple-fault.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param u64IntInfo The VM-entry interruption-information field.
+ * @param cbInstr The VM-entry instruction length in bytes (for
+ * software interrupts, exceptions and privileged
+ * software exceptions).
+ * @param u32ErrCode The VM-entry exception error code.
+ * @param GCPtrFaultAddress The page-fault address for \#PF exceptions.
+ * @param pfIntrState Pointer to the current guest interruptibility-state.
+ * This interruptibility-state will be updated if
+ * necessary. This cannot be NULL.
+ * @param fStepping Whether we're running in
+ * hmR0VmxRunGuestCodeStep() and should return
+ * VINF_EM_DBG_STEPPED if the event is injected
+ * directly (register modified by us, not by
+ * hardware on VM-entry).
+ */
+static VBOXSTRICTRC hmR0VmxInjectEventVmcs(PVMCPU pVCpu, uint64_t u64IntInfo, uint32_t cbInstr, uint32_t u32ErrCode,
+ RTGCUINTREG GCPtrFaultAddress, bool fStepping, uint32_t *pfIntrState)
+{
+ /* Intel spec. 24.8.3 "VM-Entry Controls for Event Injection" specifies the interruption-information field to be 32-bits. */
+ AssertMsg(!RT_HI_U32(u64IntInfo), ("%#RX64\n", u64IntInfo));
+ Assert(pfIntrState);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint32_t u32IntInfo = (uint32_t)u64IntInfo;
+ uint32_t const uVector = VMX_ENTRY_INT_INFO_VECTOR(u32IntInfo);
+ uint32_t const uIntType = VMX_ENTRY_INT_INFO_TYPE(u32IntInfo);
+
+#ifdef VBOX_STRICT
+ /*
+ * Validate the error-code-valid bit for hardware exceptions.
+ * No error codes for exceptions in real-mode.
+ *
+ * See Intel spec. 20.1.4 "Interrupt and Exception Handling"
+ */
+ if ( uIntType == VMX_EXIT_INT_INFO_TYPE_HW_XCPT
+ && !CPUMIsGuestInRealModeEx(pCtx))
+ {
+ switch (uVector)
+ {
+ case X86_XCPT_PF:
+ case X86_XCPT_DF:
+ case X86_XCPT_TS:
+ case X86_XCPT_NP:
+ case X86_XCPT_SS:
+ case X86_XCPT_GP:
+ case X86_XCPT_AC:
+ AssertMsg(VMX_ENTRY_INT_INFO_IS_ERROR_CODE_VALID(u32IntInfo),
+ ("Error-code-valid bit not set for exception that has an error code uVector=%#x\n", uVector));
+ RT_FALL_THRU();
+ default:
+ break;
+ }
+ }
+#endif
+
+ /* Cannot inject an NMI when block-by-MOV SS is in effect. */
+ Assert( uIntType != VMX_EXIT_INT_INFO_TYPE_NMI
+ || !(*pfIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS));
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[uVector & MASK_INJECT_IRQ_STAT]);
+
+ /*
+ * Hardware interrupts & exceptions cannot be delivered through the software interrupt
+ * redirection bitmap to the real mode task in virtual-8086 mode. We must jump to the
+ * interrupt handler in the (real-mode) guest.
+ *
+ * See Intel spec. 20.3 "Interrupt and Exception handling in Virtual-8086 Mode".
+ * See Intel spec. 20.1.4 "Interrupt and Exception Handling" for real-mode interrupt handling.
+ */
+ if (CPUMIsGuestInRealModeEx(pCtx)) /* CR0.PE bit changes are always intercepted, so it's up to date. */
+ {
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest)
+ {
+ /*
+ * For unrestricted execution enabled CPUs running real-mode guests, we must not
+ * set the deliver-error-code bit.
+ *
+ * See Intel spec. 26.2.1.3 "VM-Entry Control Fields".
+ */
+ u32IntInfo &= ~VMX_ENTRY_INT_INFO_ERROR_CODE_VALID;
+ }
+ else
+ {
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ Assert(PDMVmmDevHeapIsEnabled(pVM));
+ Assert(pVM->hm.s.vmx.pRealModeTSS);
+
+ /* We require RIP, RSP, RFLAGS, CS, IDTR, import them. */
+ int rc2 = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_TABLE_MASK | CPUMCTX_EXTRN_RIP
+ | CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_RFLAGS);
+ AssertRCReturn(rc2, rc2);
+
+ /* Check if the interrupt handler is present in the IVT (real-mode IDT). IDT limit is (4N - 1). */
+ size_t const cbIdtEntry = sizeof(X86IDTR16);
+ if (uVector * cbIdtEntry + (cbIdtEntry - 1) > pCtx->idtr.cbIdt)
+ {
+ /* If we are trying to inject a #DF with no valid IDT entry, return a triple-fault. */
+ if (uVector == X86_XCPT_DF)
+ return VINF_EM_RESET;
+
+ /* If we're injecting a #GP with no valid IDT entry, inject a double-fault. */
+ if (uVector == X86_XCPT_GP)
+ return hmR0VmxInjectXcptDF(pVCpu, fStepping, pfIntrState);
+
+ /*
+ * If we're injecting an event with no valid IDT entry, inject a #GP.
+ * No error codes for exceptions in real-mode.
+ *
+ * See Intel spec. 20.1.4 "Interrupt and Exception Handling"
+ */
+ return hmR0VmxInjectXcptGP(pVCpu, false /* fErrCodeValid */, 0 /* u32ErrCode */, fStepping, pfIntrState);
+ }
+
+ /* Software exceptions (#BP and #OF exceptions thrown as a result of INT3 or INTO) */
+ uint16_t uGuestIp = pCtx->ip;
+ if (uIntType == VMX_ENTRY_INT_INFO_TYPE_SW_XCPT)
+ {
+ Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF);
+ /* #BP and #OF are both benign traps, we need to resume the next instruction. */
+ uGuestIp = pCtx->ip + (uint16_t)cbInstr;
+ }
+ else if (uIntType == VMX_ENTRY_INT_INFO_TYPE_SW_INT)
+ uGuestIp = pCtx->ip + (uint16_t)cbInstr;
+
+ /* Get the code segment selector and offset from the IDT entry for the interrupt handler. */
+ X86IDTR16 IdtEntry;
+ RTGCPHYS GCPhysIdtEntry = (RTGCPHYS)pCtx->idtr.pIdt + uVector * cbIdtEntry;
+ rc2 = PGMPhysSimpleReadGCPhys(pVM, &IdtEntry, GCPhysIdtEntry, cbIdtEntry);
+ AssertRCReturn(rc2, rc2);
+
+ /* Construct the stack frame for the interrupt/exception handler. */
+ VBOXSTRICTRC rcStrict;
+ rcStrict = hmR0VmxRealModeGuestStackPush(pVCpu, pCtx->eflags.u32);
+ if (rcStrict == VINF_SUCCESS)
+ rcStrict = hmR0VmxRealModeGuestStackPush(pVCpu, pCtx->cs.Sel);
+ if (rcStrict == VINF_SUCCESS)
+ rcStrict = hmR0VmxRealModeGuestStackPush(pVCpu, uGuestIp);
+
+ /* Clear the required eflag bits and jump to the interrupt/exception handler. */
+ if (rcStrict == VINF_SUCCESS)
+ {
+ pCtx->eflags.u32 &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC);
+ pCtx->rip = IdtEntry.offSel;
+ pCtx->cs.Sel = IdtEntry.uSel;
+ pCtx->cs.ValidSel = IdtEntry.uSel;
+ pCtx->cs.u64Base = IdtEntry.uSel << cbIdtEntry;
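+                /* (Real-mode CS base = selector << 4; cbIdtEntry happens to equal that shift count of 4.) */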
+ if ( uIntType == VMX_ENTRY_INT_INFO_TYPE_HW_XCPT
+ && uVector == X86_XCPT_PF)
+ pCtx->cr2 = GCPtrFaultAddress;
+
+ /* If any other guest-state bits are changed here, make sure to update
+ hmR0VmxPreRunGuestCommitted() when thread-context hooks are used. */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CS | HM_CHANGED_GUEST_CR2
+ | HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS
+ | HM_CHANGED_GUEST_RSP);
+
+ /* We're clearing interrupts, which means no block-by-STI interrupt-inhibition. */
+ if (*pfIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
+ {
+ Assert( uIntType != VMX_ENTRY_INT_INFO_TYPE_NMI
+ && uIntType != VMX_ENTRY_INT_INFO_TYPE_EXT_INT);
+ Log4Func(("Clearing inhibition due to STI\n"));
+ *pfIntrState &= ~VMX_VMCS_GUEST_INT_STATE_BLOCK_STI;
+ }
+ Log4(("Injecting real-mode: u32IntInfo=%#x u32ErrCode=%#x cbInstr=%#x Eflags=%#x CS:EIP=%04x:%04x\n",
+ u32IntInfo, u32ErrCode, cbInstr, pCtx->eflags.u, pCtx->cs.Sel, pCtx->eip));
+
+ /* The event has been truly dispatched. Mark it as no longer pending so we don't attempt to 'undo'
+ it, if we are returning to ring-3 before executing guest code. */
+ pVCpu->hm.s.Event.fPending = false;
+
+ /* Make hmR0VmxPreRunGuest() return if we're stepping since we've changed cs:rip. */
+ if (fStepping)
+ rcStrict = VINF_EM_DBG_STEPPED;
+ }
+ AssertMsg(rcStrict == VINF_SUCCESS || rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
+ ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+ return rcStrict;
+ }
+ }
+
+ /* Validate. */
+ Assert(VMX_ENTRY_INT_INFO_IS_VALID(u32IntInfo)); /* Bit 31 (Valid bit) must be set by caller. */
+ Assert(!(u32IntInfo & VMX_BF_ENTRY_INT_INFO_RSVD_12_30_MASK)); /* Bits 30:12 MBZ. */
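+    /* (Field layout recap: bits 7:0 vector, 10:8 type, bit 11 deliver-error-code, bit 31 valid;
+        see Intel spec. "VM-Entry Controls for Event Injection".) */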
+
+ /* Inject. */
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, u32IntInfo);
+ if (VMX_ENTRY_INT_INFO_IS_ERROR_CODE_VALID(u32IntInfo))
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, u32ErrCode);
+ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
+ AssertRCReturn(rc, rc);
+
+ /* Update CR2. */
+ if ( VMX_ENTRY_INT_INFO_TYPE(u32IntInfo) == VMX_EXIT_INT_INFO_TYPE_HW_XCPT
+ && uVector == X86_XCPT_PF)
+ pCtx->cr2 = GCPtrFaultAddress;
+
+ Log4(("Injecting u32IntInfo=%#x u32ErrCode=%#x cbInstr=%#x CR2=%#RX64\n", u32IntInfo, u32ErrCode, cbInstr, pCtx->cr2));
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Clears the interrupt-window and NMI-window exiting controls in the VMCS
+ * if they are currently set.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks Use this function only to clear events that have not yet been
+ * delivered to the guest but are injected in the VMCS!
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxClearIntNmiWindowsVmcs(PVMCPU pVCpu)
+{
+ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_INT_WINDOW_EXIT)
+ {
+ hmR0VmxClearIntWindowExitVmcs(pVCpu);
+ Log4Func(("Cleared interrupt window\n"));
+ }
+
+ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT)
+ {
+ hmR0VmxClearNmiWindowExitVmcs(pVCpu);
+ Log4Func(("Cleared NMI window\n"));
+ }
+}
+
+
+/**
+ * Enters the VT-x session.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0DECL(int) VMXR0Enter(PVMCPU pVCpu)
+{
+ AssertPtr(pVCpu);
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ LogFlowFunc(("pVCpu=%p\n", pVCpu));
+ Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
+ == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
+
+#ifdef VBOX_STRICT
+ /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
+ RTCCUINTREG uHostCR4 = ASMGetCR4();
+ if (!(uHostCR4 & X86_CR4_VMXE))
+ {
+ LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
+ return VERR_VMX_X86_CR4_VMXE_CLEARED;
+ }
+#endif
+
+ /*
+ * Load the VCPU's VMCS as the current (and active) one.
+ */
+ Assert(pVCpu->hm.s.vmx.fVmcsState & HMVMX_VMCS_STATE_CLEAR);
+ int rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
+ if (RT_SUCCESS(rc))
+ {
+ pVCpu->hm.s.vmx.fVmcsState = HMVMX_VMCS_STATE_ACTIVE;
+ pVCpu->hm.s.fLeaveDone = false;
+ Log4Func(("Activated Vmcs. HostCpuId=%u\n", RTMpCpuId()));
+
+ /*
+ * Do the EMT scheduled L1D flush here if needed.
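+         * (Writing MSR_IA32_FLUSH_CMD with the L1D bit invalidates the L1 data cache; an L1TF mitigation.)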
+ */
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fL1dFlushOnSched)
+ ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
+ }
+ return rc;
+}
+
+
+/**
+ * The thread-context callback (only on platforms which support it).
+ *
+ * @param enmEvent The thread-context event.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
+ * @thread EMT(pVCpu)
+ */
+VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit)
+{
+ NOREF(fGlobalInit);
+
+ switch (enmEvent)
+ {
+ case RTTHREADCTXEVENT_OUT:
+ {
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(VMMR0ThreadCtxHookIsEnabled(pVCpu));
+ VMCPU_ASSERT_EMT(pVCpu);
+
+ /* No longjmps (logger flushes, locks) in this fragile context. */
+ VMMRZCallRing3Disable(pVCpu);
+ Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
+
+ /*
+ * Restore host-state (FPU, debug etc.)
+ */
+ if (!pVCpu->hm.s.fLeaveDone)
+ {
+ /*
+ * Do -not- import the guest-state here as we might already be in the middle of importing
+ * it, esp. bad if we're holding the PGM lock, see comment in hmR0VmxImportGuestState().
+ */
+ hmR0VmxLeave(pVCpu, false /* fImportState */);
+ pVCpu->hm.s.fLeaveDone = true;
+ }
+
+ /* Leave HM context, takes care of local init (term). */
+ int rc = HMR0LeaveCpu(pVCpu);
+ AssertRC(rc); NOREF(rc);
+
+ /* Restore longjmp state. */
+ VMMRZCallRing3Enable(pVCpu);
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
+ break;
+ }
+
+ case RTTHREADCTXEVENT_IN:
+ {
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(VMMR0ThreadCtxHookIsEnabled(pVCpu));
+ VMCPU_ASSERT_EMT(pVCpu);
+
+ /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
+ VMMRZCallRing3Disable(pVCpu);
+ Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
+
+ /* Initialize the bare minimum state required for HM. This takes care of
+ initializing VT-x if necessary (onlined CPUs, local init etc.) */
+ int rc = hmR0EnterCpu(pVCpu);
+ AssertRC(rc);
+ Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
+ == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
+
+ /* Load the active VMCS as the current one. */
+ if (pVCpu->hm.s.vmx.fVmcsState & HMVMX_VMCS_STATE_CLEAR)
+ {
+ rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
+ AssertRC(rc); NOREF(rc);
+ pVCpu->hm.s.vmx.fVmcsState = HMVMX_VMCS_STATE_ACTIVE;
+ Log4Func(("Resumed: Activated Vmcs. HostCpuId=%u\n", RTMpCpuId()));
+ }
+ pVCpu->hm.s.fLeaveDone = false;
+
+ /* Do the EMT scheduled L1D flush if needed. */
+ if (pVCpu->CTX_SUFF(pVM)->hm.s.fL1dFlushOnSched)
+ ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
+
+ /* Restore longjmp state. */
+ VMMRZCallRing3Enable(pVCpu);
+ break;
+ }
+
+ default:
+ break;
+ }
+}
+
+
+/**
+ * Exports the host state into the VMCS host-state area.
+ * Sets up the VM-exit MSR-load area.
+ *
+ * The CPU state will be loaded from these fields on every successful VM-exit.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxExportHostState(PVMCPU pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ int rc = VINF_SUCCESS;
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
+ {
+ rc = hmR0VmxExportHostControlRegs();
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ rc = hmR0VmxExportHostSegmentRegs(pVCpu);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ rc = hmR0VmxExportHostMsrs(pVCpu);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
+ }
+ return rc;
+}
+
+
+/**
+ * Saves the host state in the VMCS host-state.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+VMMR0DECL(int) VMXR0ExportHostState(PVMCPU pVCpu)
+{
+ AssertPtr(pVCpu);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /*
+ * Export the host state here while entering HM context.
+ * When thread-context hooks are used, we might get preempted and have to re-save the host
+ * state but most of the time we won't be, so do it here before we disable interrupts.
+ */
+ return hmR0VmxExportHostState(pVCpu);
+}
+
+
+/**
+ * Exports the guest state into the VMCS guest-state area.
+ *
+ * This will typically be done before VM-entry when the guest-CPU state and the
+ * VMCS state may potentially be out of sync.
+ *
+ * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
+ * VM-entry controls.
+ * Sets up the appropriate VMX non-root function to execute guest code based on
+ * the guest CPU mode.
+ *
+ * @returns VBox strict status code.
+ * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
+ * without unrestricted guest access and the VMMDev is not presently
+ * mapped (e.g. EFI32).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPU pVCpu)
+{
+ AssertPtr(pVCpu);
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ LogFlowFunc(("pVCpu=%p\n", pVCpu));
+
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
+
+ /* Determine real-on-v86 mode. */
+ pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = false;
+ if ( !pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest
+ && CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
+ pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = true;
+
+ /*
+ * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
+ * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
+ */
+ int rc = hmR0VmxSelectVMRunHandler(pVCpu);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ /* This needs to be done after hmR0VmxSelectVMRunHandler() as changing pfnStartVM may require VM-entry control updates. */
+ rc = hmR0VmxExportGuestEntryCtls(pVCpu);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ /* This needs to be done after hmR0VmxSelectVMRunHandler() as changing pfnStartVM may require VM-exit control updates. */
+ rc = hmR0VmxExportGuestExitCtls(pVCpu);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ rc = hmR0VmxExportGuestCR0(pVCpu);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ VBOXSTRICTRC rcStrict = hmR0VmxExportGuestCR3AndCR4(pVCpu);
+ if (rcStrict == VINF_SUCCESS)
+ { /* likely */ }
+ else
+ {
+ Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
+ return rcStrict;
+ }
+
+ rc = hmR0VmxExportGuestSegmentRegs(pVCpu);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ /* This needs to be done after hmR0VmxExportGuestEntryCtls() and hmR0VmxExportGuestExitCtls() as it
+ may alter controls if we determine we don't have to swap EFER after all. */
+ rc = hmR0VmxExportGuestMsrs(pVCpu);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ rc = hmR0VmxExportGuestApicTpr(pVCpu);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ rc = hmR0VmxExportGuestXcptIntercepts(pVCpu);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ rc = hmR0VmxExportGuestRip(pVCpu);
+ rc |= hmR0VmxExportGuestRsp(pVCpu);
+ rc |= hmR0VmxExportGuestRflags(pVCpu);
+ AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
+
+ /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */
+ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
+ | HM_CHANGED_GUEST_CR2
+ | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
+ | HM_CHANGED_GUEST_X87
+ | HM_CHANGED_GUEST_SSE_AVX
+ | HM_CHANGED_GUEST_OTHER_XSAVE
+ | HM_CHANGED_GUEST_XCRx
+ | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
+ | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
+ | HM_CHANGED_GUEST_TSC_AUX
+ | HM_CHANGED_GUEST_OTHER_MSRS
+ | HM_CHANGED_GUEST_HWVIRT
+ | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
+ return rc;
+}
+
+
+/**
+ * Exports the state shared between the host and guest into the VMCS.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxExportSharedState(PVMCPU pVCpu)
+{
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
+ {
+ int rc = hmR0VmxExportSharedDebugState(pVCpu);
+ AssertRC(rc);
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
+
+ /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
+ {
+ rc = hmR0VmxExportGuestRflags(pVCpu);
+ AssertRC(rc);
+ }
+ }
+
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
+ {
+ hmR0VmxLazyLoadGuestMsrs(pVCpu);
+ pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
+ }
+
+ AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
+ ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
+}
+
+
+/**
+ * Worker for loading the guest-state bits in the inner VT-x execution loop.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
+ * without unrestricted guest access and the VMMDev is not presently
+ * mapped (e.g. EFI32).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPU pVCpu)
+{
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(VMMR0IsLogFlushDisabled(pVCpu));
+
+#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+#endif
+
+ /*
+ * For many exits it's only RIP that changes and hence try to export it first
+ * without going through a lot of change flag checks.
+ */
+ VBOXSTRICTRC rcStrict;
+ uint64_t fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
+ RT_UNTRUSTED_NONVOLATILE_COPY_FENCE();
+ if ((fCtxChanged & (HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)) == HM_CHANGED_GUEST_RIP)
+ {
+ rcStrict = hmR0VmxExportGuestRip(pVCpu);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ { /* likely */}
+ else
+ AssertMsgFailedReturn(("hmR0VmxExportGuestRip failed! rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)), rcStrict);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
+ }
+ else if (fCtxChanged & (HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
+ {
+ rcStrict = hmR0VmxExportGuestState(pVCpu);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ { /* likely */}
+ else
+ {
+ AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("hmR0VmxExportGuestState failed! rc=%Rrc\n",
+ VBOXSTRICTRC_VAL(rcStrict)));
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ return rcStrict;
+ }
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
+ }
+ else
+ rcStrict = VINF_SUCCESS;
+
+#ifdef VBOX_STRICT
+ /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
+ fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
+ RT_UNTRUSTED_NONVOLATILE_COPY_FENCE();
+ AssertMsg(!(fCtxChanged & (HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)),
+ ("fCtxChanged=%#RX64\n", fCtxChanged));
+#endif
+ return rcStrict;
+}
+
+
+/**
+ * Does the preparations before executing guest code in VT-x.
+ *
+ * This may cause longjmps to ring-3 and may even result in rescheduling to the
+ * recompiler/IEM. We must be cautious what we do here regarding committing
+ * guest-state information into the VMCS assuming we assuredly execute the
+ * guest in VT-x mode.
+ *
+ * If we fall back to the recompiler/IEM after updating the VMCS and clearing
+ * the common-state (TRPM/forceflags), we must undo those changes so that the
+ * recompiler/IEM can (and should) use them when it resumes guest execution.
+ * Otherwise such operations must be done when we can no longer exit to ring-3.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
+ * have been disabled.
+ * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
+ * double-fault into the guest.
+ * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
+ * dispatched directly.
+ * @retval VINF_* scheduling changes, we have to go back to ring-3.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ * @param fStepping Set if called from hmR0VmxRunGuestCodeStep(). Makes
+ * us ignore some of the reasons for returning to
+ * ring-3, and return VINF_EM_DBG_STEPPED if event
+ * dispatching took place.
+ */
+static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
+{
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ if (CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
+ {
+ Log2(("hmR0VmxPreRunGuest: Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
+ RT_NOREF3(pVCpu, pVmxTransient, fStepping);
+ return VINF_EM_RESCHEDULE_REM;
+ }
+#endif
+
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
+ PGMRZDynMapFlushAutoSet(pVCpu);
+#endif
+
+ /* Check force flag actions that might require us to go back to ring-3. */
+ VBOXSTRICTRC rcStrict = hmR0VmxCheckForceFlags(pVCpu, fStepping);
+ if (rcStrict == VINF_SUCCESS)
+ { /* FFs doesn't get set all the time. */ }
+ else
+ return rcStrict;
+
+ /*
+ * Setup the virtualized-APIC accesses.
+ *
+     * Note! This can cause longjmps to R3 due to the acquisition of the PGM lock
+ * in both PGMHandlerPhysicalReset() and IOMMMIOMapMMIOHCPage(), see @bugref{8721}.
+ *
+ * This is the reason we do it here and not in hmR0VmxExportGuestState().
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if ( !pVCpu->hm.s.vmx.u64MsrApicBase
+ && (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
+ && PDMHasApic(pVM))
+ {
+ uint64_t const u64MsrApicBase = APICGetBaseMsrNoCheck(pVCpu);
+ Assert(u64MsrApicBase);
+ Assert(pVM->hm.s.vmx.HCPhysApicAccess);
+
+ RTGCPHYS const GCPhysApicBase = u64MsrApicBase & PAGE_BASE_GC_MASK;
+
+ /* Unalias any existing mapping. */
+ int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
+ AssertRCReturn(rc, rc);
+
+ /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
+ Log4Func(("Mapped HC APIC-access page at %#RGp\n", GCPhysApicBase));
+ rc = IOMMMIOMapMMIOHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
+ AssertRCReturn(rc, rc);
+
+ /* Update the per-VCPU cache of the APIC base MSR. */
+ pVCpu->hm.s.vmx.u64MsrApicBase = u64MsrApicBase;
+ }
+
+ if (TRPMHasTrap(pVCpu))
+ hmR0VmxTrpmTrapToPendingEvent(pVCpu);
+ uint32_t fIntrState = hmR0VmxEvaluatePendingEvent(pVCpu);
+
+ /*
+ * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus
+ * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
+ * also result in triple-faulting the VM.
+ */
+ rcStrict = hmR0VmxInjectPendingEvent(pVCpu, fIntrState, fStepping);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ { /* likely */ }
+ else
+ {
+ AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
+ ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+ return rcStrict;
+ }
+
+ /*
+ * A longjump might result in importing CR3 even for VM-exits that don't necessarily
+ * import CR3 themselves. We will need to update them here, as even as late as the above
+ * hmR0VmxInjectPendingEvent() call may lazily import guest-CPU state on demand causing
+ * the below force flags to be set.
+ */
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
+ {
+ Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
+ int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
+ AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
+ ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
+ }
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES))
+ {
+ PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
+ }
+
+ /*
+ * No longjmps to ring-3 from this point on!!!
+ * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
+ * This also disables flushing of the R0-logger instance (if any).
+ */
+ VMMRZCallRing3Disable(pVCpu);
+
+ /*
+ * Export the guest state bits.
+ *
+ * We cannot perform longjmps while loading the guest state because we do not preserve the
+ * host/guest state (although the VMCS will be preserved) across longjmps which can cause
+ * CPU migration.
+ *
+ * If we are injecting events to a real-on-v86 mode guest, we will have to update
+ * RIP and some segment registers, i.e. hmR0VmxInjectPendingEvent()->hmR0VmxInjectEventVmcs().
+ * Hence, loading of the guest state needs to be done -after- injection of events.
+ */
+ rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ { /* likely */ }
+ else
+ {
+ VMMRZCallRing3Enable(pVCpu);
+ return rcStrict;
+ }
+
+ /*
+ * We disable interrupts so that we don't miss any interrupts that would flag preemption
+ * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
+     * preemption disabled for a while. Since this is purely to aid the
+     * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and
+     * disable interrupts on NT.
+ *
+ * We need to check for force-flags that could've possible been altered since we last
+ * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
+ * see @bugref{6398}).
+ *
+ * We also check a couple of other force-flags as a last opportunity to get the EMT back
+ * to ring-3 before executing guest code.
+ */
+ pVmxTransient->fEFlags = ASMIntDisableFlags();
+
+ if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
+ || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
+ {
+ if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
+ {
+ pVCpu->hm.s.Event.fPending = false;
+
+ /*
+ * We've injected any pending events. This is really the point of no return (to ring-3).
+ *
+ * Note! The caller expects to continue with interrupts & longjmps disabled on successful
+ * returns from this function, so don't enable them here.
+ */
+ return VINF_SUCCESS;
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
+ rcStrict = VINF_EM_RAW_INTERRUPT;
+ }
+ else
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
+ rcStrict = VINF_EM_RAW_TO_R3;
+ }
+
+ ASMSetFlags(pVmxTransient->fEFlags);
+ VMMRZCallRing3Enable(pVCpu);
+
+ return rcStrict;
+}
+
+
+/**
+ * Prepares to run guest code in VT-x and we've committed to doing so. This
+ * means there is no backing out to ring-3 or anywhere else at this
+ * point.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ *
+ * @remarks Called with preemption disabled.
+ * @remarks No-long-jump zone!!!
+ */
+static void hmR0VmxPreRunGuestCommitted(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(VMMR0IsLogFlushDisabled(pVCpu));
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ /*
+ * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
+ */
+ VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
+
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if (!CPUMIsGuestFPUStateActive(pVCpu))
+ {
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
+ if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT;
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
+ }
+
+ /*
+ * Lazy-update of the host MSRs values in the auto-load/store MSR area.
+ */
+ if ( !pVCpu->hm.s.vmx.fUpdatedHostMsrs
+ && pVCpu->hm.s.vmx.cMsrs > 0)
+ hmR0VmxUpdateAutoLoadStoreHostMsrs(pVCpu);
+
+ /*
+ * Re-save the host state bits as we may've been preempted (only happens when
+ * thread-context hooks are used or when hmR0VmxSetupVMRunHandler() changes pfnStartVM).
+ * Note that the 64-on-32 switcher saves the (64-bit) host state into the VMCS and
+ * if we change the switcher back to 32-bit, we *must* save the 32-bit host state here.
+ * See @bugref{8432}.
+ */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
+ {
+ int rc = hmR0VmxExportHostState(pVCpu);
+ AssertRC(rc);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreemptExportHostState);
+ }
+ Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
+
+ /*
+ * Export the state shared between host and guest (FPU, debug, lazy MSRs).
+ */
+ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
+ hmR0VmxExportSharedState(pVCpu);
+ AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
+
+ /* Store status of the shared guest-host state at the time of VM-entry. */
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx))
+ {
+ pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActivePending(pVCpu);
+ pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActivePending(pVCpu);
+ }
+ else
+#endif
+ {
+ pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
+ pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
+ }
+
+ /*
+ * Cache the TPR-shadow for checking on every VM-exit if it might have changed.
+ */
+ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
+ pVmxTransient->u8GuestTpr = pVCpu->hm.s.vmx.pbVirtApic[XAPIC_OFF_TPR];
+
+ PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
+ RTCPUID idCurrentCpu = pHostCpu->idCpu;
+ if ( pVmxTransient->fUpdateTscOffsettingAndPreemptTimer
+ || idCurrentCpu != pVCpu->hm.s.idLastCpu)
+ {
+ hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu);
+ pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = false;
+ }
+
+ ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
+ hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu); /* Invalidate the appropriate guest entries from the TLB. */
+ Assert(idCurrentCpu == pVCpu->hm.s.idLastCpu);
+ pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Update the error reporting info. with the current host CPU. */
+
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
+
+ TMNotifyStartOfExecution(pVCpu); /* Finally, notify TM to resume its clocks as we're about
+ to start executing. */
+
+ /*
+ * Load the TSC_AUX MSR when we are not intercepting RDTSCP.
+ */
+ if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
+ {
+ if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
+ {
+ bool fMsrUpdated;
+ hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_TSC_AUX);
+ int rc2 = hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu), true /* fUpdateHostMsr */,
+ &fMsrUpdated);
+ AssertRC(rc2);
+ Assert(fMsrUpdated || pVCpu->hm.s.vmx.fUpdatedHostMsrs);
+ /* Finally, mark that all host MSR values are updated so we don't redo it without leaving VT-x. See @bugref{6956}. */
+ pVCpu->hm.s.vmx.fUpdatedHostMsrs = true;
+ }
+ else
+ {
+ hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, MSR_K8_TSC_AUX);
+ Assert(!pVCpu->hm.s.vmx.cMsrs || pVCpu->hm.s.vmx.fUpdatedHostMsrs);
+ }
+ }
+
+ if (pVM->cpum.ro.GuestFeatures.fIbrs)
+ {
+ bool fMsrUpdated;
+ hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_OTHER_MSRS);
+ int rc2 = hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu), true /* fUpdateHostMsr */,
+ &fMsrUpdated);
+ AssertRC(rc2);
+ Assert(fMsrUpdated || pVCpu->hm.s.vmx.fUpdatedHostMsrs);
+ /* Finally, mark that all host MSR values are updated so we don't redo it without leaving VT-x. See @bugref{6956}. */
+ pVCpu->hm.s.vmx.fUpdatedHostMsrs = true;
+ }
+
+#ifdef VBOX_STRICT
+ hmR0VmxCheckAutoLoadStoreMsrs(pVCpu);
+ hmR0VmxCheckHostEferMsr(pVCpu);
+ AssertRC(hmR0VmxCheckVmcsCtls(pVCpu));
+#endif
+#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
+ if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
+ {
+ uint32_t uInvalidReason = hmR0VmxCheckGuestState(pVCpu);
+ if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
+ Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
+ }
+#endif
+}
+
+
+/**
+ * Performs some essential restoration of state after running guest code in
+ * VT-x.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
+ *
+ * @remarks Called with interrupts disabled, and returns with interrupts enabled!
+ *
+ * @remarks No-long-jump zone!!! This function will however re-enable longjmps
+ * unconditionally when it is safe to do so.
+ */
+static void hmR0VmxPostRunGuest(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
+{
+ uint64_t const uHostTsc = ASMReadTSC();
+ Assert(!VMMRZCallRing3IsEnabled(pVCpu));
+
+ ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
+ ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
+ pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
+ pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
+ pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
+ pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
+
+ if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
+ TMCpuTickSetLastSeen(pVCpu, uHostTsc + pVCpu->hm.s.vmx.u64TscOffset);
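+    /* (With TSC offsetting active, the last TSC value the guest could have observed is roughly
+        host TSC + u64TscOffset, hence the addition above.) */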
+
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
+ TMNotifyEndOfExecution(pVCpu); /* Notify TM that the guest is no longer running. */
+ Assert(!ASMIntAreEnabled());
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
+
+#if HC_ARCH_BITS == 64
+ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Host state messed up by VT-x, we must restore. */
+#endif
+#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
+ /* The 64-on-32 switcher maintains fVmcsState on its own and we need to leave it alone here. */
+ if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0SwitcherStartVM64)
+ pVCpu->hm.s.vmx.fVmcsState |= HMVMX_VMCS_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
+#else
+ pVCpu->hm.s.vmx.fVmcsState |= HMVMX_VMCS_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
+#endif
+#ifdef VBOX_STRICT
+ hmR0VmxCheckHostEferMsr(pVCpu); /* Verify that VMRUN/VMLAUNCH didn't modify host EFER. */
+#endif
+ ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
+
+ /* Save the basic VM-exit reason. Refer Intel spec. 24.9.1 "Basic VM-exit Information". */
+ uint32_t uExitReason;
+ int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
+ rc |= hmR0VmxReadEntryIntInfoVmcs(pVmxTransient);
+ AssertRC(rc);
+ pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
+ pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
+
+ if (rcVMRun == VINF_SUCCESS)
+ {
+ /*
+ * Update the VM-exit history array here even if the VM-entry failed due to:
+ * - Invalid guest state.
+ * - MSR loading.
+ * - Machine-check event.
+ *
+ * In any of the above cases we will still have a "valid" VM-exit reason
+         * despite @a fVMEntryFailed being true.
+ *
+ * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
+ *
+ * Note! We don't have CS or RIP at this point. Will probably address that later
+ * by amending the history entry added here.
+ */
+ EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
+ UINT64_MAX, uHostTsc);
+
+ if (!pVmxTransient->fVMEntryFailed)
+ {
+ VMMRZCallRing3Enable(pVCpu);
+
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
+
+#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
+ rc = hmR0VmxImportGuestState(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRC(rc);
+#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
+ rc = hmR0VmxImportGuestState(pVCpu, HMVMX_CPUMCTX_EXTRN_RFLAGS);
+ AssertRC(rc);
+#else
+ /*
+ * Import the guest-interruptibility state always as we need it while evaluating
+ * injecting events on re-entry.
+ *
+ * We don't import CR0 (when Unrestricted guest execution is unavailable) despite
+ * checking for real-mode while exporting the state because all bits that cause
+ * mode changes wrt CR0 are intercepted.
+ */
+ rc = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_HM_VMX_INT_STATE);
+ AssertRC(rc);
+#endif
+
+ /*
+ * Sync the TPR shadow with our APIC state.
+ */
+ if ( (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
+ && pVmxTransient->u8GuestTpr != pVCpu->hm.s.vmx.pbVirtApic[XAPIC_OFF_TPR])
+ {
+ rc = APICSetTpr(pVCpu, pVCpu->hm.s.vmx.pbVirtApic[XAPIC_OFF_TPR]);
+ AssertRC(rc);
+ ASMAtomicOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ }
+
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+ return;
+ }
+ }
+ else
+ Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
+
+ VMMRZCallRing3Enable(pVCpu);
+}
+
+
+/**
+ * Runs the guest code using VT-x the normal way.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @note Mostly the same as hmR0VmxRunGuestCodeStep().
+ */
+static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPU pVCpu)
+{
+ VMXTRANSIENT VmxTransient;
+ VmxTransient.fUpdateTscOffsettingAndPreemptTimer = true;
+ VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
+ uint32_t cLoops = 0;
+
+ for (;; cLoops++)
+ {
+ Assert(!HMR0SuspendPending());
+ HMVMX_ASSERT_CPU_SAFE(pVCpu);
+
+ /* Preparatory work for running guest code, this may force us to return
+ to ring-3. This bugger disables interrupts on VINF_SUCCESS! */
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+ rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
+ if (rcStrict != VINF_SUCCESS)
+ break;
+
+ hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
+ int rcRun = hmR0VmxRunGuest(pVCpu);
+
+ /* Restore any residual host-state and save any bits shared between host
+ and guest into the guest-CPU state. Re-enables interrupts! */
+ hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
+
+ /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
+ if (RT_SUCCESS(rcRun))
+ { /* very likely */ }
+ else
+ {
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
+ hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
+ return rcRun;
+ }
+
+ /* Profile the VM-exit. */
+ AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
+ HMVMX_START_EXIT_DISPATCH_PROF();
+
+ VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
+
+ /* Handle the VM-exit. */
+#ifdef HMVMX_USE_FUNCTION_TABLE
+ rcStrict = g_apfnVMExitHandlers[VmxTransient.uExitReason](pVCpu, &VmxTransient);
+#else
+ rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient, VmxTransient.uExitReason);
+#endif
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
+ if (rcStrict == VINF_SUCCESS)
+ {
+ if (cLoops <= pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops)
+ continue; /* likely */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+ rcStrict = VINF_EM_RAW_INTERRUPT;
+ }
+ break;
+ }
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+ return rcStrict;
+}
+
+
+
+/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
+ * probes.
+ *
+ * The following few functions and associated structure contain the bloat
+ * necessary for providing detailed debug events and dtrace probes as well as
+ * reliable host side single stepping. This works on the principle of
+ * "subclassing" the normal execution loop and workers. We replace the loop
+ * method completely and override selected helpers to add necessary adjustments
+ * to their core operation.
+ *
+ * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
+ * any performance for debug and analysis features.
+ *
+ * @{
+ */
+
+/**
+ * Transient per-VCPU debug state of the VMCS and related info that we save/restore in
+ * the debug run loop.
+ */
+typedef struct VMXRUNDBGSTATE
+{
+ /** The RIP we started executing at. This is for detecting that we stepped. */
+ uint64_t uRipStart;
+ /** The CS we started executing with. */
+ uint16_t uCsStart;
+
+ /** Whether we've actually modified the 1st execution control field. */
+ bool fModifiedProcCtls : 1;
+ /** Whether we've actually modified the 2nd execution control field. */
+ bool fModifiedProcCtls2 : 1;
+ /** Whether we've actually modified the exception bitmap. */
+ bool fModifiedXcptBitmap : 1;
+
+    /** We desire the CR0 mask to be cleared. */
+ bool fClearCr0Mask : 1;
+    /** We desire the CR4 mask to be cleared. */
+ bool fClearCr4Mask : 1;
+ /** Stuff we need in VMX_VMCS32_CTRL_PROC_EXEC. */
+ uint32_t fCpe1Extra;
+ /** Stuff we do not want in VMX_VMCS32_CTRL_PROC_EXEC. */
+ uint32_t fCpe1Unwanted;
+ /** Stuff we need in VMX_VMCS32_CTRL_PROC_EXEC2. */
+ uint32_t fCpe2Extra;
+ /** Extra stuff we need in VMX_VMCS32_CTRL_EXCEPTION_BITMAP. */
+ uint32_t bmXcptExtra;
+ /** The sequence number of the Dtrace provider settings the state was
+ * configured against. */
+ uint32_t uDtraceSettingsSeqNo;
+ /** VM-exits to check (one bit per VM-exit). */
+ uint32_t bmExitsToCheck[3];
+
+ /** The initial VMX_VMCS32_CTRL_PROC_EXEC value (helps with restore). */
+ uint32_t fProcCtlsInitial;
+ /** The initial VMX_VMCS32_CTRL_PROC_EXEC2 value (helps with restore). */
+ uint32_t fProcCtls2Initial;
+ /** The initial VMX_VMCS32_CTRL_EXCEPTION_BITMAP value (helps with restore). */
+ uint32_t bmXcptInitial;
+} VMXRUNDBGSTATE;
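+/* bmExitsToCheck must hold one bit per basic VM-exit reason (0..VMX_EXIT_MAX), rounded up to whole 32-bit words. */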
+AssertCompileMemberSize(VMXRUNDBGSTATE, bmExitsToCheck, (VMX_EXIT_MAX + 1 + 31) / 32 * 4);
+typedef VMXRUNDBGSTATE *PVMXRUNDBGSTATE;
+
+
+/**
+ * Initializes the VMXRUNDBGSTATE structure.
+ *
+ * @param pVCpu The cross context virtual CPU structure of the
+ * calling EMT.
+ * @param pDbgState The structure to initialize.
+ */
+static void hmR0VmxRunDebugStateInit(PVMCPU pVCpu, PVMXRUNDBGSTATE pDbgState)
+{
+ pDbgState->uRipStart = pVCpu->cpum.GstCtx.rip;
+ pDbgState->uCsStart = pVCpu->cpum.GstCtx.cs.Sel;
+
+ pDbgState->fModifiedProcCtls = false;
+ pDbgState->fModifiedProcCtls2 = false;
+ pDbgState->fModifiedXcptBitmap = false;
+ pDbgState->fClearCr0Mask = false;
+ pDbgState->fClearCr4Mask = false;
+ pDbgState->fCpe1Extra = 0;
+ pDbgState->fCpe1Unwanted = 0;
+ pDbgState->fCpe2Extra = 0;
+ pDbgState->bmXcptExtra = 0;
+ pDbgState->fProcCtlsInitial = pVCpu->hm.s.vmx.u32ProcCtls;
+ pDbgState->fProcCtls2Initial = pVCpu->hm.s.vmx.u32ProcCtls2;
+ pDbgState->bmXcptInitial = pVCpu->hm.s.vmx.u32XcptBitmap;
+}
+
+
+/**
+ * Updates the VMCS fields with changes requested by @a pDbgState.
+ *
+ * This is performed after hmR0VmxPreRunGuestDebugStateUpdate as well as
+ * immediately before executing guest code, i.e. when interrupts are disabled.
+ * We don't check status codes here as we cannot easily assert or return in the
+ * latter case.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pDbgState The debug state.
+ */
+static void hmR0VmxPreRunGuestDebugStateApply(PVMCPU pVCpu, PVMXRUNDBGSTATE pDbgState)
+{
+ /*
+ * Ensure desired flags in VMCS control fields are set.
+ * (Ignoring write failure here, as we're committed and it's just debug extras.)
+ *
+ * Note! We load the shadow CR0 & CR4 bits when we flag the clearing, so
+ * there should be no stale data in pCtx at this point.
+ */
+ if ( (pVCpu->hm.s.vmx.u32ProcCtls & pDbgState->fCpe1Extra) != pDbgState->fCpe1Extra
+ || (pVCpu->hm.s.vmx.u32ProcCtls & pDbgState->fCpe1Unwanted))
+ {
+ pVCpu->hm.s.vmx.u32ProcCtls |= pDbgState->fCpe1Extra;
+ pVCpu->hm.s.vmx.u32ProcCtls &= ~pDbgState->fCpe1Unwanted;
+ VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
+ Log6Func(("VMX_VMCS32_CTRL_PROC_EXEC: %#RX32\n", pVCpu->hm.s.vmx.u32ProcCtls));
+ pDbgState->fModifiedProcCtls = true;
+ }
+
+ if ((pVCpu->hm.s.vmx.u32ProcCtls2 & pDbgState->fCpe2Extra) != pDbgState->fCpe2Extra)
+ {
+ pVCpu->hm.s.vmx.u32ProcCtls2 |= pDbgState->fCpe2Extra;
+ VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, pVCpu->hm.s.vmx.u32ProcCtls2);
+ Log6Func(("VMX_VMCS32_CTRL_PROC_EXEC2: %#RX32\n", pVCpu->hm.s.vmx.u32ProcCtls2));
+ pDbgState->fModifiedProcCtls2 = true;
+ }
+
+ if ((pVCpu->hm.s.vmx.u32XcptBitmap & pDbgState->bmXcptExtra) != pDbgState->bmXcptExtra)
+ {
+ pVCpu->hm.s.vmx.u32XcptBitmap |= pDbgState->bmXcptExtra;
+ VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap);
+ Log6Func(("VMX_VMCS32_CTRL_EXCEPTION_BITMAP: %#RX32\n", pVCpu->hm.s.vmx.u32XcptBitmap));
+ pDbgState->fModifiedXcptBitmap = true;
+ }
+
+ if (pDbgState->fClearCr0Mask && pVCpu->hm.s.vmx.u32Cr0Mask != 0)
+ {
+ pVCpu->hm.s.vmx.u32Cr0Mask = 0;
+ VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_MASK, 0);
+ Log6Func(("VMX_VMCS_CTRL_CR0_MASK: 0\n"));
+ }
+
+ if (pDbgState->fClearCr4Mask && pVCpu->hm.s.vmx.u32Cr4Mask != 0)
+ {
+ pVCpu->hm.s.vmx.u32Cr4Mask = 0;
+ VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_MASK, 0);
+ Log6Func(("VMX_VMCS_CTRL_CR4_MASK: 0\n"));
+ }
+}
+
+
+/**
+ * Restores VMCS fields that were changed by hmR0VmxPreRunGuestDebugStateApply for
+ * re-entry next time around.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pDbgState The debug state.
+ * @param rcStrict The return code from executing the guest using single
+ * stepping.
+ */
+static VBOXSTRICTRC hmR0VmxRunDebugStateRevert(PVMCPU pVCpu, PVMXRUNDBGSTATE pDbgState, VBOXSTRICTRC rcStrict)
+{
+ /*
+ * Restore VM-exit control settings as we may not reenter this function the
+ * next time around.
+ */
+    /* We reload the initial value and trigger whatever recalculations we can the
+ next time around. From the looks of things, that's all that's required atm. */
+ if (pDbgState->fModifiedProcCtls)
+ {
+ if (!(pDbgState->fProcCtlsInitial & VMX_PROC_CTLS_MOV_DR_EXIT) && CPUMIsHyperDebugStateActive(pVCpu))
+ pDbgState->fProcCtlsInitial |= VMX_PROC_CTLS_MOV_DR_EXIT; /* Avoid assertion in hmR0VmxLeave */
+ int rc2 = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pDbgState->fProcCtlsInitial);
+ AssertRCReturn(rc2, rc2);
+ pVCpu->hm.s.vmx.u32ProcCtls = pDbgState->fProcCtlsInitial;
+ }
+
+ /* We're currently the only ones messing with this one, so just restore the
+ cached value and reload the field. */
+ if ( pDbgState->fModifiedProcCtls2
+ && pVCpu->hm.s.vmx.u32ProcCtls2 != pDbgState->fProcCtls2Initial)
+ {
+ int rc2 = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, pDbgState->fProcCtls2Initial);
+ AssertRCReturn(rc2, rc2);
+ pVCpu->hm.s.vmx.u32ProcCtls2 = pDbgState->fProcCtls2Initial;
+ }
+
+ /* If we've modified the exception bitmap, we restore it and trigger
+ reloading and partial recalculation the next time around. */
+ if (pDbgState->fModifiedXcptBitmap)
+ pVCpu->hm.s.vmx.u32XcptBitmap = pDbgState->bmXcptInitial;
+
+ return rcStrict;
+}
+
+
+/**
+ * Configures VM-exit controls for current DBGF and DTrace settings.
+ *
+ * This updates @a pDbgState and the VMCS execution control fields to reflect
+ * the necessary VM-exits demanded by DBGF and DTrace.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pDbgState The debug state.
+ * @param pVmxTransient Pointer to the VMX transient structure. May update
+ * fUpdateTscOffsettingAndPreemptTimer.
+ */
+static void hmR0VmxPreRunGuestDebugStateUpdate(PVMCPU pVCpu, PVMXRUNDBGSTATE pDbgState, PVMXTRANSIENT pVmxTransient)
+{
+ /*
+ * Take down the dtrace serial number so we can spot changes.
+ */
+ pDbgState->uDtraceSettingsSeqNo = VBOXVMM_GET_SETTINGS_SEQ_NO();
+ ASMCompilerBarrier();
+
+ /*
+ * We'll rebuild most of the middle block of data members (holding the
+ * current settings) as we go along here, so start by clearing it all.
+ */
+ pDbgState->bmXcptExtra = 0;
+ pDbgState->fCpe1Extra = 0;
+ pDbgState->fCpe1Unwanted = 0;
+ pDbgState->fCpe2Extra = 0;
+ for (unsigned i = 0; i < RT_ELEMENTS(pDbgState->bmExitsToCheck); i++)
+ pDbgState->bmExitsToCheck[i] = 0;
+
+ /*
+ * Software interrupts (INT XXh) - no idea how to trigger these...
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if ( DBGF_IS_EVENT_ENABLED(pVM, DBGFEVENT_INTERRUPT_SOFTWARE)
+ || VBOXVMM_INT_SOFTWARE_ENABLED())
+ {
+ ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_XCPT_OR_NMI);
+ }
+
+ /*
+ * INT3 breakpoints - triggered by #BP exceptions.
+ */
+ if (pVM->dbgf.ro.cEnabledInt3Breakpoints > 0)
+ pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BP);
+
+ /*
+ * Exception bitmap and XCPT events+probes.
+ */
+ for (int iXcpt = 0; iXcpt < (DBGFEVENT_XCPT_LAST - DBGFEVENT_XCPT_FIRST + 1); iXcpt++)
+ if (DBGF_IS_EVENT_ENABLED(pVM, (DBGFEVENTTYPE)(DBGFEVENT_XCPT_FIRST + iXcpt)))
+ pDbgState->bmXcptExtra |= RT_BIT_32(iXcpt);
+
+ if (VBOXVMM_XCPT_DE_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DE);
+ if (VBOXVMM_XCPT_DB_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DB);
+ if (VBOXVMM_XCPT_BP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BP);
+ if (VBOXVMM_XCPT_OF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_OF);
+ if (VBOXVMM_XCPT_BR_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BR);
+ if (VBOXVMM_XCPT_UD_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_UD);
+ if (VBOXVMM_XCPT_NM_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_NM);
+ if (VBOXVMM_XCPT_DF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DF);
+ if (VBOXVMM_XCPT_TS_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_TS);
+ if (VBOXVMM_XCPT_NP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_NP);
+ if (VBOXVMM_XCPT_SS_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_SS);
+ if (VBOXVMM_XCPT_GP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_GP);
+ if (VBOXVMM_XCPT_PF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_PF);
+ if (VBOXVMM_XCPT_MF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_MF);
+ if (VBOXVMM_XCPT_AC_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_AC);
+ if (VBOXVMM_XCPT_XF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_XF);
+ if (VBOXVMM_XCPT_VE_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_VE);
+ if (VBOXVMM_XCPT_SX_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_SX);
+
+ if (pDbgState->bmXcptExtra)
+ ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_XCPT_OR_NMI);
+
+ /*
+ * Process events and probes for VM-exits, making sure we get the wanted VM-exits.
+ *
+ * Note! This is the reverse of what hmR0VmxHandleExitDtraceEvents does.
+ * So, when adding/changing/removing please don't forget to update it.
+ *
+ * Some of the macros are picking up local variables to save horizontal space,
+ * (being able to see it in a table is the lesser evil here).
+ */
+#define IS_EITHER_ENABLED(a_pVM, a_EventSubName) \
+ ( DBGF_IS_EVENT_ENABLED(a_pVM, RT_CONCAT(DBGFEVENT_, a_EventSubName)) \
+ || RT_CONCAT3(VBOXVMM_, a_EventSubName, _ENABLED)() )
+#define SET_ONLY_XBM_IF_EITHER_EN(a_EventSubName, a_uExit) \
+ if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
+ { AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
+ ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
+ } else do { } while (0)
+#define SET_CPE1_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fCtrlProcExec) \
+ if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
+ { \
+ (pDbgState)->fCpe1Extra |= (a_fCtrlProcExec); \
+ AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
+ ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
+ } else do { } while (0)
+#define SET_CPEU_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fUnwantedCtrlProcExec) \
+ if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
+ { \
+ (pDbgState)->fCpe1Unwanted |= (a_fUnwantedCtrlProcExec); \
+ AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
+ ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
+ } else do { } while (0)
+#define SET_CPE2_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fCtrlProcExec2) \
+ if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
+ { \
+ (pDbgState)->fCpe2Extra |= (a_fCtrlProcExec2); \
+ AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
+ ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
+ } else do { } while (0)
+
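+    /* For illustration: SET_ONLY_XBM_IF_EITHER_EN(EXIT_CPUID, VMX_EXIT_CPUID) checks DBGFEVENT_EXIT_CPUID
+       and VBOXVMM_EXIT_CPUID_ENABLED() and, if either is enabled, sets bit VMX_EXIT_CPUID in bmExitsToCheck. */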
+ SET_ONLY_XBM_IF_EITHER_EN(EXIT_TASK_SWITCH, VMX_EXIT_TASK_SWITCH); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_EPT_VIOLATION, VMX_EXIT_EPT_VIOLATION); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_EPT_MISCONFIG, VMX_EXIT_EPT_MISCONFIG); /* unconditional (unless #VE) */
+ SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_VAPIC_ACCESS, VMX_EXIT_APIC_ACCESS); /* feature dependent, nothing to enable here */
+ SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_VAPIC_WRITE, VMX_EXIT_APIC_WRITE); /* feature dependent, nothing to enable here */
+
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_CPUID, VMX_EXIT_CPUID); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_CPUID, VMX_EXIT_CPUID);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_GETSEC, VMX_EXIT_GETSEC); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_GETSEC, VMX_EXIT_GETSEC);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_HALT, VMX_EXIT_HLT, VMX_PROC_CTLS_HLT_EXIT); /* paranoia */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_HALT, VMX_EXIT_HLT);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_INVD, VMX_EXIT_INVD); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_INVD, VMX_EXIT_INVD);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_INVLPG, VMX_EXIT_INVLPG, VMX_PROC_CTLS_INVLPG_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_INVLPG, VMX_EXIT_INVLPG);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDPMC, VMX_EXIT_RDPMC, VMX_PROC_CTLS_RDPMC_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDPMC, VMX_EXIT_RDPMC);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDTSC, VMX_EXIT_RDTSC, VMX_PROC_CTLS_RDTSC_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDTSC, VMX_EXIT_RDTSC);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_RSM, VMX_EXIT_RSM); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RSM, VMX_EXIT_RSM);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMM_CALL, VMX_EXIT_VMCALL); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMM_CALL, VMX_EXIT_VMCALL);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMCLEAR, VMX_EXIT_VMCLEAR); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMCLEAR, VMX_EXIT_VMCLEAR);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMLAUNCH, VMX_EXIT_VMLAUNCH); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMLAUNCH, VMX_EXIT_VMLAUNCH);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMPTRLD, VMX_EXIT_VMPTRLD); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMPTRLD, VMX_EXIT_VMPTRLD);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMPTRST, VMX_EXIT_VMPTRST); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMPTRST, VMX_EXIT_VMPTRST);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMREAD, VMX_EXIT_VMREAD); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMREAD, VMX_EXIT_VMREAD);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMRESUME, VMX_EXIT_VMRESUME); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMRESUME, VMX_EXIT_VMRESUME);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMWRITE, VMX_EXIT_VMWRITE); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMWRITE, VMX_EXIT_VMWRITE);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMXOFF, VMX_EXIT_VMXOFF); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMXOFF, VMX_EXIT_VMXOFF);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMXON, VMX_EXIT_VMXON); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMXON, VMX_EXIT_VMXON);
+
+ if ( IS_EITHER_ENABLED(pVM, INSTR_CRX_READ)
+ || IS_EITHER_ENABLED(pVM, INSTR_CRX_WRITE))
+ {
+ int rc = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_APIC_TPR);
+ AssertRC(rc);
+
+#if 0 /** @todo fix me */
+ pDbgState->fClearCr0Mask = true;
+ pDbgState->fClearCr4Mask = true;
+#endif
+ if (IS_EITHER_ENABLED(pVM, INSTR_CRX_READ))
+ pDbgState->fCpe1Extra |= VMX_PROC_CTLS_CR3_STORE_EXIT | VMX_PROC_CTLS_CR8_STORE_EXIT;
+ if (IS_EITHER_ENABLED(pVM, INSTR_CRX_WRITE))
+ pDbgState->fCpe1Extra |= VMX_PROC_CTLS_CR3_LOAD_EXIT | VMX_PROC_CTLS_CR8_LOAD_EXIT;
+ pDbgState->fCpe1Unwanted |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* risky? */
+ /* Note! We currently don't use VMX_VMCS32_CTRL_CR3_TARGET_COUNT. It would
+ require clearing here and in the loop if we start using it. */
+ ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_MOV_CRX);
+ }
+ else
+ {
+ if (pDbgState->fClearCr0Mask)
+ {
+ pDbgState->fClearCr0Mask = false;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR0);
+ }
+ if (pDbgState->fClearCr4Mask)
+ {
+ pDbgState->fClearCr4Mask = false;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR4);
+ }
+ }
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_CRX_READ, VMX_EXIT_MOV_CRX);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_CRX_WRITE, VMX_EXIT_MOV_CRX);
+
+ if ( IS_EITHER_ENABLED(pVM, INSTR_DRX_READ)
+ || IS_EITHER_ENABLED(pVM, INSTR_DRX_WRITE))
+ {
+ /** @todo later, need to fix handler as it assumes this won't usually happen. */
+ ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_MOV_DRX);
+ }
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_DRX_READ, VMX_EXIT_MOV_DRX);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_DRX_WRITE, VMX_EXIT_MOV_DRX);
+
+ SET_CPEU_XBM_IF_EITHER_EN(INSTR_RDMSR, VMX_EXIT_RDMSR, VMX_PROC_CTLS_USE_MSR_BITMAPS); /* risky clearing this? */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDMSR, VMX_EXIT_RDMSR);
+ SET_CPEU_XBM_IF_EITHER_EN(INSTR_WRMSR, VMX_EXIT_WRMSR, VMX_PROC_CTLS_USE_MSR_BITMAPS);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_WRMSR, VMX_EXIT_WRMSR);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_MWAIT, VMX_EXIT_MWAIT, VMX_PROC_CTLS_MWAIT_EXIT); /* paranoia */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_MWAIT, VMX_EXIT_MWAIT);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_MONITOR, VMX_EXIT_MONITOR, VMX_PROC_CTLS_MONITOR_EXIT); /* paranoia */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_MONITOR, VMX_EXIT_MONITOR);
+#if 0 /** @todo too slow, fix handler. */
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_PAUSE, VMX_EXIT_PAUSE, VMX_PROC_CTLS_PAUSE_EXIT);
+#endif
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_PAUSE, VMX_EXIT_PAUSE);
+
+ if ( IS_EITHER_ENABLED(pVM, INSTR_SGDT)
+ || IS_EITHER_ENABLED(pVM, INSTR_SIDT)
+ || IS_EITHER_ENABLED(pVM, INSTR_LGDT)
+ || IS_EITHER_ENABLED(pVM, INSTR_LIDT))
+ {
+ pDbgState->fCpe2Extra |= VMX_PROC_CTLS2_DESC_TABLE_EXIT;
+ ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_GDTR_IDTR_ACCESS);
+ }
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_SGDT, VMX_EXIT_GDTR_IDTR_ACCESS);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_SIDT, VMX_EXIT_GDTR_IDTR_ACCESS);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_LGDT, VMX_EXIT_GDTR_IDTR_ACCESS);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_LIDT, VMX_EXIT_GDTR_IDTR_ACCESS);
+
+ if ( IS_EITHER_ENABLED(pVM, INSTR_SLDT)
+ || IS_EITHER_ENABLED(pVM, INSTR_STR)
+ || IS_EITHER_ENABLED(pVM, INSTR_LLDT)
+ || IS_EITHER_ENABLED(pVM, INSTR_LTR))
+ {
+ pDbgState->fCpe2Extra |= VMX_PROC_CTLS2_DESC_TABLE_EXIT;
+ ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_LDTR_TR_ACCESS);
+ }
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_SLDT, VMX_EXIT_LDTR_TR_ACCESS);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_STR, VMX_EXIT_LDTR_TR_ACCESS);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_LLDT, VMX_EXIT_LDTR_TR_ACCESS);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_LTR, VMX_EXIT_LDTR_TR_ACCESS);
+
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_INVEPT, VMX_EXIT_INVEPT); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVEPT, VMX_EXIT_INVEPT);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDTSCP, VMX_EXIT_RDTSCP, VMX_PROC_CTLS_RDTSC_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDTSCP, VMX_EXIT_RDTSCP);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_INVVPID, VMX_EXIT_INVVPID); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVVPID, VMX_EXIT_INVVPID);
+ SET_CPE2_XBM_IF_EITHER_EN(INSTR_WBINVD, VMX_EXIT_WBINVD, VMX_PROC_CTLS2_WBINVD_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_WBINVD, VMX_EXIT_WBINVD);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_XSETBV, VMX_EXIT_XSETBV); /* unconditional */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_XSETBV, VMX_EXIT_XSETBV);
+ SET_CPE2_XBM_IF_EITHER_EN(INSTR_RDRAND, VMX_EXIT_RDRAND, VMX_PROC_CTLS2_RDRAND_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDRAND, VMX_EXIT_RDRAND);
+ SET_CPE1_XBM_IF_EITHER_EN(INSTR_VMX_INVPCID, VMX_EXIT_INVPCID, VMX_PROC_CTLS_INVLPG_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVPCID, VMX_EXIT_INVPCID);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMFUNC, VMX_EXIT_VMFUNC); /* unconditional for the current setup */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMFUNC, VMX_EXIT_VMFUNC);
+ SET_CPE2_XBM_IF_EITHER_EN(INSTR_RDSEED, VMX_EXIT_RDSEED, VMX_PROC_CTLS2_RDSEED_EXIT);
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDSEED, VMX_EXIT_RDSEED);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_XSAVES, VMX_EXIT_XSAVES); /* unconditional (enabled by host, guest cfg) */
+    SET_ONLY_XBM_IF_EITHER_EN( EXIT_XSAVES,        VMX_EXIT_XSAVES);
+ SET_ONLY_XBM_IF_EITHER_EN(INSTR_XRSTORS, VMX_EXIT_XRSTORS); /* unconditional (enabled by host, guest cfg) */
+ SET_ONLY_XBM_IF_EITHER_EN( EXIT_XRSTORS, VMX_EXIT_XRSTORS);
+
+#undef IS_EITHER_ENABLED
+#undef SET_ONLY_XBM_IF_EITHER_EN
+#undef SET_CPE1_XBM_IF_EITHER_EN
+#undef SET_CPEU_XBM_IF_EITHER_EN
+#undef SET_CPE2_XBM_IF_EITHER_EN
+
+ /*
+ * Sanitize the control stuff.
+ */
+ pDbgState->fCpe2Extra &= pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1;
+ if (pDbgState->fCpe2Extra)
+ pDbgState->fCpe1Extra |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
+ pDbgState->fCpe1Extra &= pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1;
+ pDbgState->fCpe1Unwanted &= ~pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed0;
+ if (pVCpu->hm.s.fDebugWantRdTscExit != RT_BOOL(pDbgState->fCpe1Extra & VMX_PROC_CTLS_RDTSC_EXIT))
+ {
+ pVCpu->hm.s.fDebugWantRdTscExit ^= true;
+ pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
+ }
+
+ Log6(("HM: debug state: cpe1=%#RX32 cpeu=%#RX32 cpe2=%#RX32%s%s\n",
+ pDbgState->fCpe1Extra, pDbgState->fCpe1Unwanted, pDbgState->fCpe2Extra,
+ pDbgState->fClearCr0Mask ? " clr-cr0" : "",
+ pDbgState->fClearCr4Mask ? " clr-cr4" : ""));
+}
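As an aside on the sanitize step at the end of the function above: the VMX capability MSRs report, for every execution control, an allowed-0 and an allowed-1 setting, so the debug state may only add bits the CPU supports and may only drop bits the CPU does not force on. The following standalone sketch models that clamping; VMXCAPS, sanitizeCtls and the numeric values are illustrative assumptions, not the VirtualBox API.

#include <stdint.h>
#include <stdio.h>

/* Illustrative model, not the VirtualBox API: a VMX capability MSR pair where
   allowed0 = bits the CPU forces to 1 and allowed1 = bits the CPU permits to be 1. */
typedef struct { uint32_t allowed0, allowed1; } VMXCAPS;

/* Clamp the desired extra/unwanted control bits the same way the sanitize step does. */
static void sanitizeCtls(VMXCAPS const *pCaps, uint32_t *pfExtra, uint32_t *pfUnwanted)
{
    *pfExtra    &= pCaps->allowed1;     /* may only add bits the CPU supports */
    *pfUnwanted &= ~pCaps->allowed0;    /* may not clear bits the CPU forces on */
}

int main(void)
{
    VMXCAPS  Caps      = { /* allowed0 */ 0x00000016, /* allowed1 */ 0xfff9fffe }; /* made-up values */
    uint32_t fExtra    = 0x01000001;    /* wants unsupported bit 0 and supported bit 24 */
    uint32_t fUnwanted = 0x00000014;    /* wants to drop bits 2 and 4, both forced on by the CPU */
    sanitizeCtls(&Caps, &fExtra, &fUnwanted);
    printf("extra=%#x unwanted=%#x\n", fExtra, fUnwanted);  /* prints extra=0x1000000 unwanted=0 */
    return 0;
}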
+
+
+/**
+ * Fires off DBGF events and dtrace probes for a VM-exit, when it's
+ * appropriate.
+ *
+ * The caller has checked the VM-exit against the
+ * VMXRUNDBGSTATE::bmExitsToCheck bitmap. The caller has checked for NMIs
+ * already, so we don't have to do that either.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient Pointer to the VMX-transient structure.
+ * @param uExitReason The VM-exit reason.
+ *
+ * @remarks The name of this function is displayed by dtrace, so keep it short
+ * and to the point. No longer than 33 chars long, please.
+ */
+static VBOXSTRICTRC hmR0VmxHandleExitDtraceEvents(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uExitReason)
+{
+ /*
+ * Translate the event into a DBGF event (enmEvent + uEventArg) and at the
+ * same time check whether any corresponding Dtrace event is enabled (fDtrace).
+ *
+ * Note! This is the reverse operation of what hmR0VmxPreRunGuestDebugStateUpdate
+ * does. Must add/change/remove both places. Same ordering, please.
+ *
+ * Added/removed events must also be reflected in the next section
+ * where we dispatch dtrace events.
+ */
+ bool fDtrace1 = false;
+ bool fDtrace2 = false;
+ DBGFEVENTTYPE enmEvent1 = DBGFEVENT_END;
+ DBGFEVENTTYPE enmEvent2 = DBGFEVENT_END;
+ uint32_t uEventArg = 0;
+#define SET_EXIT(a_EventSubName) \
+ do { \
+ enmEvent2 = RT_CONCAT(DBGFEVENT_EXIT_, a_EventSubName); \
+ fDtrace2 = RT_CONCAT3(VBOXVMM_EXIT_, a_EventSubName, _ENABLED)(); \
+ } while (0)
+#define SET_BOTH(a_EventSubName) \
+ do { \
+ enmEvent1 = RT_CONCAT(DBGFEVENT_INSTR_, a_EventSubName); \
+ enmEvent2 = RT_CONCAT(DBGFEVENT_EXIT_, a_EventSubName); \
+ fDtrace1 = RT_CONCAT3(VBOXVMM_INSTR_, a_EventSubName, _ENABLED)(); \
+ fDtrace2 = RT_CONCAT3(VBOXVMM_EXIT_, a_EventSubName, _ENABLED)(); \
+ } while (0)
+ switch (uExitReason)
+ {
+ case VMX_EXIT_MTF:
+ return hmR0VmxExitMtf(pVCpu, pVmxTransient);
+
+ case VMX_EXIT_XCPT_OR_NMI:
+ {
+ uint8_t const idxVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo);
+ switch (VMX_EXIT_INT_INFO_TYPE(pVmxTransient->uExitIntInfo))
+ {
+ case VMX_EXIT_INT_INFO_TYPE_HW_XCPT:
+ case VMX_EXIT_INT_INFO_TYPE_SW_XCPT:
+ case VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT:
+ if (idxVector <= (unsigned)(DBGFEVENT_XCPT_LAST - DBGFEVENT_XCPT_FIRST))
+ {
+ if (VMX_EXIT_INT_INFO_IS_ERROR_CODE_VALID(pVmxTransient->uExitIntInfo))
+ {
+ hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
+ uEventArg = pVmxTransient->uExitIntErrorCode;
+ }
+ enmEvent1 = (DBGFEVENTTYPE)(DBGFEVENT_XCPT_FIRST + idxVector);
+ switch (enmEvent1)
+ {
+ case DBGFEVENT_XCPT_DE: fDtrace1 = VBOXVMM_XCPT_DE_ENABLED(); break;
+ case DBGFEVENT_XCPT_DB: fDtrace1 = VBOXVMM_XCPT_DB_ENABLED(); break;
+ case DBGFEVENT_XCPT_BP: fDtrace1 = VBOXVMM_XCPT_BP_ENABLED(); break;
+ case DBGFEVENT_XCPT_OF: fDtrace1 = VBOXVMM_XCPT_OF_ENABLED(); break;
+ case DBGFEVENT_XCPT_BR: fDtrace1 = VBOXVMM_XCPT_BR_ENABLED(); break;
+ case DBGFEVENT_XCPT_UD: fDtrace1 = VBOXVMM_XCPT_UD_ENABLED(); break;
+ case DBGFEVENT_XCPT_NM: fDtrace1 = VBOXVMM_XCPT_NM_ENABLED(); break;
+ case DBGFEVENT_XCPT_DF: fDtrace1 = VBOXVMM_XCPT_DF_ENABLED(); break;
+ case DBGFEVENT_XCPT_TS: fDtrace1 = VBOXVMM_XCPT_TS_ENABLED(); break;
+ case DBGFEVENT_XCPT_NP: fDtrace1 = VBOXVMM_XCPT_NP_ENABLED(); break;
+ case DBGFEVENT_XCPT_SS: fDtrace1 = VBOXVMM_XCPT_SS_ENABLED(); break;
+ case DBGFEVENT_XCPT_GP: fDtrace1 = VBOXVMM_XCPT_GP_ENABLED(); break;
+ case DBGFEVENT_XCPT_PF: fDtrace1 = VBOXVMM_XCPT_PF_ENABLED(); break;
+ case DBGFEVENT_XCPT_MF: fDtrace1 = VBOXVMM_XCPT_MF_ENABLED(); break;
+ case DBGFEVENT_XCPT_AC: fDtrace1 = VBOXVMM_XCPT_AC_ENABLED(); break;
+ case DBGFEVENT_XCPT_XF: fDtrace1 = VBOXVMM_XCPT_XF_ENABLED(); break;
+ case DBGFEVENT_XCPT_VE: fDtrace1 = VBOXVMM_XCPT_VE_ENABLED(); break;
+ case DBGFEVENT_XCPT_SX: fDtrace1 = VBOXVMM_XCPT_SX_ENABLED(); break;
+ default: break;
+ }
+ }
+ else
+ AssertFailed();
+ break;
+
+ case VMX_EXIT_INT_INFO_TYPE_SW_INT:
+ uEventArg = idxVector;
+ enmEvent1 = DBGFEVENT_INTERRUPT_SOFTWARE;
+ fDtrace1 = VBOXVMM_INT_SOFTWARE_ENABLED();
+ break;
+ }
+ break;
+ }
+
+ case VMX_EXIT_TRIPLE_FAULT:
+ enmEvent1 = DBGFEVENT_TRIPLE_FAULT;
+ //fDtrace1 = VBOXVMM_EXIT_TRIPLE_FAULT_ENABLED();
+ break;
+ case VMX_EXIT_TASK_SWITCH: SET_EXIT(TASK_SWITCH); break;
+ case VMX_EXIT_EPT_VIOLATION: SET_EXIT(VMX_EPT_VIOLATION); break;
+ case VMX_EXIT_EPT_MISCONFIG: SET_EXIT(VMX_EPT_MISCONFIG); break;
+ case VMX_EXIT_APIC_ACCESS: SET_EXIT(VMX_VAPIC_ACCESS); break;
+ case VMX_EXIT_APIC_WRITE: SET_EXIT(VMX_VAPIC_WRITE); break;
+
+ /* Instruction specific VM-exits: */
+ case VMX_EXIT_CPUID: SET_BOTH(CPUID); break;
+ case VMX_EXIT_GETSEC: SET_BOTH(GETSEC); break;
+ case VMX_EXIT_HLT: SET_BOTH(HALT); break;
+ case VMX_EXIT_INVD: SET_BOTH(INVD); break;
+ case VMX_EXIT_INVLPG: SET_BOTH(INVLPG); break;
+ case VMX_EXIT_RDPMC: SET_BOTH(RDPMC); break;
+ case VMX_EXIT_RDTSC: SET_BOTH(RDTSC); break;
+ case VMX_EXIT_RSM: SET_BOTH(RSM); break;
+ case VMX_EXIT_VMCALL: SET_BOTH(VMM_CALL); break;
+ case VMX_EXIT_VMCLEAR: SET_BOTH(VMX_VMCLEAR); break;
+ case VMX_EXIT_VMLAUNCH: SET_BOTH(VMX_VMLAUNCH); break;
+ case VMX_EXIT_VMPTRLD: SET_BOTH(VMX_VMPTRLD); break;
+ case VMX_EXIT_VMPTRST: SET_BOTH(VMX_VMPTRST); break;
+ case VMX_EXIT_VMREAD: SET_BOTH(VMX_VMREAD); break;
+ case VMX_EXIT_VMRESUME: SET_BOTH(VMX_VMRESUME); break;
+ case VMX_EXIT_VMWRITE: SET_BOTH(VMX_VMWRITE); break;
+ case VMX_EXIT_VMXOFF: SET_BOTH(VMX_VMXOFF); break;
+ case VMX_EXIT_VMXON: SET_BOTH(VMX_VMXON); break;
+ case VMX_EXIT_MOV_CRX:
+ hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ if (VMX_EXIT_QUAL_CRX_ACCESS(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_CRX_ACCESS_READ)
+ SET_BOTH(CRX_READ);
+ else
+ SET_BOTH(CRX_WRITE);
+ uEventArg = VMX_EXIT_QUAL_CRX_REGISTER(pVmxTransient->uExitQual);
+ break;
+ case VMX_EXIT_MOV_DRX:
+ hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ if ( VMX_EXIT_QUAL_DRX_DIRECTION(pVmxTransient->uExitQual)
+ == VMX_EXIT_QUAL_DRX_DIRECTION_READ)
+ SET_BOTH(DRX_READ);
+ else
+ SET_BOTH(DRX_WRITE);
+ uEventArg = VMX_EXIT_QUAL_DRX_REGISTER(pVmxTransient->uExitQual);
+ break;
+ case VMX_EXIT_RDMSR: SET_BOTH(RDMSR); break;
+ case VMX_EXIT_WRMSR: SET_BOTH(WRMSR); break;
+ case VMX_EXIT_MWAIT: SET_BOTH(MWAIT); break;
+ case VMX_EXIT_MONITOR: SET_BOTH(MONITOR); break;
+ case VMX_EXIT_PAUSE: SET_BOTH(PAUSE); break;
+ case VMX_EXIT_GDTR_IDTR_ACCESS:
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ switch (RT_BF_GET(pVmxTransient->ExitInstrInfo.u, VMX_BF_XDTR_INSINFO_INSTR_ID))
+ {
+ case VMX_XDTR_INSINFO_II_SGDT: SET_BOTH(SGDT); break;
+ case VMX_XDTR_INSINFO_II_SIDT: SET_BOTH(SIDT); break;
+ case VMX_XDTR_INSINFO_II_LGDT: SET_BOTH(LGDT); break;
+ case VMX_XDTR_INSINFO_II_LIDT: SET_BOTH(LIDT); break;
+ }
+ break;
+
+ case VMX_EXIT_LDTR_TR_ACCESS:
+ hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ switch (RT_BF_GET(pVmxTransient->ExitInstrInfo.u, VMX_BF_YYTR_INSINFO_INSTR_ID))
+ {
+ case VMX_YYTR_INSINFO_II_SLDT: SET_BOTH(SLDT); break;
+ case VMX_YYTR_INSINFO_II_STR: SET_BOTH(STR); break;
+ case VMX_YYTR_INSINFO_II_LLDT: SET_BOTH(LLDT); break;
+ case VMX_YYTR_INSINFO_II_LTR: SET_BOTH(LTR); break;
+ }
+ break;
+
+ case VMX_EXIT_INVEPT: SET_BOTH(VMX_INVEPT); break;
+ case VMX_EXIT_RDTSCP: SET_BOTH(RDTSCP); break;
+ case VMX_EXIT_INVVPID: SET_BOTH(VMX_INVVPID); break;
+ case VMX_EXIT_WBINVD: SET_BOTH(WBINVD); break;
+ case VMX_EXIT_XSETBV: SET_BOTH(XSETBV); break;
+ case VMX_EXIT_RDRAND: SET_BOTH(RDRAND); break;
+ case VMX_EXIT_INVPCID: SET_BOTH(VMX_INVPCID); break;
+ case VMX_EXIT_VMFUNC: SET_BOTH(VMX_VMFUNC); break;
+ case VMX_EXIT_RDSEED: SET_BOTH(RDSEED); break;
+ case VMX_EXIT_XSAVES: SET_BOTH(XSAVES); break;
+ case VMX_EXIT_XRSTORS: SET_BOTH(XRSTORS); break;
+
+ /* Events that aren't relevant at this point. */
+ case VMX_EXIT_EXT_INT:
+ case VMX_EXIT_INT_WINDOW:
+ case VMX_EXIT_NMI_WINDOW:
+ case VMX_EXIT_TPR_BELOW_THRESHOLD:
+ case VMX_EXIT_PREEMPT_TIMER:
+ case VMX_EXIT_IO_INSTR:
+ break;
+
+ /* Errors and unexpected events. */
+ case VMX_EXIT_INIT_SIGNAL:
+ case VMX_EXIT_SIPI:
+ case VMX_EXIT_IO_SMI:
+ case VMX_EXIT_SMI:
+ case VMX_EXIT_ERR_INVALID_GUEST_STATE:
+ case VMX_EXIT_ERR_MSR_LOAD:
+ case VMX_EXIT_ERR_MACHINE_CHECK:
+ break;
+
+ default:
+ AssertMsgFailed(("Unexpected VM-exit=%#x\n", uExitReason));
+ break;
+ }
+#undef SET_BOTH
+#undef SET_EXIT
+
+ /*
+ * Dtrace tracepoints go first. We do them here at once so we don't
+ * have to copy the guest state saving and stuff a few dozen times.
+ * Down side is that we've got to repeat the switch, though this time
+ * we use enmEvent since the probes are a subset of what DBGF does.
+ */
+ if (fDtrace1 || fDtrace2)
+ {
+ hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ hmR0VmxImportGuestState(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ switch (enmEvent1)
+ {
+ /** @todo consider which extra parameters would be helpful for each probe. */
+ case DBGFEVENT_END: break;
+ case DBGFEVENT_XCPT_DE: VBOXVMM_XCPT_DE(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_DB: VBOXVMM_XCPT_DB(pVCpu, pCtx, pCtx->dr[6]); break;
+ case DBGFEVENT_XCPT_BP: VBOXVMM_XCPT_BP(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_OF: VBOXVMM_XCPT_OF(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_BR: VBOXVMM_XCPT_BR(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_UD: VBOXVMM_XCPT_UD(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_NM: VBOXVMM_XCPT_NM(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_DF: VBOXVMM_XCPT_DF(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_TS: VBOXVMM_XCPT_TS(pVCpu, pCtx, uEventArg); break;
+ case DBGFEVENT_XCPT_NP: VBOXVMM_XCPT_NP(pVCpu, pCtx, uEventArg); break;
+ case DBGFEVENT_XCPT_SS: VBOXVMM_XCPT_SS(pVCpu, pCtx, uEventArg); break;
+ case DBGFEVENT_XCPT_GP: VBOXVMM_XCPT_GP(pVCpu, pCtx, uEventArg); break;
+ case DBGFEVENT_XCPT_PF: VBOXVMM_XCPT_PF(pVCpu, pCtx, uEventArg, pCtx->cr2); break;
+ case DBGFEVENT_XCPT_MF: VBOXVMM_XCPT_MF(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_AC: VBOXVMM_XCPT_AC(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_XF: VBOXVMM_XCPT_XF(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_VE: VBOXVMM_XCPT_VE(pVCpu, pCtx); break;
+ case DBGFEVENT_XCPT_SX: VBOXVMM_XCPT_SX(pVCpu, pCtx, uEventArg); break;
+ case DBGFEVENT_INTERRUPT_SOFTWARE: VBOXVMM_INT_SOFTWARE(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_INSTR_CPUID: VBOXVMM_INSTR_CPUID(pVCpu, pCtx, pCtx->eax, pCtx->ecx); break;
+ case DBGFEVENT_INSTR_GETSEC: VBOXVMM_INSTR_GETSEC(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_HALT: VBOXVMM_INSTR_HALT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_INVD: VBOXVMM_INSTR_INVD(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_INVLPG: VBOXVMM_INSTR_INVLPG(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_RDPMC: VBOXVMM_INSTR_RDPMC(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_RDTSC: VBOXVMM_INSTR_RDTSC(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_RSM: VBOXVMM_INSTR_RSM(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_CRX_READ: VBOXVMM_INSTR_CRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_INSTR_CRX_WRITE: VBOXVMM_INSTR_CRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_INSTR_DRX_READ: VBOXVMM_INSTR_DRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_INSTR_DRX_WRITE: VBOXVMM_INSTR_DRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_INSTR_RDMSR: VBOXVMM_INSTR_RDMSR(pVCpu, pCtx, pCtx->ecx); break;
+ case DBGFEVENT_INSTR_WRMSR: VBOXVMM_INSTR_WRMSR(pVCpu, pCtx, pCtx->ecx,
+ RT_MAKE_U64(pCtx->eax, pCtx->edx)); break;
+ case DBGFEVENT_INSTR_MWAIT: VBOXVMM_INSTR_MWAIT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_MONITOR: VBOXVMM_INSTR_MONITOR(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_PAUSE: VBOXVMM_INSTR_PAUSE(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_SGDT: VBOXVMM_INSTR_SGDT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_SIDT: VBOXVMM_INSTR_SIDT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_LGDT: VBOXVMM_INSTR_LGDT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_LIDT: VBOXVMM_INSTR_LIDT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_SLDT: VBOXVMM_INSTR_SLDT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_STR: VBOXVMM_INSTR_STR(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_LLDT: VBOXVMM_INSTR_LLDT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_LTR: VBOXVMM_INSTR_LTR(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_RDTSCP: VBOXVMM_INSTR_RDTSCP(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_WBINVD: VBOXVMM_INSTR_WBINVD(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_XSETBV: VBOXVMM_INSTR_XSETBV(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_RDRAND: VBOXVMM_INSTR_RDRAND(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_RDSEED: VBOXVMM_INSTR_RDSEED(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_XSAVES: VBOXVMM_INSTR_XSAVES(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_XRSTORS: VBOXVMM_INSTR_XRSTORS(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMM_CALL: VBOXVMM_INSTR_VMM_CALL(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMCLEAR: VBOXVMM_INSTR_VMX_VMCLEAR(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMLAUNCH: VBOXVMM_INSTR_VMX_VMLAUNCH(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMPTRLD: VBOXVMM_INSTR_VMX_VMPTRLD(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMPTRST: VBOXVMM_INSTR_VMX_VMPTRST(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMREAD: VBOXVMM_INSTR_VMX_VMREAD(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMRESUME: VBOXVMM_INSTR_VMX_VMRESUME(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMWRITE: VBOXVMM_INSTR_VMX_VMWRITE(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMXOFF: VBOXVMM_INSTR_VMX_VMXOFF(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMXON: VBOXVMM_INSTR_VMX_VMXON(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_INVEPT: VBOXVMM_INSTR_VMX_INVEPT(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_INVVPID: VBOXVMM_INSTR_VMX_INVVPID(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_INVPCID: VBOXVMM_INSTR_VMX_INVPCID(pVCpu, pCtx); break;
+ case DBGFEVENT_INSTR_VMX_VMFUNC: VBOXVMM_INSTR_VMX_VMFUNC(pVCpu, pCtx); break;
+ default: AssertMsgFailed(("enmEvent1=%d uExitReason=%d\n", enmEvent1, uExitReason)); break;
+ }
+ switch (enmEvent2)
+ {
+ /** @todo consider which extra parameters would be helpful for each probe. */
+ case DBGFEVENT_END: break;
+ case DBGFEVENT_EXIT_TASK_SWITCH: VBOXVMM_EXIT_TASK_SWITCH(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_CPUID: VBOXVMM_EXIT_CPUID(pVCpu, pCtx, pCtx->eax, pCtx->ecx); break;
+ case DBGFEVENT_EXIT_GETSEC: VBOXVMM_EXIT_GETSEC(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_HALT: VBOXVMM_EXIT_HALT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_INVD: VBOXVMM_EXIT_INVD(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_INVLPG: VBOXVMM_EXIT_INVLPG(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_RDPMC: VBOXVMM_EXIT_RDPMC(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_RDTSC: VBOXVMM_EXIT_RDTSC(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_RSM: VBOXVMM_EXIT_RSM(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_CRX_READ: VBOXVMM_EXIT_CRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_EXIT_CRX_WRITE: VBOXVMM_EXIT_CRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_EXIT_DRX_READ: VBOXVMM_EXIT_DRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_EXIT_DRX_WRITE: VBOXVMM_EXIT_DRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
+ case DBGFEVENT_EXIT_RDMSR: VBOXVMM_EXIT_RDMSR(pVCpu, pCtx, pCtx->ecx); break;
+ case DBGFEVENT_EXIT_WRMSR: VBOXVMM_EXIT_WRMSR(pVCpu, pCtx, pCtx->ecx,
+ RT_MAKE_U64(pCtx->eax, pCtx->edx)); break;
+ case DBGFEVENT_EXIT_MWAIT: VBOXVMM_EXIT_MWAIT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_MONITOR: VBOXVMM_EXIT_MONITOR(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_PAUSE: VBOXVMM_EXIT_PAUSE(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_SGDT: VBOXVMM_EXIT_SGDT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_SIDT: VBOXVMM_EXIT_SIDT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_LGDT: VBOXVMM_EXIT_LGDT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_LIDT: VBOXVMM_EXIT_LIDT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_SLDT: VBOXVMM_EXIT_SLDT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_STR: VBOXVMM_EXIT_STR(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_LLDT: VBOXVMM_EXIT_LLDT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_LTR: VBOXVMM_EXIT_LTR(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_RDTSCP: VBOXVMM_EXIT_RDTSCP(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_WBINVD: VBOXVMM_EXIT_WBINVD(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_XSETBV: VBOXVMM_EXIT_XSETBV(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_RDRAND: VBOXVMM_EXIT_RDRAND(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_RDSEED: VBOXVMM_EXIT_RDSEED(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_XSAVES: VBOXVMM_EXIT_XSAVES(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_XRSTORS: VBOXVMM_EXIT_XRSTORS(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMM_CALL: VBOXVMM_EXIT_VMM_CALL(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMCLEAR: VBOXVMM_EXIT_VMX_VMCLEAR(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMLAUNCH: VBOXVMM_EXIT_VMX_VMLAUNCH(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMPTRLD: VBOXVMM_EXIT_VMX_VMPTRLD(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMPTRST: VBOXVMM_EXIT_VMX_VMPTRST(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMREAD: VBOXVMM_EXIT_VMX_VMREAD(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMRESUME: VBOXVMM_EXIT_VMX_VMRESUME(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMWRITE: VBOXVMM_EXIT_VMX_VMWRITE(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMXOFF: VBOXVMM_EXIT_VMX_VMXOFF(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMXON: VBOXVMM_EXIT_VMX_VMXON(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_INVEPT: VBOXVMM_EXIT_VMX_INVEPT(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_INVVPID: VBOXVMM_EXIT_VMX_INVVPID(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_INVPCID: VBOXVMM_EXIT_VMX_INVPCID(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VMFUNC: VBOXVMM_EXIT_VMX_VMFUNC(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_EPT_MISCONFIG: VBOXVMM_EXIT_VMX_EPT_MISCONFIG(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_EPT_VIOLATION: VBOXVMM_EXIT_VMX_EPT_VIOLATION(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VAPIC_ACCESS: VBOXVMM_EXIT_VMX_VAPIC_ACCESS(pVCpu, pCtx); break;
+ case DBGFEVENT_EXIT_VMX_VAPIC_WRITE: VBOXVMM_EXIT_VMX_VAPIC_WRITE(pVCpu, pCtx); break;
+ default: AssertMsgFailed(("enmEvent2=%d uExitReason=%d\n", enmEvent2, uExitReason)); break;
+ }
+ }
+
+ /*
+     * Fire off the DBGF event, if enabled (our check here is just a quick one,
+ * the DBGF call will do a full check).
+ *
+ * Note! DBGF sets DBGFEVENT_INTERRUPT_SOFTWARE in the bitmap.
+     * Note! If we have two events, we prioritize the first, i.e. the instruction
+ * one, in order to avoid event nesting.
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if ( enmEvent1 != DBGFEVENT_END
+ && DBGF_IS_EVENT_ENABLED(pVM, enmEvent1))
+ {
+ HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ VBOXSTRICTRC rcStrict = DBGFEventGenericWithArgs(pVM, pVCpu, enmEvent1, DBGFEVENTCTX_HM, 1, uEventArg);
+ if (rcStrict != VINF_SUCCESS)
+ return rcStrict;
+ }
+ else if ( enmEvent2 != DBGFEVENT_END
+ && DBGF_IS_EVENT_ENABLED(pVM, enmEvent2))
+ {
+ HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ VBOXSTRICTRC rcStrict = DBGFEventGenericWithArgs(pVM, pVCpu, enmEvent2, DBGFEVENTCTX_HM, 1, uEventArg);
+ if (rcStrict != VINF_SUCCESS)
+ return rcStrict;
+ }
+
+ return VINF_SUCCESS;
+}
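For readers unfamiliar with the RT_CONCAT/RT_CONCAT3 trick used by the SET_EXIT/SET_BOTH macros in the function above: the sub-name is token-pasted onto both the DBGF event prefix and the dtrace probe prefix, so a single invocation wires up the instruction event, the exit event and their two enable checks. Below is a compilable toy version; the enum values, the _ENABLED() results and MY_CONCAT/MY_CONCAT3 are stand-ins for the real IPRT and VBoxDTrace symbols.

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the real DBGF/dtrace symbols, only to show the token-pasting
   pattern; the values and the _ENABLED() results are made up. */
enum { DBGFEVENT_INSTR_CPUID = 1, DBGFEVENT_EXIT_CPUID = 2 };
#define VBOXVMM_INSTR_CPUID_ENABLED()   true
#define VBOXVMM_EXIT_CPUID_ENABLED()    false

#define MY_CONCAT(a, b)      a##b
#define MY_CONCAT3(a, b, c)  a##b##c
#define SET_BOTH(a_EventSubName) \
    do { \
        enmEvent1 = MY_CONCAT(DBGFEVENT_INSTR_, a_EventSubName); \
        enmEvent2 = MY_CONCAT(DBGFEVENT_EXIT_,  a_EventSubName); \
        fDtrace1  = MY_CONCAT3(VBOXVMM_INSTR_, a_EventSubName, _ENABLED)(); \
        fDtrace2  = MY_CONCAT3(VBOXVMM_EXIT_,  a_EventSubName, _ENABLED)(); \
    } while (0)

int main(void)
{
    int  enmEvent1 = 0, enmEvent2 = 0;
    bool fDtrace1 = false, fDtrace2 = false;
    SET_BOTH(CPUID);    /* one invocation covers both the instruction and the exit side */
    printf("evt1=%d evt2=%d dtrace1=%d dtrace2=%d\n", enmEvent1, enmEvent2, fDtrace1, fDtrace2);
    return 0;
}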
+
+
+/**
+ * Single-stepping VM-exit filtering.
+ *
+ * This is preprocessing the VM-exits and deciding whether we've gotten far
+ * enough to return VINF_EM_DBG_STEPPED already. If not, normal VM-exit
+ * handling is performed.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure of the calling EMT.
+ * @param pVmxTransient Pointer to the VMX-transient structure.
+ * @param pDbgState The debug state.
+ */
+DECLINLINE(VBOXSTRICTRC) hmR0VmxRunDebugHandleExit(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState)
+{
+ /*
+ * Expensive (saves context) generic dtrace VM-exit probe.
+ */
+ uint32_t const uExitReason = pVmxTransient->uExitReason;
+ if (!VBOXVMM_R0_HMVMX_VMEXIT_ENABLED())
+ { /* more likely */ }
+ else
+ {
+ hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ int rc = hmR0VmxImportGuestState(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRC(rc);
+ VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, &pVCpu->cpum.GstCtx, pVmxTransient->uExitReason, pVmxTransient->uExitQual);
+ }
+
+ /*
+ * Check for host NMI, just to get that out of the way.
+ */
+ if (uExitReason != VMX_EXIT_XCPT_OR_NMI)
+ { /* normally likely */ }
+ else
+ {
+ int rc2 = hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
+ AssertRCReturn(rc2, rc2);
+ uint32_t uIntType = VMX_EXIT_INT_INFO_TYPE(pVmxTransient->uExitIntInfo);
+ if (uIntType == VMX_EXIT_INT_INFO_TYPE_NMI)
+ return hmR0VmxExitXcptOrNmi(pVCpu, pVmxTransient);
+ }
+
+ /*
+ * Check for single stepping event if we're stepping.
+ */
+ if (pVCpu->hm.s.fSingleInstruction)
+ {
+ switch (uExitReason)
+ {
+ case VMX_EXIT_MTF:
+ return hmR0VmxExitMtf(pVCpu, pVmxTransient);
+
+ /* Various events: */
+ case VMX_EXIT_XCPT_OR_NMI:
+ case VMX_EXIT_EXT_INT:
+ case VMX_EXIT_TRIPLE_FAULT:
+ case VMX_EXIT_INT_WINDOW:
+ case VMX_EXIT_NMI_WINDOW:
+ case VMX_EXIT_TASK_SWITCH:
+ case VMX_EXIT_TPR_BELOW_THRESHOLD:
+ case VMX_EXIT_APIC_ACCESS:
+ case VMX_EXIT_EPT_VIOLATION:
+ case VMX_EXIT_EPT_MISCONFIG:
+ case VMX_EXIT_PREEMPT_TIMER:
+
+ /* Instruction specific VM-exits: */
+ case VMX_EXIT_CPUID:
+ case VMX_EXIT_GETSEC:
+ case VMX_EXIT_HLT:
+ case VMX_EXIT_INVD:
+ case VMX_EXIT_INVLPG:
+ case VMX_EXIT_RDPMC:
+ case VMX_EXIT_RDTSC:
+ case VMX_EXIT_RSM:
+ case VMX_EXIT_VMCALL:
+ case VMX_EXIT_VMCLEAR:
+ case VMX_EXIT_VMLAUNCH:
+ case VMX_EXIT_VMPTRLD:
+ case VMX_EXIT_VMPTRST:
+ case VMX_EXIT_VMREAD:
+ case VMX_EXIT_VMRESUME:
+ case VMX_EXIT_VMWRITE:
+ case VMX_EXIT_VMXOFF:
+ case VMX_EXIT_VMXON:
+ case VMX_EXIT_MOV_CRX:
+ case VMX_EXIT_MOV_DRX:
+ case VMX_EXIT_IO_INSTR:
+ case VMX_EXIT_RDMSR:
+ case VMX_EXIT_WRMSR:
+ case VMX_EXIT_MWAIT:
+ case VMX_EXIT_MONITOR:
+ case VMX_EXIT_PAUSE:
+ case VMX_EXIT_GDTR_IDTR_ACCESS:
+ case VMX_EXIT_LDTR_TR_ACCESS:
+ case VMX_EXIT_INVEPT:
+ case VMX_EXIT_RDTSCP:
+ case VMX_EXIT_INVVPID:
+ case VMX_EXIT_WBINVD:
+ case VMX_EXIT_XSETBV:
+ case VMX_EXIT_RDRAND:
+ case VMX_EXIT_INVPCID:
+ case VMX_EXIT_VMFUNC:
+ case VMX_EXIT_RDSEED:
+ case VMX_EXIT_XSAVES:
+ case VMX_EXIT_XRSTORS:
+ {
+ int rc = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ AssertRCReturn(rc, rc);
+ if ( pVCpu->cpum.GstCtx.rip != pDbgState->uRipStart
+ || pVCpu->cpum.GstCtx.cs.Sel != pDbgState->uCsStart)
+ return VINF_EM_DBG_STEPPED;
+ break;
+ }
+
+ /* Errors and unexpected events: */
+ case VMX_EXIT_INIT_SIGNAL:
+ case VMX_EXIT_SIPI:
+ case VMX_EXIT_IO_SMI:
+ case VMX_EXIT_SMI:
+ case VMX_EXIT_ERR_INVALID_GUEST_STATE:
+ case VMX_EXIT_ERR_MSR_LOAD:
+ case VMX_EXIT_ERR_MACHINE_CHECK:
+ case VMX_EXIT_APIC_WRITE: /* Some talk about this being fault like, so I guess we must process it? */
+ break;
+
+ default:
+ AssertMsgFailed(("Unexpected VM-exit=%#x\n", uExitReason));
+ break;
+ }
+ }
+
+ /*
+ * Check for debugger event breakpoints and dtrace probes.
+ */
+ if ( uExitReason < RT_ELEMENTS(pDbgState->bmExitsToCheck) * 32U
+ && ASMBitTest(pDbgState->bmExitsToCheck, uExitReason) )
+ {
+ VBOXSTRICTRC rcStrict = hmR0VmxHandleExitDtraceEvents(pVCpu, pVmxTransient, uExitReason);
+ if (rcStrict != VINF_SUCCESS)
+ return rcStrict;
+ }
+
+ /*
+ * Normal processing.
+ */
+#ifdef HMVMX_USE_FUNCTION_TABLE
+ return g_apfnVMExitHandlers[uExitReason](pVCpu, pVmxTransient);
+#else
+ return hmR0VmxHandleExit(pVCpu, pVmxTransient, uExitReason);
+#endif
+}
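The single-stepping filter above boils down to one comparison: a step is considered complete once the guest RIP or the CS selector no longer matches the values captured when the debug loop was entered. A minimal model of that check follows; STEPSTATE and hasSteppedPast are illustrative names, not the real VMXRUNDBGSTATE.

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for the start-of-step snapshot kept in the debug state. */
typedef struct { uint64_t uRipStart; uint16_t uCsStart; } STEPSTATE;

static bool hasSteppedPast(STEPSTATE const *pStart, uint64_t uRipNow, uint16_t uCsNow)
{
    return uRipNow != pStart->uRipStart || uCsNow != pStart->uCsStart;
}

int main(void)
{
    STEPSTATE Start = { 0x1000, 0x08 };
    printf("%d\n", hasSteppedPast(&Start, 0x1000, 0x08)); /* 0: same instruction, keep running */
    printf("%d\n", hasSteppedPast(&Start, 0x1002, 0x08)); /* 1: RIP moved -> VINF_EM_DBG_STEPPED */
    return 0;
}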
+
+
+/**
+ * Single steps guest code using VT-x.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
+ */
+static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPU pVCpu)
+{
+ VMXTRANSIENT VmxTransient;
+ VmxTransient.fUpdateTscOffsettingAndPreemptTimer = true;
+
+ /* Set HMCPU indicators. */
+ bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
+ pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
+ pVCpu->hm.s.fDebugWantRdTscExit = false;
+ pVCpu->hm.s.fUsingDebugLoop = true;
+
+ /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
+ VMXRUNDBGSTATE DbgState;
+ hmR0VmxRunDebugStateInit(pVCpu, &DbgState);
+ hmR0VmxPreRunGuestDebugStateUpdate(pVCpu, &DbgState, &VmxTransient);
+
+ /*
+ * The loop.
+ */
+ VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
+ for (uint32_t cLoops = 0; ; cLoops++)
+ {
+ Assert(!HMR0SuspendPending());
+ HMVMX_ASSERT_CPU_SAFE(pVCpu);
+ bool fStepping = pVCpu->hm.s.fSingleInstruction;
+
+ /*
+ * Preparatory work for running guest code, this may force us to return
+ * to ring-3. This bugger disables interrupts on VINF_SUCCESS!
+ */
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
+        hmR0VmxPreRunGuestDebugStateApply(pVCpu, &DbgState);   /* Set up execute controls the next two calls can respond to. */
+ rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
+ if (rcStrict != VINF_SUCCESS)
+ break;
+
+ hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
+ hmR0VmxPreRunGuestDebugStateApply(pVCpu, &DbgState); /* Override any obnoxious code in the above two calls. */
+
+ /*
+ * Now we can run the guest code.
+ */
+ int rcRun = hmR0VmxRunGuest(pVCpu);
+
+ /*
+ * Restore any residual host-state and save any bits shared between host
+ * and guest into the guest-CPU state. Re-enables interrupts!
+ */
+ hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
+
+ /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
+ if (RT_SUCCESS(rcRun))
+ { /* very likely */ }
+ else
+ {
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
+ hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
+ return rcRun;
+ }
+
+ /* Profile the VM-exit. */
+ AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
+ STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
+ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
+ HMVMX_START_EXIT_DISPATCH_PROF();
+
+ VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
+
+ /*
+         * Handle the VM-exit - we quit earlier on certain VM-exits, see hmR0VmxRunDebugHandleExit().
+ */
+ rcStrict = hmR0VmxRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
+ if (rcStrict != VINF_SUCCESS)
+ break;
+ if (cLoops > pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops)
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
+ rcStrict = VINF_EM_RAW_INTERRUPT;
+ break;
+ }
+
+ /*
+ * Stepping: Did the RIP change, if so, consider it a single step.
+ * Otherwise, make sure one of the TFs gets set.
+ */
+ if (fStepping)
+ {
+ int rc = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ AssertRC(rc);
+ if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
+ || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
+ {
+ rcStrict = VINF_EM_DBG_STEPPED;
+ break;
+ }
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
+ }
+
+ /*
+ * Update when dtrace settings changes (DBGF kicks us, so no need to check).
+ */
+ if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo)
+ hmR0VmxPreRunGuestDebugStateUpdate(pVCpu, &DbgState, &VmxTransient);
+ }
+
+ /*
+ * Clear the X86_EFL_TF if necessary.
+ */
+ if (pVCpu->hm.s.fClearTrapFlag)
+ {
+ int rc = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_RFLAGS);
+ AssertRC(rc);
+ pVCpu->hm.s.fClearTrapFlag = false;
+ pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
+ }
+    /** @todo there seem to be issues with the resume flag when the monitor trap
+     *        flag is pending without being used. Seen early in BIOS init when
+ * accessing APIC page in protected mode. */
+
+ /*
+ * Restore VM-exit control settings as we may not reenter this function the
+ * next time around.
+ */
+ rcStrict = hmR0VmxRunDebugStateRevert(pVCpu, &DbgState, rcStrict);
+
+ /* Restore HMCPU indicators. */
+ pVCpu->hm.s.fUsingDebugLoop = false;
+ pVCpu->hm.s.fDebugWantRdTscExit = false;
+ pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
+ return rcStrict;
+}
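The shape of the loop above is worth calling out: the debug VMCS tweaks are re-applied around the regular pre-run work on every iteration (twice, in fact, because that work may overwrite them) and are reverted exactly once on the way out. A stripped-down sketch of that bracketing, with placeholder names (DBGSTATE, runGuestOnce) rather than the real functions:

#include <stdio.h>

/* Bare-bones shape of the debug run loop; DBGSTATE and runGuestOnce() are placeholders. */
typedef struct { int fApplied; } DBGSTATE;

static void dbgStateApply(DBGSTATE *p)  { p->fApplied = 1; }
static void dbgStateRevert(DBGSTATE *p) { p->fApplied = 0; }
static int  runGuestOnce(void)          { return 1; /* pretend we are done after one exit */ }

int main(void)
{
    DBGSTATE Dbg = { 0 };
    for (unsigned cLoops = 0; cLoops < 4096; cLoops++)
    {
        dbgStateApply(&Dbg);    /* re-applied every iteration; the real code even does it
                                   twice, before and after the regular pre-run work */
        if (runGuestOnce() != 0)
            break;              /* informational status, step completed, error, ... */
    }
    dbgStateRevert(&Dbg);       /* VM-exit controls restored exactly once on the way out */
    printf("applied=%d\n", Dbg.fApplied);
    return 0;
}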
+
+
+/** @} */
+
+
+/**
+ * Checks if any expensive dtrace probes are enabled and we should go to the
+ * debug loop.
+ *
+ * @returns true if we should use debug loop, false if not.
+ */
+static bool hmR0VmxAnyExpensiveProbesEnabled(void)
+{
+ /* It's probably faster to OR the raw 32-bit counter variables together.
+ Since the variables are in an array and the probes are next to one
+ another (more or less), we have good locality. So, better read
+       eight or nine cache lines every time and only have one conditional, than
+ 128+ conditionals, right? */
+ return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
+ | VBOXVMM_XCPT_DE_ENABLED_RAW()
+ | VBOXVMM_XCPT_DB_ENABLED_RAW()
+ | VBOXVMM_XCPT_BP_ENABLED_RAW()
+ | VBOXVMM_XCPT_OF_ENABLED_RAW()
+ | VBOXVMM_XCPT_BR_ENABLED_RAW()
+ | VBOXVMM_XCPT_UD_ENABLED_RAW()
+ | VBOXVMM_XCPT_NM_ENABLED_RAW()
+ | VBOXVMM_XCPT_DF_ENABLED_RAW()
+ | VBOXVMM_XCPT_TS_ENABLED_RAW()
+ | VBOXVMM_XCPT_NP_ENABLED_RAW()
+ | VBOXVMM_XCPT_SS_ENABLED_RAW()
+ | VBOXVMM_XCPT_GP_ENABLED_RAW()
+ | VBOXVMM_XCPT_PF_ENABLED_RAW()
+ | VBOXVMM_XCPT_MF_ENABLED_RAW()
+ | VBOXVMM_XCPT_AC_ENABLED_RAW()
+ | VBOXVMM_XCPT_XF_ENABLED_RAW()
+ | VBOXVMM_XCPT_VE_ENABLED_RAW()
+ | VBOXVMM_XCPT_SX_ENABLED_RAW()
+ | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
+ | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
+ ) != 0
+ || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
+ | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
+ | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
+ | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
+ | VBOXVMM_INSTR_INVD_ENABLED_RAW()
+ | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
+ | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
+ | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
+ | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
+ | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
+ | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
+ | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
+ | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
+ | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
+ | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
+ | VBOXVMM_INSTR_STR_ENABLED_RAW()
+ | VBOXVMM_INSTR_LTR_ENABLED_RAW()
+ | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
+ | VBOXVMM_INSTR_RSM_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
+ | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
+ | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
+ | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
+ | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
+ ) != 0
+ || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
+ | VBOXVMM_EXIT_HALT_ENABLED_RAW()
+ | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
+ | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
+ | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
+ | VBOXVMM_EXIT_INVD_ENABLED_RAW()
+ | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
+ | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
+ | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
+ | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
+ | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
+ | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
+ | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
+ | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
+ | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
+ | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
+ | VBOXVMM_EXIT_STR_ENABLED_RAW()
+ | VBOXVMM_EXIT_LTR_ENABLED_RAW()
+ | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
+ | VBOXVMM_EXIT_RSM_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
+ | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
+ | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
+ | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
+ | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
+ ) != 0;
+}
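The comment in the function above alludes to the probe enable state being a plain counter per probe, so OR-ing the raw values and testing once trades 128+ branches for a handful of sequential cache-line reads. A small illustration of the idea, assuming a hypothetical array of enable counters rather than the generated VBOXVMM_*_ENABLED_RAW() accessors:

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical per-probe enable counters; any non-zero entry means "enabled". */
static uint32_t g_aProbeEnableCounts[128];

static bool anyProbeEnabled(void)
{
    uint32_t fAny = 0;
    for (unsigned i = 0; i < 128; i++)
        fAny |= g_aProbeEnableCounts[i];    /* sequential reads, no branch per probe */
    return fAny != 0;                       /* a single conditional at the end */
}

int main(void)
{
    printf("%d\n", anyProbeEnabled());      /* 0 */
    g_aProbeEnableCounts[42] = 1;           /* someone enabled one probe */
    printf("%d\n", anyProbeEnabled());      /* 1 */
    return 0;
}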
+
+
+/**
+ * Runs the guest code using VT-x.
+ *
+ * @returns Strict VBox status code (i.e. informational status codes too).
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPU pVCpu)
+{
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ Assert(VMMRZCallRing3IsEnabled(pVCpu));
+ Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
+ HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
+
+ VMMRZCallRing3SetNotification(pVCpu, hmR0VmxCallRing3Callback, pCtx);
+
+ VBOXSTRICTRC rcStrict;
+ if ( !pVCpu->hm.s.fUseDebugLoop
+ && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
+ && !DBGFIsStepping(pVCpu)
+ && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
+ rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu);
+ else
+ rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu);
+
+ if (rcStrict == VERR_EM_INTERPRETER)
+ rcStrict = VINF_EM_RAW_EMULATE_INSTR;
+ else if (rcStrict == VINF_EM_RESET)
+ rcStrict = VINF_EM_TRIPLE_FAULT;
+
+ int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
+ if (RT_FAILURE(rc2))
+ {
+ pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
+ rcStrict = rc2;
+ }
+ Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
+ Assert(!VMMRZCallRing3IsNotificationSet(pVCpu));
+ return rcStrict;
+}
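The fast/slow path choice in VMXR0RunGuestCode above comes down to: take the normal loop only when nothing needs per-exit inspection. A condensed model of that predicate follows; RUNCFG and its field names are hypothetical stand-ins for the HMCPU/DBGF state the real code consults.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical summary of the per-vCPU run configuration. */
typedef struct
{
    bool     fUseDebugLoop;             /* sticky request to use the slower loop   */
    bool     fStepping;                 /* debugger single-stepping this vCPU      */
    bool     fAnyExpensiveProbes;       /* dtrace probes that need full context    */
    unsigned cEnabledInt3Breakpoints;   /* DBGF int3 breakpoints armed in the VM   */
} RUNCFG;

static bool useDebugLoop(RUNCFG const *p)
{
    return p->fUseDebugLoop
        || p->fStepping
        || p->fAnyExpensiveProbes
        || p->cEnabledInt3Breakpoints != 0;
}

int main(void)
{
    RUNCFG Cfg = { false, false, false, 0 };
    printf("debug loop: %d\n", useDebugLoop(&Cfg));  /* 0 -> normal loop */
    Cfg.fStepping = true;
    printf("debug loop: %d\n", useDebugLoop(&Cfg));  /* 1 -> debug loop  */
    return 0;
}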
+
+
+#ifndef HMVMX_USE_FUNCTION_TABLE
+DECLINLINE(VBOXSTRICTRC) hmR0VmxHandleExit(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t rcReason)
+{
+#ifdef DEBUG_ramshankar
+#define VMEXIT_CALL_RET(a_fSave, a_CallExpr) \
+ do { \
+ if (a_fSave != 0) \
+ hmR0VmxImportGuestState(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL); \
+ VBOXSTRICTRC rcStrict = a_CallExpr; \
+ if (a_fSave != 0) \
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); \
+ return rcStrict; \
+ } while (0)
+#else
+# define VMEXIT_CALL_RET(a_fSave, a_CallExpr) return a_CallExpr
+#endif
+ switch (rcReason)
+ {
+ case VMX_EXIT_EPT_MISCONFIG: VMEXIT_CALL_RET(0, hmR0VmxExitEptMisconfig(pVCpu, pVmxTransient));
+ case VMX_EXIT_EPT_VIOLATION: VMEXIT_CALL_RET(0, hmR0VmxExitEptViolation(pVCpu, pVmxTransient));
+ case VMX_EXIT_IO_INSTR: VMEXIT_CALL_RET(0, hmR0VmxExitIoInstr(pVCpu, pVmxTransient));
+ case VMX_EXIT_CPUID: VMEXIT_CALL_RET(0, hmR0VmxExitCpuid(pVCpu, pVmxTransient));
+ case VMX_EXIT_RDTSC: VMEXIT_CALL_RET(0, hmR0VmxExitRdtsc(pVCpu, pVmxTransient));
+ case VMX_EXIT_RDTSCP: VMEXIT_CALL_RET(0, hmR0VmxExitRdtscp(pVCpu, pVmxTransient));
+ case VMX_EXIT_APIC_ACCESS: VMEXIT_CALL_RET(0, hmR0VmxExitApicAccess(pVCpu, pVmxTransient));
+ case VMX_EXIT_XCPT_OR_NMI: VMEXIT_CALL_RET(0, hmR0VmxExitXcptOrNmi(pVCpu, pVmxTransient));
+ case VMX_EXIT_MOV_CRX: VMEXIT_CALL_RET(0, hmR0VmxExitMovCRx(pVCpu, pVmxTransient));
+ case VMX_EXIT_EXT_INT: VMEXIT_CALL_RET(0, hmR0VmxExitExtInt(pVCpu, pVmxTransient));
+ case VMX_EXIT_INT_WINDOW: VMEXIT_CALL_RET(0, hmR0VmxExitIntWindow(pVCpu, pVmxTransient));
+ case VMX_EXIT_TPR_BELOW_THRESHOLD: VMEXIT_CALL_RET(0, hmR0VmxExitTprBelowThreshold(pVCpu, pVmxTransient));
+ case VMX_EXIT_MWAIT: VMEXIT_CALL_RET(0, hmR0VmxExitMwait(pVCpu, pVmxTransient));
+ case VMX_EXIT_MONITOR: VMEXIT_CALL_RET(0, hmR0VmxExitMonitor(pVCpu, pVmxTransient));
+ case VMX_EXIT_TASK_SWITCH: VMEXIT_CALL_RET(0, hmR0VmxExitTaskSwitch(pVCpu, pVmxTransient));
+ case VMX_EXIT_PREEMPT_TIMER: VMEXIT_CALL_RET(0, hmR0VmxExitPreemptTimer(pVCpu, pVmxTransient));
+ case VMX_EXIT_RDMSR: VMEXIT_CALL_RET(0, hmR0VmxExitRdmsr(pVCpu, pVmxTransient));
+ case VMX_EXIT_WRMSR: VMEXIT_CALL_RET(0, hmR0VmxExitWrmsr(pVCpu, pVmxTransient));
+ case VMX_EXIT_VMCALL: VMEXIT_CALL_RET(0, hmR0VmxExitVmcall(pVCpu, pVmxTransient));
+ case VMX_EXIT_MOV_DRX: VMEXIT_CALL_RET(0, hmR0VmxExitMovDRx(pVCpu, pVmxTransient));
+ case VMX_EXIT_HLT: VMEXIT_CALL_RET(0, hmR0VmxExitHlt(pVCpu, pVmxTransient));
+ case VMX_EXIT_INVD: VMEXIT_CALL_RET(0, hmR0VmxExitInvd(pVCpu, pVmxTransient));
+ case VMX_EXIT_INVLPG: VMEXIT_CALL_RET(0, hmR0VmxExitInvlpg(pVCpu, pVmxTransient));
+ case VMX_EXIT_RSM: VMEXIT_CALL_RET(0, hmR0VmxExitRsm(pVCpu, pVmxTransient));
+ case VMX_EXIT_MTF: VMEXIT_CALL_RET(0, hmR0VmxExitMtf(pVCpu, pVmxTransient));
+ case VMX_EXIT_PAUSE: VMEXIT_CALL_RET(0, hmR0VmxExitPause(pVCpu, pVmxTransient));
+ case VMX_EXIT_GDTR_IDTR_ACCESS: VMEXIT_CALL_RET(0, hmR0VmxExitXdtrAccess(pVCpu, pVmxTransient));
+ case VMX_EXIT_LDTR_TR_ACCESS: VMEXIT_CALL_RET(0, hmR0VmxExitXdtrAccess(pVCpu, pVmxTransient));
+ case VMX_EXIT_WBINVD: VMEXIT_CALL_RET(0, hmR0VmxExitWbinvd(pVCpu, pVmxTransient));
+ case VMX_EXIT_XSETBV: VMEXIT_CALL_RET(0, hmR0VmxExitXsetbv(pVCpu, pVmxTransient));
+ case VMX_EXIT_RDRAND: VMEXIT_CALL_RET(0, hmR0VmxExitRdrand(pVCpu, pVmxTransient));
+ case VMX_EXIT_INVPCID: VMEXIT_CALL_RET(0, hmR0VmxExitInvpcid(pVCpu, pVmxTransient));
+ case VMX_EXIT_GETSEC: VMEXIT_CALL_RET(0, hmR0VmxExitGetsec(pVCpu, pVmxTransient));
+ case VMX_EXIT_RDPMC: VMEXIT_CALL_RET(0, hmR0VmxExitRdpmc(pVCpu, pVmxTransient));
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+ case VMX_EXIT_VMCLEAR: VMEXIT_CALL_RET(0, hmR0VmxExitVmclear(pVCpu, pVmxTransient));
+ case VMX_EXIT_VMLAUNCH: VMEXIT_CALL_RET(0, hmR0VmxExitVmlaunch(pVCpu, pVmxTransient));
+ case VMX_EXIT_VMPTRLD: VMEXIT_CALL_RET(0, hmR0VmxExitVmptrld(pVCpu, pVmxTransient));
+ case VMX_EXIT_VMPTRST: VMEXIT_CALL_RET(0, hmR0VmxExitVmptrst(pVCpu, pVmxTransient));
+ case VMX_EXIT_VMREAD: VMEXIT_CALL_RET(0, hmR0VmxExitVmread(pVCpu, pVmxTransient));
+            case VMX_EXIT_VMRESUME:                VMEXIT_CALL_RET(0, hmR0VmxExitVmresume(pVCpu, pVmxTransient));
+            case VMX_EXIT_VMWRITE:                 VMEXIT_CALL_RET(0, hmR0VmxExitVmwrite(pVCpu, pVmxTransient));
+ case VMX_EXIT_VMXOFF: VMEXIT_CALL_RET(0, hmR0VmxExitVmxoff(pVCpu, pVmxTransient));
+ case VMX_EXIT_VMXON: VMEXIT_CALL_RET(0, hmR0VmxExitVmxon(pVCpu, pVmxTransient));
+#else
+ case VMX_EXIT_VMCLEAR:
+ case VMX_EXIT_VMLAUNCH:
+ case VMX_EXIT_VMPTRLD:
+ case VMX_EXIT_VMPTRST:
+ case VMX_EXIT_VMREAD:
+ case VMX_EXIT_VMRESUME:
+ case VMX_EXIT_VMWRITE:
+ case VMX_EXIT_VMXOFF:
+ case VMX_EXIT_VMXON:
+ return hmR0VmxExitSetPendingXcptUD(pVCpu, pVmxTransient);
+#endif
+
+ case VMX_EXIT_TRIPLE_FAULT: return hmR0VmxExitTripleFault(pVCpu, pVmxTransient);
+ case VMX_EXIT_NMI_WINDOW: return hmR0VmxExitNmiWindow(pVCpu, pVmxTransient);
+ case VMX_EXIT_INIT_SIGNAL: return hmR0VmxExitInitSignal(pVCpu, pVmxTransient);
+ case VMX_EXIT_SIPI: return hmR0VmxExitSipi(pVCpu, pVmxTransient);
+ case VMX_EXIT_IO_SMI: return hmR0VmxExitIoSmi(pVCpu, pVmxTransient);
+ case VMX_EXIT_SMI: return hmR0VmxExitSmi(pVCpu, pVmxTransient);
+ case VMX_EXIT_ERR_MSR_LOAD: return hmR0VmxExitErrMsrLoad(pVCpu, pVmxTransient);
+ case VMX_EXIT_ERR_INVALID_GUEST_STATE: return hmR0VmxExitErrInvalidGuestState(pVCpu, pVmxTransient);
+ case VMX_EXIT_ERR_MACHINE_CHECK: return hmR0VmxExitErrMachineCheck(pVCpu, pVmxTransient);
+
+ case VMX_EXIT_INVEPT:
+ case VMX_EXIT_INVVPID:
+ case VMX_EXIT_VMFUNC:
+ case VMX_EXIT_XSAVES:
+ case VMX_EXIT_XRSTORS:
+ return hmR0VmxExitSetPendingXcptUD(pVCpu, pVmxTransient);
+
+ case VMX_EXIT_ENCLS:
+ case VMX_EXIT_RDSEED: /* only spurious VM-exits, so undefined */
+ case VMX_EXIT_PML_FULL:
+ default:
+ return hmR0VmxExitErrUndefined(pVCpu, pVmxTransient);
+ }
+#undef VMEXIT_CALL_RET
+}
+#endif /* !HMVMX_USE_FUNCTION_TABLE */
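HMVMX_USE_FUNCTION_TABLE selects between two dispatch strategies for the exit handlers: an array of function pointers indexed by the exit reason, or the switch defined above. A minimal sketch of both follows; the handler names and table are placeholders, not the real g_apfnVMExitHandlers.

#include <stdio.h>

/* Placeholder handler signature and two toy handlers. */
typedef int (*PFNEXITHANDLER)(void);

static int handleCpuid(void) { return 10; }
static int handleHlt(void)   { return 12; }

#define MY_USE_FUNCTION_TABLE
#ifdef MY_USE_FUNCTION_TABLE
static PFNEXITHANDLER const g_apfnHandlers[] = { handleCpuid, handleHlt };
static int dispatch(unsigned uExitReason) { return g_apfnHandlers[uExitReason](); }
#else
static int dispatch(unsigned uExitReason)
{
    switch (uExitReason)    /* the compiler typically turns this into a jump table anyway */
    {
        case 0:  return handleCpuid();
        case 1:  return handleHlt();
        default: return -1;
    }
}
#endif

int main(void)
{
    printf("%d %d\n", dispatch(0), dispatch(1));
    return 0;
}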
+
+
+#ifdef VBOX_STRICT
+/* Is there some generic IPRT define for this that is not in Runtime/internal/\* ?? */
+# define HMVMX_ASSERT_PREEMPT_CPUID_VAR() \
+ RTCPUID const idAssertCpu = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId()
+
+# define HMVMX_ASSERT_PREEMPT_CPUID() \
+ do { \
+ RTCPUID const idAssertCpuNow = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId(); \
+ AssertMsg(idAssertCpu == idAssertCpuNow, ("VMX %#x, %#x\n", idAssertCpu, idAssertCpuNow)); \
+ } while (0)
+
+# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \
+ do { \
+ AssertPtr((a_pVCpu)); \
+ AssertPtr((a_pVmxTransient)); \
+ Assert((a_pVmxTransient)->fVMEntryFailed == false); \
+ Assert(ASMIntAreEnabled()); \
+ HMVMX_ASSERT_PREEMPT_SAFE(a_pVCpu); \
+ HMVMX_ASSERT_PREEMPT_CPUID_VAR(); \
+ Log4Func(("vcpu[%RU32] -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v\n", (a_pVCpu)->idCpu)); \
+ HMVMX_ASSERT_PREEMPT_SAFE(a_pVCpu); \
+ if (VMMR0IsLogFlushDisabled((a_pVCpu))) \
+ HMVMX_ASSERT_PREEMPT_CPUID(); \
+ HMVMX_STOP_EXIT_DISPATCH_PROF(); \
+ } while (0)
+
+# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \
+ do { \
+ Log4Func(("\n")); \
+ } while (0)
+#else
+# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \
+ do { \
+ HMVMX_STOP_EXIT_DISPATCH_PROF(); \
+ NOREF((a_pVCpu)); NOREF((a_pVmxTransient)); \
+ } while (0)
+# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) do { } while (0)
+#endif
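The HMVMX_ASSERT_PREEMPT_CPUID_VAR/HMVMX_ASSERT_PREEMPT_CPUID pair above records the current CPU when an exit handler starts and later verifies the thread has not been rescheduled onto another CPU (when preemption is enabled, NIL is recorded and the check is moot). A toy model of that idea; getCurrentCpu() is a stand-in for RTMpCpuId() and the migration is simulated by hand.

#include <stdio.h>

/* Fake "current CPU" so the example is self-contained. */
static unsigned g_uFakeCpu = 3;
static unsigned getCurrentCpu(void) { return g_uFakeCpu; }

int main(void)
{
    unsigned const idAssertCpu = getCurrentCpu();        /* like HMVMX_ASSERT_PREEMPT_CPUID_VAR() */
    g_uFakeCpu = 5;                                      /* pretend the scheduler moved us */
    unsigned const idAssertCpuNow = getCurrentCpu();     /* like HMVMX_ASSERT_PREEMPT_CPUID() */
    if (idAssertCpu != idAssertCpuNow)
        printf("migrated from CPU %u to CPU %u - the assertion would fire\n", idAssertCpu, idAssertCpuNow);
    else
        printf("still on CPU %u\n", idAssertCpu);
    return 0;
}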
+
+
+/**
+ * Advances the guest RIP by the specified number of bytes.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param cbInstr Number of bytes to advance the RIP by.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+DECLINLINE(void) hmR0VmxAdvanceGuestRipBy(PVMCPU pVCpu, uint32_t cbInstr)
+{
+ /* Advance the RIP. */
+ pVCpu->cpum.GstCtx.rip += cbInstr;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP);
+
+ /* Update interrupt inhibition. */
+ if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
+ && pVCpu->cpum.GstCtx.rip != EMGetInhibitInterruptsPC(pVCpu))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+}
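hmR0VmxAdvanceGuestRipBy above also maintains the STI/MOV-SS interrupt-inhibition shadow: the shadow only covers the instruction at the recorded PC, so once RIP moves past it the force-flag is cleared. A self-contained model of that rule; MINICPU, the field names and the addresses are illustrative, while the real code consults EMGetInhibitInterruptsPC().

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Minimal stand-in for the bits of guest state the update touches. */
typedef struct { uint64_t rip; bool fInhibit; uint64_t uInhibitPC; } MINICPU;

static void advanceRipBy(MINICPU *pCpu, uint32_t cbInstr)
{
    pCpu->rip += cbInstr;
    if (pCpu->fInhibit && pCpu->rip != pCpu->uInhibitPC)
        pCpu->fInhibit = false;     /* moved past the shadowed instruction */
}

int main(void)
{
    MINICPU Cpu = { 0x1000, true, 0x1002 };     /* shadow set at 0x1000, covers the insn at 0x1002 */
    advanceRipBy(&Cpu, 2);                      /* now at 0x1002: still inside the shadow */
    printf("rip=%#llx inhibit=%d\n", (unsigned long long)Cpu.rip, (int)Cpu.fInhibit);
    advanceRipBy(&Cpu, 1);                      /* now at 0x1003: shadow cleared */
    printf("rip=%#llx inhibit=%d\n", (unsigned long long)Cpu.rip, (int)Cpu.fInhibit);
    return 0;
}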
+
+
+/**
+ * Advances the guest RIP after reading it from the VMCS.
+ *
+ * @returns VBox status code, no informational status codes.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param pVmxTransient Pointer to the VMX transient structure.
+ *
+ * @remarks No-long-jump zone!!!
+ */
+static int hmR0VmxAdvanceGuestRip(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
+ AssertRCReturn(rc, rc);
+
+ hmR0VmxAdvanceGuestRipBy(pVCpu, pVmxTransient->cbInstr);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Tries to determine what part of the guest-state VT-x has deemed as invalid
+ * and update error record fields accordingly.
+ *
+ * @return VMX_IGS_* return codes.
+ * @retval VMX_IGS_REASON_NOT_FOUND if this function could not find anything
+ * wrong with the guest state.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @remarks This function assumes our cache of the VMCS controls
+ *          is valid, i.e. hmR0VmxCheckVmcsCtls() succeeded.
+ */
+static uint32_t hmR0VmxCheckGuestState(PVMCPU pVCpu)
+{
+#define HMVMX_ERROR_BREAK(err) { uError = (err); break; }
+#define HMVMX_CHECK_BREAK(expr, err) if (!(expr)) { \
+ uError = (err); \
+ break; \
+ } else do { } while (0)
+
+ int rc;
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ uint32_t uError = VMX_IGS_ERROR;
+ uint32_t u32Val;
+ bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuest;
+
+ do
+ {
+ /*
+ * CR0.
+ */
+ uint32_t fSetCr0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
+ uint32_t const fZapCr0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
+ /* Exceptions for unrestricted-guests for fixed CR0 bits (PE, PG).
+ See Intel spec. 26.3.1 "Checks on Guest Control Registers, Debug Registers and MSRs." */
+ if (fUnrestrictedGuest)
+ fSetCr0 &= ~(X86_CR0_PE | X86_CR0_PG);
+
+ uint32_t u32GuestCr0;
+ rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32GuestCr0);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK((u32GuestCr0 & fSetCr0) == fSetCr0, VMX_IGS_CR0_FIXED1);
+ HMVMX_CHECK_BREAK(!(u32GuestCr0 & ~fZapCr0), VMX_IGS_CR0_FIXED0);
+ if ( !fUnrestrictedGuest
+ && (u32GuestCr0 & X86_CR0_PG)
+ && !(u32GuestCr0 & X86_CR0_PE))
+ {
+ HMVMX_ERROR_BREAK(VMX_IGS_CR0_PG_PE_COMBO);
+ }
+
+ /*
+ * CR4.
+ */
+ uint64_t const fSetCr4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
+ uint64_t const fZapCr4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
+
+ uint32_t u32GuestCr4;
+ rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR4, &u32GuestCr4);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK((u32GuestCr4 & fSetCr4) == fSetCr4, VMX_IGS_CR4_FIXED1);
+ HMVMX_CHECK_BREAK(!(u32GuestCr4 & ~fZapCr4), VMX_IGS_CR4_FIXED0);
+
+ /*
+ * IA32_DEBUGCTL MSR.
+ */
+ uint64_t u64Val;
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, &u64Val);
+ AssertRCBreak(rc);
+ if ( (pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
+ && (u64Val & 0xfffffe3c)) /* Bits 31:9, bits 5:2 MBZ. */
+ {
+ HMVMX_ERROR_BREAK(VMX_IGS_DEBUGCTL_MSR_RESERVED);
+ }
+ uint64_t u64DebugCtlMsr = u64Val;
+
+#ifdef VBOX_STRICT
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val);
+ AssertRCBreak(rc);
+ Assert(u32Val == pVCpu->hm.s.vmx.u32EntryCtls);
+#endif
+ bool const fLongModeGuest = RT_BOOL(pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_IA32E_MODE_GUEST);
+
+ /*
+ * RIP and RFLAGS.
+ */
+ uint32_t u32Eflags;
+#if HC_ARCH_BITS == 64
+ rc = VMXReadVmcs64(VMX_VMCS_GUEST_RIP, &u64Val);
+ AssertRCBreak(rc);
+ /* pCtx->rip can be different than the one in the VMCS (e.g. run guest code and VM-exits that don't update it). */
+ if ( !fLongModeGuest
+ || !pCtx->cs.Attr.n.u1Long)
+ {
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffff00000000)), VMX_IGS_LONGMODE_RIP_INVALID);
+ }
+ /** @todo If the processor supports N < 64 linear-address bits, bits 63:N
+ * must be identical if the "IA-32e mode guest" VM-entry
+ * control is 1 and CS.L is 1. No check applies if the
+ * CPU supports 64 linear-address bits. */
+
+ /* Flags in pCtx can be different (real-on-v86 for instance). We are only concerned about the VMCS contents here. */
+ rc = VMXReadVmcs64(VMX_VMCS_GUEST_RFLAGS, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffc08028)), /* Bit 63:22, Bit 15, 5, 3 MBZ. */
+ VMX_IGS_RFLAGS_RESERVED);
+ HMVMX_CHECK_BREAK((u64Val & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */
+ u32Eflags = u64Val;
+#else
+ rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Eflags);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u32Eflags & 0xffc08028), VMX_IGS_RFLAGS_RESERVED); /* Bit 31:22, Bit 15, 5, 3 MBZ. */
+ HMVMX_CHECK_BREAK((u32Eflags & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */
+#endif
+
+ if ( fLongModeGuest
+ || ( fUnrestrictedGuest
+ && !(u32GuestCr0 & X86_CR0_PE)))
+ {
+ HMVMX_CHECK_BREAK(!(u32Eflags & X86_EFL_VM), VMX_IGS_RFLAGS_VM_INVALID);
+ }
+
+ uint32_t u32EntryInfo;
+ rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32EntryInfo);
+ AssertRCBreak(rc);
+ if ( VMX_ENTRY_INT_INFO_IS_VALID(u32EntryInfo)
+ && VMX_ENTRY_INT_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INT_INFO_TYPE_EXT_INT)
+ {
+ HMVMX_CHECK_BREAK(u32Eflags & X86_EFL_IF, VMX_IGS_RFLAGS_IF_INVALID);
+ }
+
+ /*
+ * 64-bit checks.
+ */
+#if HC_ARCH_BITS == 64
+ if (fLongModeGuest)
+ {
+ HMVMX_CHECK_BREAK(u32GuestCr0 & X86_CR0_PG, VMX_IGS_CR0_PG_LONGMODE);
+ HMVMX_CHECK_BREAK(u32GuestCr4 & X86_CR4_PAE, VMX_IGS_CR4_PAE_LONGMODE);
+ }
+
+ if ( !fLongModeGuest
+ && (u32GuestCr4 & X86_CR4_PCIDE))
+ {
+ HMVMX_ERROR_BREAK(VMX_IGS_CR4_PCIDE);
+ }
+
+ /** @todo CR3 field must be such that bits 63:52 and bits in the range
+ * 51:32 beyond the processor's physical-address width are 0. */
+
+ if ( (pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
+ && (pCtx->dr[7] & X86_DR7_MBZ_MASK))
+ {
+ HMVMX_ERROR_BREAK(VMX_IGS_DR7_RESERVED);
+ }
+
+ rc = VMXReadVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_ESP_NOT_CANONICAL);
+
+ rc = VMXReadVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_EIP_NOT_CANONICAL);
+#endif
+
+ /*
+ * PERF_GLOBAL MSR.
+ */
+ if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_LOAD_PERF_MSR)
+ {
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffff8fffffffc)),
+ VMX_IGS_PERF_GLOBAL_MSR_RESERVED); /* Bits 63:35, bits 31:2 MBZ. */
+ }
+
+ /*
+ * PAT MSR.
+ */
+ if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_LOAD_PAT_MSR)
+ {
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PAT_FULL, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0x707070707070707)), VMX_IGS_PAT_MSR_RESERVED);
+ for (unsigned i = 0; i < 8; i++)
+ {
+ uint8_t u8Val = (u64Val & 0xff);
+ if ( u8Val != 0 /* UC */
+ && u8Val != 1 /* WC */
+ && u8Val != 4 /* WT */
+ && u8Val != 5 /* WP */
+ && u8Val != 6 /* WB */
+ && u8Val != 7 /* UC- */)
+ {
+ HMVMX_ERROR_BREAK(VMX_IGS_PAT_MSR_INVALID);
+ }
+ u64Val >>= 8;
+ }
+ }
+
+ /*
+ * EFER MSR.
+ */
+ if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_LOAD_EFER_MSR)
+ {
+ Assert(pVM->hm.s.vmx.fSupportsVmcsEfer);
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_EFER_FULL, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffffffffff2fe)),
+ VMX_IGS_EFER_MSR_RESERVED); /* Bits 63:12, bit 9, bits 7:1 MBZ. */
+ HMVMX_CHECK_BREAK(RT_BOOL(u64Val & MSR_K6_EFER_LMA) == RT_BOOL( pVCpu->hm.s.vmx.u32EntryCtls
+ & VMX_ENTRY_CTLS_IA32E_MODE_GUEST),
+ VMX_IGS_EFER_LMA_GUEST_MODE_MISMATCH);
+ /** @todo r=ramshankar: Unrestricted check here is probably wrong, see
+ * iemVmxVmentryCheckGuestState(). */
+ HMVMX_CHECK_BREAK( fUnrestrictedGuest
+ || !(u32GuestCr0 & X86_CR0_PG)
+ || RT_BOOL(u64Val & MSR_K6_EFER_LMA) == RT_BOOL(u64Val & MSR_K6_EFER_LME),
+ VMX_IGS_EFER_LMA_LME_MISMATCH);
+ }
+
+ /*
+ * Segment registers.
+ */
+ HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
+ || !(pCtx->ldtr.Sel & X86_SEL_LDT), VMX_IGS_LDTR_TI_INVALID);
+ if (!(u32Eflags & X86_EFL_VM))
+ {
+ /* CS */
+ HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1Present, VMX_IGS_CS_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xf00), VMX_IGS_CS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xfffe0000), VMX_IGS_CS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK( (pCtx->cs.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->cs.u32Limit & 0xfff00000)
+ || (pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
+ /* CS cannot be loaded with NULL in protected mode. */
+ HMVMX_CHECK_BREAK(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_CS_ATTR_UNUSABLE);
+ HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1DescType, VMX_IGS_CS_ATTR_S_INVALID);
+ if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11)
+ HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_UNEQUAL);
+ else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15)
+ HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_MISMATCH);
+ else if (pVM->hm.s.vmx.fUnrestrictedGuest && pCtx->cs.Attr.n.u4Type == 3)
+ HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == 0, VMX_IGS_CS_ATTR_DPL_INVALID);
+ else
+ HMVMX_ERROR_BREAK(VMX_IGS_CS_ATTR_TYPE_INVALID);
+
+ /* SS */
+ HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
+ || (pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL), VMX_IGS_SS_CS_RPL_UNEQUAL);
+ HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL), VMX_IGS_SS_ATTR_DPL_RPL_UNEQUAL);
+ if ( !(pCtx->cr0 & X86_CR0_PE)
+ || pCtx->cs.Attr.n.u4Type == 3)
+ {
+ HMVMX_CHECK_BREAK(!pCtx->ss.Attr.n.u2Dpl, VMX_IGS_SS_ATTR_DPL_INVALID);
+ }
+ if (!(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7, VMX_IGS_SS_ATTR_TYPE_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u1Present, VMX_IGS_SS_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xf00), VMX_IGS_SS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xfffe0000), VMX_IGS_SS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK( (pCtx->ss.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->ss.u32Limit & 0xfff00000)
+ || (pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID);
+ }
+
+        /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxExportGuestSegmentReg(). */
+ if (!(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_DS_ATTR_A_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u1Present, VMX_IGS_DS_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
+ || pCtx->ds.Attr.n.u4Type > 11
+ || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL);
+ HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xf00), VMX_IGS_DS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xfffe0000), VMX_IGS_DS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK( (pCtx->ds.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->ds.u32Limit & 0xfff00000)
+ || (pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_DS_ATTR_TYPE_INVALID);
+ }
+ if (!(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_ES_ATTR_A_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u1Present, VMX_IGS_ES_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
+ || pCtx->es.Attr.n.u4Type > 11
+ || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL);
+ HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xf00), VMX_IGS_ES_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xfffe0000), VMX_IGS_ES_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK( (pCtx->es.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->es.u32Limit & 0xfff00000)
+ || (pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_ES_ATTR_TYPE_INVALID);
+ }
+ if (!(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_FS_ATTR_A_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u1Present, VMX_IGS_FS_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
+ || pCtx->fs.Attr.n.u4Type > 11
+ || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL), VMX_IGS_FS_ATTR_DPL_RPL_UNEQUAL);
+ HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xf00), VMX_IGS_FS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xfffe0000), VMX_IGS_FS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK( (pCtx->fs.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->fs.u32Limit & 0xfff00000)
+ || (pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_FS_ATTR_TYPE_INVALID);
+ }
+ if (!(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE))
+ {
+ HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_GS_ATTR_A_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u1Present, VMX_IGS_GS_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
+ || pCtx->gs.Attr.n.u4Type > 11
+ || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL), VMX_IGS_GS_ATTR_DPL_RPL_UNEQUAL);
+ HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xf00), VMX_IGS_GS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xfffe0000), VMX_IGS_GS_ATTR_RESERVED);
+ HMVMX_CHECK_BREAK( (pCtx->gs.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->gs.u32Limit & 0xfff00000)
+ || (pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
+ || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_GS_ATTR_TYPE_INVALID);
+ }
+ /* 64-bit capable CPUs. */
+#if HC_ARCH_BITS == 64
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL);
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL);
+ HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
+ || X86_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL);
+ HMVMX_CHECK_BREAK(!RT_HI_U32(pCtx->cs.u64Base), VMX_IGS_LONGMODE_CS_BASE_INVALID);
+ HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ss.u64Base),
+ VMX_IGS_LONGMODE_SS_BASE_INVALID);
+ HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ds.u64Base),
+ VMX_IGS_LONGMODE_DS_BASE_INVALID);
+ HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->es.u64Base),
+ VMX_IGS_LONGMODE_ES_BASE_INVALID);
+#endif
+ }
+ else
+ {
+ /* V86 mode checks. */
+ uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr;
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ {
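+            /* While emulating real mode via V86 mode we force all segment attributes to 0xf3:
+               a present (P=1), DPL=3, read/write accessed data segment (type=3, S=1), which is
+               exactly what the V86 guest-state checks below require. */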
+ u32CSAttr = 0xf3; u32SSAttr = 0xf3;
+ u32DSAttr = 0xf3; u32ESAttr = 0xf3;
+ u32FSAttr = 0xf3; u32GSAttr = 0xf3;
+ }
+ else
+ {
+ u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u;
+ u32DSAttr = pCtx->ds.Attr.u; u32ESAttr = pCtx->es.Attr.u;
+ u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u;
+ }
+
+ /* CS */
+ HMVMX_CHECK_BREAK((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), VMX_IGS_V86_CS_BASE_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->cs.u32Limit == 0xffff, VMX_IGS_V86_CS_LIMIT_INVALID);
+ HMVMX_CHECK_BREAK(u32CSAttr == 0xf3, VMX_IGS_V86_CS_ATTR_INVALID);
+ /* SS */
+ HMVMX_CHECK_BREAK((pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4), VMX_IGS_V86_SS_BASE_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->ss.u32Limit == 0xffff, VMX_IGS_V86_SS_LIMIT_INVALID);
+ HMVMX_CHECK_BREAK(u32SSAttr == 0xf3, VMX_IGS_V86_SS_ATTR_INVALID);
+ /* DS */
+ HMVMX_CHECK_BREAK((pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4), VMX_IGS_V86_DS_BASE_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->ds.u32Limit == 0xffff, VMX_IGS_V86_DS_LIMIT_INVALID);
+ HMVMX_CHECK_BREAK(u32DSAttr == 0xf3, VMX_IGS_V86_DS_ATTR_INVALID);
+ /* ES */
+ HMVMX_CHECK_BREAK((pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4), VMX_IGS_V86_ES_BASE_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->es.u32Limit == 0xffff, VMX_IGS_V86_ES_LIMIT_INVALID);
+ HMVMX_CHECK_BREAK(u32ESAttr == 0xf3, VMX_IGS_V86_ES_ATTR_INVALID);
+ /* FS */
+ HMVMX_CHECK_BREAK((pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4), VMX_IGS_V86_FS_BASE_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->fs.u32Limit == 0xffff, VMX_IGS_V86_FS_LIMIT_INVALID);
+ HMVMX_CHECK_BREAK(u32FSAttr == 0xf3, VMX_IGS_V86_FS_ATTR_INVALID);
+ /* GS */
+ HMVMX_CHECK_BREAK((pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4), VMX_IGS_V86_GS_BASE_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->gs.u32Limit == 0xffff, VMX_IGS_V86_GS_LIMIT_INVALID);
+ HMVMX_CHECK_BREAK(u32GSAttr == 0xf3, VMX_IGS_V86_GS_ATTR_INVALID);
+ /* 64-bit capable CPUs. */
+#if HC_ARCH_BITS == 64
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL);
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL);
+ HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
+ || X86_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL);
+ HMVMX_CHECK_BREAK(!RT_HI_U32(pCtx->cs.u64Base), VMX_IGS_LONGMODE_CS_BASE_INVALID);
+ HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ss.u64Base),
+ VMX_IGS_LONGMODE_SS_BASE_INVALID);
+ HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ds.u64Base),
+ VMX_IGS_LONGMODE_DS_BASE_INVALID);
+ HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->es.u64Base),
+ VMX_IGS_LONGMODE_ES_BASE_INVALID);
+#endif
+ }
+
+ /*
+ * TR.
+ */
+ HMVMX_CHECK_BREAK(!(pCtx->tr.Sel & X86_SEL_LDT), VMX_IGS_TR_TI_INVALID);
+ /* 64-bit capable CPUs. */
+#if HC_ARCH_BITS == 64
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->tr.u64Base), VMX_IGS_TR_BASE_NOT_CANONICAL);
+#endif
+ if (fLongModeGuest)
+ {
+ HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u4Type == 11, /* 64-bit busy TSS. */
+ VMX_IGS_LONGMODE_TR_ATTR_TYPE_INVALID);
+ }
+ else
+ {
+ HMVMX_CHECK_BREAK( pCtx->tr.Attr.n.u4Type == 3 /* 16-bit busy TSS. */
+ || pCtx->tr.Attr.n.u4Type == 11, /* 32-bit busy TSS.*/
+ VMX_IGS_TR_ATTR_TYPE_INVALID);
+ }
+ HMVMX_CHECK_BREAK(!pCtx->tr.Attr.n.u1DescType, VMX_IGS_TR_ATTR_S_INVALID);
+ HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u1Present, VMX_IGS_TR_ATTR_P_INVALID);
+ HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & 0xf00), VMX_IGS_TR_ATTR_RESERVED); /* Bits 11:8 MBZ. */
+ HMVMX_CHECK_BREAK( (pCtx->tr.u32Limit & 0xfff) == 0xfff
+ || !(pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->tr.u32Limit & 0xfff00000)
+ || (pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID);
+ HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_TR_ATTR_UNUSABLE);
+
+ /*
+ * GDTR and IDTR.
+ */
+#if HC_ARCH_BITS == 64
+ rc = VMXReadVmcs64(VMX_VMCS_GUEST_GDTR_BASE, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_GDTR_BASE_NOT_CANONICAL);
+
+ rc = VMXReadVmcs64(VMX_VMCS_GUEST_IDTR_BASE, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_IDTR_BASE_NOT_CANONICAL);
+#endif
+
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_GDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */
+
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_IDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */
+
+ /*
+ * Guest Non-Register State.
+ */
+ /* Activity State. */
+ uint32_t u32ActivityState;
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_ACTIVITY_STATE, &u32ActivityState);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK( !u32ActivityState
+ || (u32ActivityState & RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Misc, VMX_BF_MISC_ACTIVITY_STATES)),
+ VMX_IGS_ACTIVITY_STATE_INVALID);
+ HMVMX_CHECK_BREAK( !(pCtx->ss.Attr.n.u2Dpl)
+ || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_HLT, VMX_IGS_ACTIVITY_STATE_HLT_INVALID);
+ uint32_t u32IntrState;
+ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &u32IntrState);
+ AssertRCBreak(rc);
+ if ( u32IntrState == VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS
+ || u32IntrState == VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
+ {
+ HMVMX_CHECK_BREAK(u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_ACTIVE, VMX_IGS_ACTIVITY_STATE_ACTIVE_INVALID);
+ }
+
+    /** @todo Activity state and injecting interrupts. Left as a todo since we
+     * currently don't use any activity state other than ACTIVE. */
+
+ HMVMX_CHECK_BREAK( !(pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_ENTRY_TO_SMM)
+ || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_SIPI_WAIT, VMX_IGS_ACTIVITY_STATE_SIPI_WAIT_INVALID);
+
+ /* Guest interruptibility-state. */
+ HMVMX_CHECK_BREAK(!(u32IntrState & 0xffffffe0), VMX_IGS_INTERRUPTIBILITY_STATE_RESERVED);
+ HMVMX_CHECK_BREAK((u32IntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
+ != (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS),
+ VMX_IGS_INTERRUPTIBILITY_STATE_STI_MOVSS_INVALID);
+ HMVMX_CHECK_BREAK( (u32Eflags & X86_EFL_IF)
+ || !(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI),
+ VMX_IGS_INTERRUPTIBILITY_STATE_STI_EFL_INVALID);
+ if (VMX_ENTRY_INT_INFO_IS_VALID(u32EntryInfo))
+ {
+ if (VMX_ENTRY_INT_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INT_INFO_TYPE_EXT_INT)
+ {
+ HMVMX_CHECK_BREAK( !(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
+ && !(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS),
+ VMX_IGS_INTERRUPTIBILITY_STATE_EXT_INT_INVALID);
+ }
+ else if (VMX_ENTRY_INT_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INT_INFO_TYPE_NMI)
+ {
+ HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS),
+ VMX_IGS_INTERRUPTIBILITY_STATE_MOVSS_INVALID);
+ HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI),
+ VMX_IGS_INTERRUPTIBILITY_STATE_STI_INVALID);
+ }
+ }
+ /** @todo Assumes the processor is not in SMM. */
+ HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI),
+ VMX_IGS_INTERRUPTIBILITY_STATE_SMI_INVALID);
+ HMVMX_CHECK_BREAK( !(pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_ENTRY_TO_SMM)
+ || (u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI),
+ VMX_IGS_INTERRUPTIBILITY_STATE_SMI_SMM_INVALID);
+ if ( (pVCpu->hm.s.vmx.u32PinCtls & VMX_PIN_CTLS_VIRT_NMI)
+ && VMX_ENTRY_INT_INFO_IS_VALID(u32EntryInfo)
+ && VMX_ENTRY_INT_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INT_INFO_TYPE_NMI)
+ {
+ HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI),
+ VMX_IGS_INTERRUPTIBILITY_STATE_NMI_INVALID);
+ }
+
+ /* Pending debug exceptions. */
+#if HC_ARCH_BITS == 64
+ rc = VMXReadVmcs64(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, &u64Val);
+ AssertRCBreak(rc);
+ /* Bits 63:15, Bit 13, Bits 11:4 MBZ. */
+ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffffaff0)), VMX_IGS_LONGMODE_PENDING_DEBUG_RESERVED);
+ u32Val = u64Val; /* For pending debug exceptions checks below. */
+#else
+ rc = VMXReadVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, &u32Val);
+ AssertRCBreak(rc);
+ /* Bits 31:15, Bit 13, Bits 11:4 MBZ. */
+ HMVMX_CHECK_BREAK(!(u32Val & 0xffffaff0), VMX_IGS_PENDING_DEBUG_RESERVED);
+#endif
+
+ if ( (u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
+ || (u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS)
+ || u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_HLT)
+ {
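+        /* Per the Intel SDM guest-state checks: with STI/MOV-SS blocking or the HLT activity
+           state, the BS bit of the pending debug exceptions field must be set if and only if
+           single-stepping is in effect (EFLAGS.TF=1 and IA32_DEBUGCTL.BTF=0). */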
+ if ( (u32Eflags & X86_EFL_TF)
+ && !(u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */
+ {
+ /* Bit 14 is PendingDebug.BS. */
+ HMVMX_CHECK_BREAK(u32Val & RT_BIT(14), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_SET);
+ }
+ if ( !(u32Eflags & X86_EFL_TF)
+ || (u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */
+ {
+ /* Bit 14 is PendingDebug.BS. */
+ HMVMX_CHECK_BREAK(!(u32Val & RT_BIT(14)), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_CLEAR);
+ }
+ }
+
+ /* VMCS link pointer. */
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, &u64Val);
+ AssertRCBreak(rc);
+ if (u64Val != UINT64_C(0xffffffffffffffff))
+ {
+ HMVMX_CHECK_BREAK(!(u64Val & 0xfff), VMX_IGS_VMCS_LINK_PTR_RESERVED);
+ /** @todo Bits beyond the processor's physical-address width MBZ. */
+        /** @todo The 32 bits located in memory referenced by the value of this field
+         *  (as a physical address) must contain the processor's VMCS revision ID. */
+ /** @todo SMM checks. */
+ }
+
+ /** @todo Checks on Guest Page-Directory-Pointer-Table Entries when guest is
+ * not using Nested Paging? */
+ if ( pVM->hm.s.fNestedPaging
+ && !fLongModeGuest
+ && CPUMIsGuestInPAEModeEx(pCtx))
+ {
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
+
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
+
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
+
+ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &u64Val);
+ AssertRCBreak(rc);
+ HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
+ }
+
+ /* Shouldn't happen but distinguish it from AssertRCBreak() errors. */
+ if (uError == VMX_IGS_ERROR)
+ uError = VMX_IGS_REASON_NOT_FOUND;
+ } while (0);
+
+ pVCpu->hm.s.u32HMError = uError;
+ return uError;
+
+#undef HMVMX_ERROR_BREAK
+#undef HMVMX_CHECK_BREAK
+}
+
+
+/** @name VM-exit handlers.
+ * @{
+ */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- VM-exit handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+
+/**
+ * VM-exit handler for external interrupts (VMX_EXIT_EXT_INT).
+ */
+HMVMX_EXIT_DECL hmR0VmxExitExtInt(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitExtInt);
+ /* Windows hosts (32-bit and 64-bit) have DPC latency issues. See @bugref{6853}. */
+ if (VMMR0ThreadCtxHookIsEnabled(pVCpu))
+ return VINF_SUCCESS;
+ return VINF_EM_RAW_INTERRUPT;
+}
+
+
+/**
+ * VM-exit handler for exceptions or NMIs (VMX_EXIT_XCPT_OR_NMI).
+ */
+HMVMX_EXIT_DECL hmR0VmxExitXcptOrNmi(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitXcptNmi, y3);
+
+ int rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
+ AssertRCReturn(rc, rc);
+
+ uint32_t uIntType = VMX_EXIT_INT_INFO_TYPE(pVmxTransient->uExitIntInfo);
+ Assert( !(pVCpu->hm.s.vmx.u32ExitCtls & VMX_EXIT_CTLS_ACK_EXT_INT)
+ && uIntType != VMX_EXIT_INT_INFO_TYPE_EXT_INT);
+ Assert(VMX_EXIT_INT_INFO_IS_VALID(pVmxTransient->uExitIntInfo));
+
+ if (uIntType == VMX_EXIT_INT_INFO_TYPE_NMI)
+ {
+ /*
+ * This cannot be a guest NMI as the only way for the guest to receive an NMI is if we
+ * injected it ourselves and anything we inject is not going to cause a VM-exit directly
+ * for the event being injected[1]. Go ahead and dispatch the NMI to the host[2].
+ *
+ * [1] -- See Intel spec. 27.2.3 "Information for VM Exits During Event Delivery".
+ * [2] -- See Intel spec. 27.5.5 "Updating Non-Register State".
+ */
+ VMXDispatchHostNmi();
+ STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
+ return VINF_SUCCESS;
+ }
+
+ /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
+ VBOXSTRICTRC rcStrictRc1 = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient);
+ if (RT_UNLIKELY(rcStrictRc1 == VINF_SUCCESS))
+ { /* likely */ }
+ else
+ {
+ if (rcStrictRc1 == VINF_HM_DOUBLE_FAULT)
+ rcStrictRc1 = VINF_SUCCESS;
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
+ return rcStrictRc1;
+ }
+
+ uint32_t uExitIntInfo = pVmxTransient->uExitIntInfo;
+ uint32_t uVector = VMX_EXIT_INT_INFO_VECTOR(uExitIntInfo);
+ switch (uIntType)
+ {
+ case VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT: /* Privileged software exception. (#DB from ICEBP) */
+ Assert(uVector == X86_XCPT_DB);
+ RT_FALL_THRU();
+ case VMX_EXIT_INT_INFO_TYPE_SW_XCPT: /* Software exception. (#BP or #OF) */
+ Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF || uIntType == VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT);
+ RT_FALL_THRU();
+ case VMX_EXIT_INT_INFO_TYPE_HW_XCPT:
+ {
+ /*
+ * If there's any exception caused as a result of event injection, the resulting
+             * secondary/final exception will be pending; we shall continue guest execution
+ * after injecting the event. The page-fault case is complicated and we manually
+ * handle any currently pending event in hmR0VmxExitXcptPF.
+ */
+ if (!pVCpu->hm.s.Event.fPending)
+ { /* likely */ }
+ else if (uVector != X86_XCPT_PF)
+ {
+ rc = VINF_SUCCESS;
+ break;
+ }
+
+ switch (uVector)
+ {
+ case X86_XCPT_PF: rc = hmR0VmxExitXcptPF(pVCpu, pVmxTransient); break;
+ case X86_XCPT_GP: rc = hmR0VmxExitXcptGP(pVCpu, pVmxTransient); break;
+ case X86_XCPT_MF: rc = hmR0VmxExitXcptMF(pVCpu, pVmxTransient); break;
+ case X86_XCPT_DB: rc = hmR0VmxExitXcptDB(pVCpu, pVmxTransient); break;
+ case X86_XCPT_BP: rc = hmR0VmxExitXcptBP(pVCpu, pVmxTransient); break;
+ case X86_XCPT_AC: rc = hmR0VmxExitXcptAC(pVCpu, pVmxTransient); break;
+
+ case X86_XCPT_NM: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM);
+ rc = hmR0VmxExitXcptGeneric(pVCpu, pVmxTransient); break;
+ case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXF);
+ rc = hmR0VmxExitXcptGeneric(pVCpu, pVmxTransient); break;
+ case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE);
+ rc = hmR0VmxExitXcptGeneric(pVCpu, pVmxTransient); break;
+ case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD);
+ rc = hmR0VmxExitXcptGeneric(pVCpu, pVmxTransient); break;
+ case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS);
+ rc = hmR0VmxExitXcptGeneric(pVCpu, pVmxTransient); break;
+ case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP);
+ rc = hmR0VmxExitXcptGeneric(pVCpu, pVmxTransient); break;
+ case X86_XCPT_TS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestTS);
+ rc = hmR0VmxExitXcptGeneric(pVCpu, pVmxTransient); break;
+ default:
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXcpUnk);
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ {
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
+ Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
+ Assert(CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx));
+
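+                        /* Real-on-V86 mode: queue the exception as a pending event so that the
+                           event injection code can emulate real-mode delivery on the next
+                           VM-entry. */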
+ rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR0);
+ rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
+ AssertRCReturn(rc, rc);
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(uExitIntInfo),
+ pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode,
+ 0 /* GCPtrFaultAddress */);
+ }
+ else
+ {
+ AssertMsgFailed(("Unexpected VM-exit caused by exception %#x\n", uVector));
+ pVCpu->hm.s.u32HMError = uVector;
+ rc = VERR_VMX_UNEXPECTED_EXCEPTION;
+ }
+ break;
+ }
+ }
+ break;
+ }
+
+ default:
+ {
+ pVCpu->hm.s.u32HMError = uExitIntInfo;
+ rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_TYPE;
+ AssertMsgFailed(("Unexpected interruption info %#x\n", VMX_EXIT_INT_INFO_TYPE(uExitIntInfo)));
+ break;
+ }
+ }
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
+ return rc;
+}
+
+
+/**
+ * VM-exit handler for interrupt-window exiting (VMX_EXIT_INT_WINDOW).
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitIntWindow(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+    /* Indicate that we no longer need to VM-exit when the guest is ready to receive interrupts; it is now ready. */
+ hmR0VmxClearIntWindowExitVmcs(pVCpu);
+
+ /* Deliver the pending interrupts via hmR0VmxEvaluatePendingEvent() and resume guest execution. */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit handler for NMI-window exiting (VMX_EXIT_NMI_WINDOW).
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitNmiWindow(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ if (RT_UNLIKELY(!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT)))
+ {
+ AssertMsgFailed(("Unexpected NMI-window exit.\n"));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+ }
+
+ Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS));
+
+ /*
+ * If block-by-STI is set when we get this VM-exit, it means the CPU doesn't block NMIs following STI.
+ * It is therefore safe to unblock STI and deliver the NMI ourselves. See @bugref{7445}.
+ */
+ uint32_t fIntrState = 0;
+ int rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
+ AssertRCReturn(rc, rc);
+ Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS));
+ if (fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
+ {
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+
+ fIntrState &= ~VMX_VMCS_GUEST_INT_STATE_BLOCK_STI;
+ rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
+ AssertRCReturn(rc, rc);
+ }
+
+    /* Indicate that we no longer need to VM-exit when the guest is ready to receive NMIs; it is now ready. */
+ hmR0VmxClearNmiWindowExitVmcs(pVCpu);
+
+ /* Deliver the pending NMI via hmR0VmxEvaluatePendingEvent() and resume guest execution. */
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit handler for WBINVD (VMX_EXIT_WBINVD). Conditional VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitWbinvd(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ return hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * VM-exit handler for INVD (VMX_EXIT_INVD). Unconditional VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitInvd(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ return hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * VM-exit handler for CPUID (VMX_EXIT_CPUID). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitCpuid(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /*
+ * Get the state we need and update the exit history entry.
+ */
+ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict;
+ PCEMEXITREC pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
+ EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_CPUID),
+ pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
+ if (!pExitRec)
+ {
+ /*
+ * Regular CPUID instruction execution.
+ */
+ rcStrict = IEMExecDecodedCpuid(pVCpu, pVmxTransient->cbInstr);
+ if (rcStrict == VINF_SUCCESS)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ }
+ else
+ {
+ /*
+ * Frequent exit or something needing probing. Get state and call EMHistoryExec.
+ */
+ int rc2 = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc2, rc2);
+
+ Log4(("CpuIdExit/%u: %04x:%08RX64: %#x/%#x -> EMHistoryExec\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ecx));
+
+ rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+
+ Log4(("CpuIdExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+ VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for GETSEC (VMX_EXIT_GETSEC). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitGetsec(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR4);
+ AssertRCReturn(rc, rc);
+
+ if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_SMXE)
+ return VINF_EM_RAW_EMULATE_INSTR;
+
+ AssertMsgFailed(("hmR0VmxExitGetsec: unexpected VM-exit when CR4.SMXE is 0.\n"));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * VM-exit handler for RDTSC (VMX_EXIT_RDTSC). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitRdtsc(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedRdtsc(pVCpu, pVmxTransient->cbInstr);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ {
+ /* If we get a spurious VM-exit when offsetting is enabled,
+ we must reset offsetting on VM-reentry. See @bugref{6634}. */
+ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TSC_OFFSETTING)
+ pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for RDTSCP (VMX_EXIT_RDTSCP). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitRdtscp(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_TSC_AUX);
+ rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedRdtscp(pVCpu, pVmxTransient->cbInstr);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ {
+ /* If we get a spurious VM-exit when offsetting is enabled,
+ we must reset offsetting on VM-reentry. See @bugref{6634}. */
+ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TSC_OFFSETTING)
+ pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for RDPMC (VMX_EXIT_RDPMC). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitRdpmc(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_SS);
+ AssertRCReturn(rc, rc);
+
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
+ if (RT_LIKELY(rc == VINF_SUCCESS))
+ {
+ rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+ Assert(pVmxTransient->cbInstr == 2);
+ }
+ else
+ {
+ AssertMsgFailed(("hmR0VmxExitRdpmc: EMInterpretRdpmc failed with %Rrc\n", rc));
+ rc = VERR_EM_INTERPRETER;
+ }
+ return rc;
+}
+
+
+/**
+ * VM-exit handler for VMCALL (VMX_EXIT_VMCALL). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmcall(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ VBOXSTRICTRC rcStrict = VERR_VMX_IPE_3;
+ if (EMAreHypercallInstructionsEnabled(pVCpu))
+ {
+ int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_CR0
+ | CPUMCTX_EXTRN_SS | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_EFER);
+ AssertRCReturn(rc, rc);
+
+ /* Perform the hypercall. */
+ rcStrict = GIMHypercall(pVCpu, &pVCpu->cpum.GstCtx);
+ if (rcStrict == VINF_SUCCESS)
+ {
+ rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+ AssertRCReturn(rc, rc);
+ }
+ else
+ Assert( rcStrict == VINF_GIM_R3_HYPERCALL
+ || rcStrict == VINF_GIM_HYPERCALL_CONTINUING
+ || RT_FAILURE(rcStrict));
+
+ /* If the hypercall changes anything other than guest's general-purpose registers,
+ we would need to reload the guest changed bits here before VM-entry. */
+ }
+ else
+ Log4Func(("Hypercalls not enabled\n"));
+
+ /* If hypercalls are disabled or the hypercall failed for some reason, raise #UD and continue. */
+ if (RT_FAILURE(rcStrict))
+ {
+ hmR0VmxSetPendingXcptUD(pVCpu);
+ rcStrict = VINF_SUCCESS;
+ }
+
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for INVLPG (VMX_EXIT_INVLPG). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitInvlpg(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging || pVCpu->hm.s.fUsingDebugLoop);
+
+ int rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedInvlpg(pVCpu, pVmxTransient->cbInstr, pVmxTransient->uExitQual);
+
+ if (rcStrict == VINF_SUCCESS || rcStrict == VINF_PGM_SYNC_CR3)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ else
+        AssertMsgFailed(("Unexpected IEMExecDecodedInvlpg(%#RX64) status: %Rrc\n", pVmxTransient->uExitQual,
+ VBOXSTRICTRC_VAL(rcStrict)));
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for MONITOR (VMX_EXIT_MONITOR). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMonitor(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_SS);
+ AssertRCReturn(rc, rc);
+
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
+ if (RT_LIKELY(rc == VINF_SUCCESS))
+ rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+ else
+ {
+ AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMonitor: EMInterpretMonitor failed with %Rrc\n", rc));
+ rc = VERR_EM_INTERPRETER;
+ }
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor);
+ return rc;
+}
+
+
+/**
+ * VM-exit handler for MWAIT (VMX_EXIT_MWAIT). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMwait(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_SS);
+ AssertRCReturn(rc, rc);
+
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ VBOXSTRICTRC rc2 = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
+ rc = VBOXSTRICTRC_VAL(rc2);
+ if (RT_LIKELY( rc == VINF_SUCCESS
+ || rc == VINF_EM_HALT))
+ {
+ int rc3 = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+ AssertRCReturn(rc3, rc3);
+
+ if ( rc == VINF_EM_HALT
+ && EMMonitorWaitShouldContinue(pVCpu, pCtx))
+ rc = VINF_SUCCESS;
+ }
+ else
+ {
+ AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMwait: EMInterpretMWait failed with %Rrc\n", rc));
+ rc = VERR_EM_INTERPRETER;
+ }
+ AssertMsg(rc == VINF_SUCCESS || rc == VINF_EM_HALT || rc == VERR_EM_INTERPRETER,
+ ("hmR0VmxExitMwait: failed, invalid error code %Rrc\n", rc));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait);
+ return rc;
+}
+
+
+/**
+ * VM-exit handler for RSM (VMX_EXIT_RSM). Unconditional VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitRsm(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ /*
+ * Execution of RSM outside of SMM mode causes #UD regardless of VMX root or VMX non-root
+ * mode. In theory, we should never get this VM-exit. This can happen only if dual-monitor
+ * treatment of SMI and VMX is enabled, which can (only?) be done by executing VMCALL in
+ * VMX root operation. If we get here, something funny is going on.
+ *
+ * See Intel spec. 33.15.5 "Enabling the Dual-Monitor Treatment".
+ */
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ AssertMsgFailed(("Unexpected RSM VM-exit\n"));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * VM-exit handler for SMI (VMX_EXIT_SMI). Unconditional VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitSmi(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ /*
+ * This can only happen if we support dual-monitor treatment of SMI, which can be activated
+ * by executing VMCALL in VMX root operation. Only an STM (SMM transfer monitor) would get
+ * this VM-exit when we (the executive monitor) execute a VMCALL in VMX root mode or receive
+ * an SMI. If we get here, something funny is going on.
+ *
+ * See Intel spec. 33.15.6 "Activating the Dual-Monitor Treatment"
+ * See Intel spec. 25.3 "Other Causes of VM-Exits"
+ */
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ AssertMsgFailed(("Unexpected SMI VM-exit\n"));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * VM-exit handler for IO SMI (VMX_EXIT_IO_SMI). Unconditional VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitIoSmi(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ /* Same treatment as VMX_EXIT_SMI. See comment in hmR0VmxExitSmi(). */
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ AssertMsgFailed(("Unexpected IO SMI VM-exit\n"));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * VM-exit handler for SIPI (VMX_EXIT_SIPI). Conditional VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitSipi(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ /*
+ * SIPI exits can only occur in VMX non-root operation when the "wait-for-SIPI" guest activity state is used.
+ * We don't make use of it as our guests don't have direct access to the host LAPIC.
+ * See Intel spec. 25.3 "Other Causes of VM-exits".
+ */
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ AssertMsgFailed(("Unexpected SIPI VM-exit\n"));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * VM-exit handler for INIT signal (VMX_EXIT_INIT_SIGNAL). Unconditional
+ * VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitInitSignal(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ /*
+ * INIT signals are blocked in VMX root operation by VMXON and by SMI in SMM.
+     * See Intel spec. 33.14.1 "Default Treatment of SMI Delivery" and Intel spec. 29.3 "VMX Instructions" for "VMXON".
+ *
+ * It is -NOT- blocked in VMX non-root operation so we can, in theory, still get these VM-exits.
+     * See Intel spec. 23.8 "Restrictions on VMX Operation".
+ */
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit handler for triple faults (VMX_EXIT_TRIPLE_FAULT). Unconditional
+ * VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitTripleFault(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ return VINF_EM_RESET;
+}
+
+
+/**
+ * VM-exit handler for HLT (VMX_EXIT_HLT). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitHlt(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_HLT_EXIT);
+
+ int rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RFLAGS);
+ AssertRCReturn(rc, rc);
+
+ if (EMShouldContinueAfterHalt(pVCpu, &pVCpu->cpum.GstCtx)) /* Requires eflags. */
+ rc = VINF_SUCCESS;
+ else
+ rc = VINF_EM_HALT;
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
+ if (rc != VINF_SUCCESS)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHltToR3);
+ return rc;
+}
+
+
+/**
+ * VM-exit handler for instructions that result in a \#UD exception delivered to
+ * the guest.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitSetPendingXcptUD(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ hmR0VmxSetPendingXcptUD(pVCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit handler for expiry of the VMX preemption timer.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitPreemptTimer(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /* If the preemption-timer has expired, reinitialize the preemption timer on next VM-entry. */
+ pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
+
+ /* If there are any timer events pending, fall back to ring-3, otherwise resume guest execution. */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ bool fTimersPending = TMTimerPollBool(pVM, pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPreemptTimer);
+ return fTimersPending ? VINF_EM_RAW_TIMER_PENDING : VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit handler for XSETBV (VMX_EXIT_XSETBV). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitXsetbv(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_CR4);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedXsetbv(pVCpu, pVmxTransient->cbInstr);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, rcStrict != VINF_IEM_RAISED_XCPT ? HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS
+ : HM_CHANGED_RAISED_XCPT_MASK);
+
+ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
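+    /* Re-evaluate whether XCR0 needs to be swapped on world switches: only when the guest
+       has CR4.OSXSAVE set and its XCR0 differs from the host's. */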
+ pVCpu->hm.s.fLoadSaveGuestXcr0 = (pCtx->cr4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0();
+
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for INVPCID (VMX_EXIT_INVPCID). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitInvpcid(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ /** @todo Use VM-exit instruction information. */
+ return VERR_EM_INTERPRETER;
+}
+
+
+/**
+ * VM-exit handler for invalid-guest-state (VMX_EXIT_ERR_INVALID_GUEST_STATE).
+ * Error VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitErrInvalidGuestState(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+ rc = hmR0VmxCheckVmcsCtls(pVCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ uint32_t uInvalidReason = hmR0VmxCheckGuestState(pVCpu);
+ NOREF(uInvalidReason);
+
+#ifdef VBOX_STRICT
+ uint32_t fIntrState;
+ RTHCUINTREG uHCReg;
+ uint64_t u64Val;
+ uint32_t u32Val;
+
+ rc = hmR0VmxReadEntryIntInfoVmcs(pVmxTransient);
+ rc |= hmR0VmxReadEntryXcptErrorCodeVmcs(pVmxTransient);
+ rc |= hmR0VmxReadEntryInstrLenVmcs(pVmxTransient);
+ rc |= VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
+ AssertRCReturn(rc, rc);
+
+ Log4(("uInvalidReason %u\n", uInvalidReason));
+ Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", pVmxTransient->uEntryIntInfo));
+ Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", pVmxTransient->uEntryXcptErrorCode));
+ Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %#RX32\n", pVmxTransient->cbEntryInstr));
+ Log4(("VMX_VMCS32_GUEST_INT_STATE %#RX32\n", fIntrState));
+
+ rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32Val); AssertRC(rc);
+ Log4(("VMX_VMCS_GUEST_CR0 %#RX32\n", u32Val));
+ rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_MASK, &uHCReg); AssertRC(rc);
+ Log4(("VMX_VMCS_CTRL_CR0_MASK %#RHr\n", uHCReg));
+ rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uHCReg); AssertRC(rc);
+    Log4(("VMX_VMCS_CTRL_CR0_READ_SHADOW %#RHr\n", uHCReg));
+ rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_MASK, &uHCReg); AssertRC(rc);
+ Log4(("VMX_VMCS_CTRL_CR4_MASK %#RHr\n", uHCReg));
+ rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uHCReg); AssertRC(rc);
+ Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg));
+ rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc);
+ Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val));
+
+ hmR0DumpRegs(pVCpu);
+#else
+ NOREF(pVmxTransient);
+#endif
+
+ return VERR_VMX_INVALID_GUEST_STATE;
+}
+
+
+/**
+ * VM-exit handler for VM-entry failure due to an MSR-load
+ * (VMX_EXIT_ERR_MSR_LOAD). Error VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitErrMsrLoad(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ AssertMsgFailed(("Unexpected MSR-load exit\n"));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * VM-exit handler for VM-entry failure due to a machine-check event
+ * (VMX_EXIT_ERR_MACHINE_CHECK). Error VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitErrMachineCheck(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ AssertMsgFailed(("Unexpected machine-check event exit\n"));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * VM-exit handler for all undefined reasons. Should never ever happen... in
+ * theory.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitErrUndefined(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ RT_NOREF2(pVCpu, pVmxTransient);
+ AssertMsgFailed(("Huh!? Undefined VM-exit reason %d\n", pVmxTransient->uExitReason));
+ return VERR_VMX_UNDEFINED_EXIT_CODE;
+}
+
+
+/**
+ * VM-exit handler for XDTR (LGDT, SGDT, LIDT, SIDT) accesses
+ * (VMX_EXIT_GDTR_IDTR_ACCESS) and LDT and TR access (LLDT, LTR, SLDT, STR).
+ * Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitXdtrAccess(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+    /* By default, we don't enable VMX_PROC_CTLS2_DESC_TABLE_EXIT. */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitXdtrAccess);
+ if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_PROC_CTLS2_DESC_TABLE_EXIT)
+ return VERR_EM_INTERPRETER;
+ AssertMsgFailed(("Unexpected XDTR access\n"));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * VM-exit handler for RDRAND (VMX_EXIT_RDRAND). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitRdrand(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /* By default, we don't enable VMX_PROC_CTLS2_RDRAND_EXIT. */
+ if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_PROC_CTLS2_RDRAND_EXIT)
+ return VERR_EM_INTERPRETER;
+ AssertMsgFailed(("Unexpected RDRAND exit\n"));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * VM-exit handler for RDMSR (VMX_EXIT_RDMSR).
+ */
+HMVMX_EXIT_DECL hmR0VmxExitRdmsr(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+    /** @todo Optimize this: We currently drag in the whole MSR state
+     * (CPUMCTX_EXTRN_ALL_MSRS) here; we should only fetch the MSRs actually
+     * required. That would require changes to IEM and possibly CPUM too.
+     * (Should probably do it in a lazy fashion from CPUMAllMsrs.cpp.) */
+ uint32_t const idMsr = pVCpu->cpum.GstCtx.ecx;
+ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_ALL_MSRS);
+ switch (idMsr)
+ {
+ /* The FS and GS base MSRs are not part of the above all-MSRs mask. */
+ case MSR_K8_FS_BASE: rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_FS); break;
+ case MSR_K8_GS_BASE: rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_GS); break;
+ }
+ AssertRCReturn(rc, rc);
+
+ Log4Func(("ecx=%#RX32\n", idMsr));
+
+#ifdef VBOX_STRICT
+ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
+ {
+ if ( hmR0VmxIsAutoLoadStoreGuestMsr(pVCpu, idMsr)
+ && idMsr != MSR_K6_EFER)
+ {
+ AssertMsgFailed(("Unexpected RDMSR for an MSR in the auto-load/store area in the VMCS. ecx=%#RX32\n", idMsr));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+ }
+ if (hmR0VmxIsLazyGuestMsr(pVCpu, idMsr))
+ {
+ VMXMSREXITREAD enmRead;
+ VMXMSREXITWRITE enmWrite;
+ int rc2 = HMGetVmxMsrPermission(pVCpu->hm.s.vmx.pvMsrBitmap, idMsr, &enmRead, &enmWrite);
+ AssertRCReturn(rc2, rc2);
+ if (enmRead == VMXMSREXIT_PASSTHRU_READ)
+ {
+ AssertMsgFailed(("Unexpected RDMSR for a passthru lazy-restore MSR. ecx=%#RX32\n", idMsr));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+ }
+ }
+ }
+#endif
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedRdmsr(pVCpu, pVmxTransient->cbInstr);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdmsr);
+ if (rcStrict == VINF_SUCCESS)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS
+ | HM_CHANGED_GUEST_RAX | HM_CHANGED_GUEST_RDX);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ else
+ AssertMsg(rcStrict == VINF_CPUM_R3_MSR_READ, ("Unexpected IEMExecDecodedRdmsr rc (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for WRMSR (VMX_EXIT_WRMSR).
+ */
+HMVMX_EXIT_DECL hmR0VmxExitWrmsr(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+    /** @todo Optimize this: We currently drag in the whole MSR state
+     * (CPUMCTX_EXTRN_ALL_MSRS) here; we should only fetch the MSRs actually
+     * required. That would require changes to IEM and possibly CPUM too.
+     * (Should probably do it in a lazy fashion from CPUMAllMsrs.cpp.) */
+ uint32_t const idMsr = pVCpu->cpum.GstCtx.ecx;
+ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK
+ | CPUMCTX_EXTRN_ALL_MSRS);
+ switch (idMsr)
+ {
+ /*
+ * The FS and GS base MSRs are not part of the above all-MSRs mask.
+ *
+         * Although we don't need to fetch the base (it will be overwritten shortly), exporting
+         * the guest state loads the entire segment register, including the limit and attributes,
+         * so we must import them here.
+ */
+ case MSR_K8_FS_BASE: rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_FS); break;
+ case MSR_K8_GS_BASE: rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_GS); break;
+ }
+ AssertRCReturn(rc, rc);
+
+ Log4Func(("ecx=%#RX32 edx:eax=%#RX32:%#RX32\n", idMsr, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.eax));
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedWrmsr(pVCpu, pVmxTransient->cbInstr);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWrmsr);
+
+ if (rcStrict == VINF_SUCCESS)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+
+ /* If this is an X2APIC WRMSR access, update the APIC state as well. */
+ if ( idMsr == MSR_IA32_APICBASE
+ || ( idMsr >= MSR_IA32_X2APIC_START
+ && idMsr <= MSR_IA32_X2APIC_END))
+ {
+ /*
+ * We've already saved the APIC related guest-state (TPR) in hmR0VmxPostRunGuest(). When full APIC register
+ * virtualization is implemented we'll have to make sure APIC state is saved from the VMCS before IEM changes it.
+ */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
+ }
+ else if (idMsr == MSR_IA32_TSC) /* Windows 7 does this during bootup. See @bugref{6398}. */
+ pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
+ else if (idMsr == MSR_K6_EFER)
+ {
+ /*
+ * If the guest touches EFER we need to update the VM-Entry and VM-Exit controls as well,
+ * even if it is -not- touching bits that cause paging mode changes (LMA/LME). We care about
+ * the other bits as well, SCE and NXE. See @bugref{7368}.
+ */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_EFER_MSR | HM_CHANGED_VMX_ENTRY_CTLS
+ | HM_CHANGED_VMX_EXIT_CTLS);
+ }
+
+ /* Update MSRs that are part of the VMCS and auto-load/store area when MSR-bitmaps are not supported. */
+ if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
+ {
+ switch (idMsr)
+ {
+ case MSR_IA32_SYSENTER_CS: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_CS_MSR); break;
+ case MSR_IA32_SYSENTER_EIP: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_EIP_MSR); break;
+ case MSR_IA32_SYSENTER_ESP: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_ESP_MSR); break;
+ case MSR_K8_FS_BASE: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_FS); break;
+ case MSR_K8_GS_BASE: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_GS); break;
+ case MSR_K6_EFER: /* Nothing to do, already handled above. */ break;
+ default:
+ {
+ if (hmR0VmxIsAutoLoadStoreGuestMsr(pVCpu, idMsr))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_VMX_GUEST_AUTO_MSRS);
+ else if (hmR0VmxIsLazyGuestMsr(pVCpu, idMsr))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_VMX_GUEST_LAZY_MSRS);
+ break;
+ }
+ }
+ }
+#ifdef VBOX_STRICT
+ else
+ {
+ /* Paranoia. Validate that MSRs in the MSR-bitmaps with write-passthru are not intercepted. */
+ switch (idMsr)
+ {
+ case MSR_IA32_SYSENTER_CS:
+ case MSR_IA32_SYSENTER_EIP:
+ case MSR_IA32_SYSENTER_ESP:
+ case MSR_K8_FS_BASE:
+ case MSR_K8_GS_BASE:
+ {
+ AssertMsgFailed(("Unexpected WRMSR for an MSR in the VMCS. ecx=%#RX32\n", idMsr));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+ }
+
+                /* Writes to MSRs in the auto-load/store area or to lazily swapped MSRs shouldn't
+                   cause VM-exits when MSR-bitmaps are used. */
+ default:
+ {
+ if (hmR0VmxIsAutoLoadStoreGuestMsr(pVCpu, idMsr))
+ {
+ /* EFER writes are always intercepted, see hmR0VmxExportGuestMsrs(). */
+ if (idMsr != MSR_K6_EFER)
+ {
+ AssertMsgFailed(("Unexpected WRMSR for an MSR in the auto-load/store area in the VMCS. ecx=%#RX32\n",
+ idMsr));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+ }
+ }
+
+ if (hmR0VmxIsLazyGuestMsr(pVCpu, idMsr))
+ {
+ VMXMSREXITREAD enmRead;
+ VMXMSREXITWRITE enmWrite;
+ int rc2 = HMGetVmxMsrPermission(pVCpu->hm.s.vmx.pvMsrBitmap, idMsr, &enmRead, &enmWrite);
+ AssertRCReturn(rc2, rc2);
+ if (enmWrite == VMXMSREXIT_PASSTHRU_WRITE)
+ {
+ AssertMsgFailed(("Unexpected WRMSR for passthru, lazy-restore MSR. ecx=%#RX32\n", idMsr));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+ }
+ }
+ break;
+ }
+ }
+ }
+#endif /* VBOX_STRICT */
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ else
+ AssertMsg(rcStrict == VINF_CPUM_R3_MSR_WRITE, ("Unexpected IEMExecDecodedWrmsr rc (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for PAUSE (VMX_EXIT_PAUSE). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitPause(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+    /** @todo The guest has likely hit a contended spinlock. We might want to
+     * poke or schedule a different guest VCPU. */
+ return VINF_EM_RAW_INTERRUPT;
+}
+
+
+/**
+ * VM-exit handler for when the TPR value is lowered below the specified
+ * threshold (VMX_EXIT_TPR_BELOW_THRESHOLD). Conditional VM-exit.
+ */
+HMVMX_EXIT_NSRC_DECL hmR0VmxExitTprBelowThreshold(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW);
+
+ /*
+ * The TPR shadow would've been synced with the APIC TPR in hmR0VmxPostRunGuest(). We'll re-evaluate
+ * pending interrupts and inject them before the next VM-entry so we can just continue execution here.
+ */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTprBelowThreshold);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit handler for control-register accesses (VMX_EXIT_MOV_CRX). Conditional
+ * VM-exit.
+ *
+ * @retval VINF_SUCCESS when guest execution can continue.
+ * @retval VINF_PGM_SYNC_CR3 CR3 sync is required, back to ring-3.
+ * @retval VERR_EM_INTERPRETER when something unexpected happened, fallback to
+ * interpreter.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMovCRx(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitMovCRx, y2);
+
+ int rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict;
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ RTGCUINTPTR const uExitQual = pVmxTransient->uExitQual;
+ uint32_t const uAccessType = VMX_EXIT_QUAL_CRX_ACCESS(uExitQual);
+ switch (uAccessType)
+ {
+ case VMX_EXIT_QUAL_CRX_ACCESS_WRITE: /* MOV to CRx */
+ {
+ uint32_t const uOldCr0 = pVCpu->cpum.GstCtx.cr0;
+ rcStrict = IEMExecDecodedMovCRxWrite(pVCpu, pVmxTransient->cbInstr, VMX_EXIT_QUAL_CRX_REGISTER(uExitQual),
+ VMX_EXIT_QUAL_CRX_GENREG(uExitQual));
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_IEM_RAISED_XCPT
+ || rcStrict == VINF_PGM_SYNC_CR3, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ switch (VMX_EXIT_QUAL_CRX_REGISTER(uExitQual))
+ {
+ case 0:
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged,
+ HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR0);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR0Write);
+ Log4Func(("CR0 write rcStrict=%Rrc CR0=%#RX64\n", VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cr0));
+
+ /*
+ * This is a kludge for handling switches back to real mode when we try to use
+ * V86 mode to run real mode code directly. Problem is that V86 mode cannot
+ * deal with special selector values, so we have to return to ring-3 and run
+ * there till the selector values are V86 mode compatible.
+ *
+ * Note! Using VINF_EM_RESCHEDULE_REM here rather than VINF_EM_RESCHEDULE since the
+                     * latter is an alias for VINF_IEM_RAISED_XCPT which is converted to VINF_SUCCESS
+ * at the end of this function.
+ */
+ if ( rc == VINF_SUCCESS
+ && !pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest
+ && CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx)
+ && (uOldCr0 & X86_CR0_PE)
+ && !(pVCpu->cpum.GstCtx.cr0 & X86_CR0_PE) )
+ {
+ /** @todo check selectors rather than returning all the time. */
+ Log4Func(("CR0 write, back to real mode -> VINF_EM_RESCHEDULE_REM\n"));
+ rcStrict = VINF_EM_RESCHEDULE_REM;
+ }
+ break;
+ }
+
+ case 2:
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR2Write);
+                    /* Nothing to do here; CR2 is not part of the VMCS. */
+ break;
+ }
+
+ case 3:
+ {
+ Assert( !pVM->hm.s.fNestedPaging
+ || !CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx)
+ || pVCpu->hm.s.fUsingDebugLoop);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR3Write);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged,
+ HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR3);
+ Log4Func(("CR3 write rcStrict=%Rrc CR3=%#RX64\n", VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cr3));
+ break;
+ }
+
+ case 4:
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR4Write);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged,
+ HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR4);
+ Log4Func(("CR4 write rc=%Rrc CR4=%#RX64 fLoadSaveGuestXcr0=%u\n", VBOXSTRICTRC_VAL(rcStrict),
+ pVCpu->cpum.GstCtx.cr4, pVCpu->hm.s.fLoadSaveGuestXcr0));
+ break;
+ }
+
+ case 8:
+ {
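+                    /* CR8 writes only cause a VM-exit when the TPR shadow feature isn't enabled. */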
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR8Write);
+ Assert(!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW));
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged,
+ HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_APIC_TPR);
+ break;
+ }
+ default:
+ AssertMsgFailed(("Invalid CRx register %#x\n", VMX_EXIT_QUAL_CRX_REGISTER(uExitQual)));
+ break;
+ }
+ break;
+ }
+
+ case VMX_EXIT_QUAL_CRX_ACCESS_READ: /* MOV from CRx */
+ {
+ Assert( !pVM->hm.s.fNestedPaging
+ || !CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx)
+ || pVCpu->hm.s.fUsingDebugLoop
+ || VMX_EXIT_QUAL_CRX_REGISTER(uExitQual) != 3);
+ /* CR8 reads only cause a VM-exit when the TPR shadow feature isn't enabled. */
+ Assert( VMX_EXIT_QUAL_CRX_REGISTER(uExitQual) != 8
+ || !(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW));
+
+ rcStrict = IEMExecDecodedMovCRxRead(pVCpu, pVmxTransient->cbInstr, VMX_EXIT_QUAL_CRX_GENREG(uExitQual),
+ VMX_EXIT_QUAL_CRX_REGISTER(uExitQual));
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_IEM_RAISED_XCPT, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+#ifdef VBOX_WITH_STATISTICS
+ switch (VMX_EXIT_QUAL_CRX_REGISTER(uExitQual))
+ {
+ case 0: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR0Read); break;
+ case 2: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR2Read); break;
+ case 3: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR3Read); break;
+ case 4: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR4Read); break;
+ case 8: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR8Read); break;
+ }
+#endif
+ Log4Func(("CR%d Read access rcStrict=%Rrc\n", VMX_EXIT_QUAL_CRX_REGISTER(uExitQual),
+ VBOXSTRICTRC_VAL(rcStrict)));
+ if (VMX_EXIT_QUAL_CRX_GENREG(uExitQual) == X86_GREG_xSP)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_RSP);
+ else
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
+ break;
+ }
+
+ case VMX_EXIT_QUAL_CRX_ACCESS_CLTS: /* CLTS (Clear Task-Switch Flag in CR0) */
+ {
+ rcStrict = IEMExecDecodedClts(pVCpu, pVmxTransient->cbInstr);
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_IEM_RAISED_XCPT, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR0);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitClts);
+ Log4Func(("CLTS rcStrict=%d\n", VBOXSTRICTRC_VAL(rcStrict)));
+ break;
+ }
+
+ case VMX_EXIT_QUAL_CRX_ACCESS_LMSW: /* LMSW (Load Machine-Status Word into CR0) */
+ {
+ /* Note! LMSW cannot clear CR0.PE, so no fRealOnV86Active kludge needed here. */
+ rc = hmR0VmxReadGuestLinearAddrVmcs(pVCpu, pVmxTransient);
+ AssertRCReturn(rc, rc);
+ rcStrict = IEMExecDecodedLmsw(pVCpu, pVmxTransient->cbInstr, VMX_EXIT_QUAL_CRX_LMSW_DATA(uExitQual),
+ pVmxTransient->uGuestLinearAddr);
+ AssertMsg( rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_IEM_RAISED_XCPT
+ , ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR0);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitLmsw);
+ Log4Func(("LMSW rcStrict=%d\n", VBOXSTRICTRC_VAL(rcStrict)));
+ break;
+ }
+
+ default:
+ AssertMsgFailedReturn(("Invalid access-type in Mov CRx VM-exit qualification %#x\n", uAccessType),
+ VERR_VMX_UNEXPECTED_EXCEPTION);
+ }
+
+ Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS))
+ == (HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS));
+ if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitMovCRx, y2);
+ NOREF(pVM);
+ return rcStrict;
+}
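+
+/* Illustrative decode of the MOV-CRx exit qualification handled above (layout per Intel SDM
+   Table 27-3; the macro names are the ones used in this handler):
+       bits  3:0  - control register number     (VMX_EXIT_QUAL_CRX_REGISTER)
+       bits  5:4  - access type: 0 = MOV to CR, 1 = MOV from CR, 2 = CLTS, 3 = LMSW
+                                                (VMX_EXIT_QUAL_CRX_ACCESS)
+       bits 11:8  - general purpose register    (VMX_EXIT_QUAL_CRX_GENREG)
+       bits 31:16 - LMSW source data            (VMX_EXIT_QUAL_CRX_LMSW_DATA)
+   For example, a guest "mov cr4, rbx" produces uExitQual = 0x304: CR number 4, access
+   type 0 (write to CR) and general register 3 (RBX). */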
+
+
+/**
+ * VM-exit handler for I/O instructions (VMX_EXIT_IO_INSTR). Conditional
+ * VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitIoInstr(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitIO, y1);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ int rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_EFER);
+ /* EFER is also required for long-mode checks in EMInterpretDisasCurrent(), but it's always up-to-date. */
+ AssertRCReturn(rc, rc);
+
+ /* Refer Intel spec. Table 27-5 "Exit Qualifications for I/O Instructions" for the format. */
+ uint32_t uIOPort = VMX_EXIT_QUAL_IO_PORT(pVmxTransient->uExitQual);
+ uint8_t uIOWidth = VMX_EXIT_QUAL_IO_WIDTH(pVmxTransient->uExitQual);
+ bool fIOWrite = (VMX_EXIT_QUAL_IO_DIRECTION(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_IO_DIRECTION_OUT);
+ bool fIOString = VMX_EXIT_QUAL_IO_IS_STRING(pVmxTransient->uExitQual);
+ bool fGstStepping = RT_BOOL(pCtx->eflags.Bits.u1TF);
+ bool fDbgStepping = pVCpu->hm.s.fSingleInstruction;
+ AssertReturn(uIOWidth <= 3 && uIOWidth != 2, VERR_VMX_IPE_1);
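+ /* Illustrative note on the width encoding (per Intel SDM Table 27-5): the size-of-access field is
+ 0 = 1 byte, 1 = 2 bytes, 3 = 4 bytes; the value 2 is not defined, hence the assertion above and
+ the zero entries at index 2 in the lookup tables below. E.g. an "out dx, al" exit has
+ uIOWidth = 0, giving cbValue = 1 and an AND mask of 0xff. */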
+
+ /*
+ * Update exit history to see if this exit can be optimized.
+ */
+ VBOXSTRICTRC rcStrict;
+ PCEMEXITREC pExitRec = NULL;
+ if ( !fGstStepping
+ && !fDbgStepping)
+ pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
+ !fIOString
+ ? !fIOWrite
+ ? EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_READ)
+ : EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_WRITE)
+ : !fIOWrite
+ ? EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_STR_READ)
+ : EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_STR_WRITE),
+ pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
+ if (!pExitRec)
+ {
+ /* I/O operation lookup arrays. */
+ static uint32_t const s_aIOSizes[4] = { 1, 2, 0, 4 }; /* Size of the I/O accesses. */
+ static uint32_t const s_aIOOpAnd[4] = { 0xff, 0xffff, 0, 0xffffffff }; /* AND masks for saving result in AL/AX/EAX. */
+ uint32_t const cbValue = s_aIOSizes[uIOWidth];
+ uint32_t const cbInstr = pVmxTransient->cbInstr;
+ bool fUpdateRipAlready = false; /* ugly hack, should be temporary. */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if (fIOString)
+ {
+ /*
+ * INS/OUTS - I/O String instruction.
+ *
+ * Use instruction-information if available, otherwise fall back on
+ * interpreting the instruction.
+ */
+ Log4Func(("CS:RIP=%04x:%08RX64 %#06x/%u %c str\n", pCtx->cs.Sel, pCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r'));
+ AssertReturn(pCtx->dx == uIOPort, VERR_VMX_IPE_2);
+ bool const fInsOutsInfo = RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_INS_OUTS);
+ if (fInsOutsInfo)
+ {
+ int rc2 = hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ AssertRCReturn(rc2, rc2);
+ AssertReturn(pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize <= 2, VERR_VMX_IPE_3);
+ AssertCompile(IEMMODE_16BIT == 0 && IEMMODE_32BIT == 1 && IEMMODE_64BIT == 2);
+ IEMMODE const enmAddrMode = (IEMMODE)pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize;
+ bool const fRep = VMX_EXIT_QUAL_IO_IS_REP(pVmxTransient->uExitQual);
+ if (fIOWrite)
+ rcStrict = IEMExecStringIoWrite(pVCpu, cbValue, enmAddrMode, fRep, cbInstr,
+ pVmxTransient->ExitInstrInfo.StrIo.iSegReg, true /*fIoChecked*/);
+ else
+ {
+ /*
+ * The segment prefix for INS cannot be overridden and is always ES, so we can safely assume X86_SREG_ES.
+ * Hence the "iSegReg" field is undefined in the instruction-information field in VT-x for INS.
+ * See Intel Instruction spec. for "INS".
+ * See Intel spec. Table 27-8 "Format of the VM-Exit Instruction-Information Field as Used for INS and OUTS".
+ */
+ rcStrict = IEMExecStringIoRead(pVCpu, cbValue, enmAddrMode, fRep, cbInstr, true /*fIoChecked*/);
+ }
+ }
+ else
+ rcStrict = IEMExecOne(pVCpu);
+
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP);
+ fUpdateRipAlready = true;
+ }
+ else
+ {
+ /*
+ * IN/OUT - I/O instruction.
+ */
+ Log4Func(("CS:RIP=%04x:%08RX64 %#06x/%u %c\n", pCtx->cs.Sel, pCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r'));
+ uint32_t const uAndVal = s_aIOOpAnd[uIOWidth];
+ Assert(!VMX_EXIT_QUAL_IO_IS_REP(pVmxTransient->uExitQual));
+ if (fIOWrite)
+ {
+ rcStrict = IOMIOPortWrite(pVM, pVCpu, uIOPort, pCtx->eax & uAndVal, cbValue);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite);
+ if ( rcStrict == VINF_IOM_R3_IOPORT_WRITE
+ && !pCtx->eflags.Bits.u1TF)
+ rcStrict = EMRZSetPendingIoPortWrite(pVCpu, uIOPort, cbInstr, cbValue, pCtx->eax & uAndVal);
+ }
+ else
+ {
+ uint32_t u32Result = 0;
+ rcStrict = IOMIOPortRead(pVM, pVCpu, uIOPort, &u32Result, cbValue);
+ if (IOM_SUCCESS(rcStrict))
+ {
+ /* Save result of I/O IN instr. in AL/AX/EAX. */
+ pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Result & uAndVal);
+ }
+ if ( rcStrict == VINF_IOM_R3_IOPORT_READ
+ && !pCtx->eflags.Bits.u1TF)
+ rcStrict = EMRZSetPendingIoPortRead(pVCpu, uIOPort, cbInstr, cbValue);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead);
+ }
+ }
+
+ if (IOM_SUCCESS(rcStrict))
+ {
+ if (!fUpdateRipAlready)
+ {
+ hmR0VmxAdvanceGuestRipBy(pVCpu, cbInstr);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP);
+ }
+
+ /*
+ * INS/OUTS with a REP prefix updates RFLAGS; not marking it as changed here was observed
+ * as a triple-fault guru meditation while booting a Fedora 17 64-bit guest.
+ *
+ * See Intel Instruction reference for REP/REPE/REPZ/REPNE/REPNZ.
+ */
+ if (fIOString)
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RFLAGS);
+
+ /*
+ * If any I/O breakpoints are armed, we need to check if one triggered
+ * and take appropriate action.
+ * Note that the I/O breakpoint type is undefined if CR4.DE is 0.
+ */
+ rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_DR7);
+ AssertRCReturn(rc, rc);
+
+ /** @todo Optimize away the DBGFBpIsHwIoArmed call by having DBGF tell the
+ * execution engines about whether hyper BPs and such are pending. */
+ uint32_t const uDr7 = pCtx->dr[7];
+ if (RT_UNLIKELY( ( (uDr7 & X86_DR7_ENABLED_MASK)
+ && X86_DR7_ANY_RW_IO(uDr7)
+ && (pCtx->cr4 & X86_CR4_DE))
+ || DBGFBpIsHwIoArmed(pVM)))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck);
+
+ /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */
+ VMMRZCallRing3Disable(pVCpu);
+ HM_DISABLE_PREEMPT(pVCpu);
+
+ bool fIsGuestDbgActive = CPUMR0DebugStateMaybeSaveGuest(pVCpu, true /* fDr6 */);
+
+ VBOXSTRICTRC rcStrict2 = DBGFBpCheckIo(pVM, pVCpu, pCtx, uIOPort, cbValue);
+ if (rcStrict2 == VINF_EM_RAW_GUEST_TRAP)
+ {
+ /* Raise #DB. */
+ if (fIsGuestDbgActive)
+ ASMSetDR6(pCtx->dr[6]);
+ if (pCtx->dr[7] != uDr7)
+ pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_DR7;
+
+ hmR0VmxSetPendingXcptDB(pVCpu);
+ }
+ /* rcStrict is VINF_SUCCESS, VINF_IOM_R3_IOPORT_COMMIT_WRITE, or in [VINF_EM_FIRST..VINF_EM_LAST],
+ however we can ditch VINF_IOM_R3_IOPORT_COMMIT_WRITE as it has VMCPU_FF_IOM as backup. */
+ else if ( rcStrict2 != VINF_SUCCESS
+ && (rcStrict == VINF_SUCCESS || rcStrict2 < rcStrict))
+ rcStrict = rcStrict2;
+ AssertCompile(VINF_EM_LAST < VINF_IOM_R3_IOPORT_COMMIT_WRITE);
+
+ HM_RESTORE_PREEMPT();
+ VMMRZCallRing3Enable(pVCpu);
+ }
+ }
+
+#ifdef VBOX_STRICT
+ if ( rcStrict == VINF_IOM_R3_IOPORT_READ
+ || rcStrict == VINF_EM_PENDING_R3_IOPORT_READ)
+ Assert(!fIOWrite);
+ else if ( rcStrict == VINF_IOM_R3_IOPORT_WRITE
+ || rcStrict == VINF_IOM_R3_IOPORT_COMMIT_WRITE
+ || rcStrict == VINF_EM_PENDING_R3_IOPORT_WRITE)
+ Assert(fIOWrite);
+ else
+ {
+# if 0 /** @todo r=bird: This is missing a bunch of VINF_EM_FIRST..VINF_EM_LAST
+ * statuses, that the VMM device and some others may return. See
+ * IOM_SUCCESS() for guidance. */
+ AssertMsg( RT_FAILURE(rcStrict)
+ || rcStrict == VINF_SUCCESS
+ || rcStrict == VINF_EM_RAW_EMULATE_INSTR
+ || rcStrict == VINF_EM_DBG_BREAKPOINT
+ || rcStrict == VINF_EM_RAW_GUEST_TRAP
+ || rcStrict == VINF_EM_RAW_TO_R3
+ || rcStrict == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
+# endif
+ }
+#endif
+ STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitIO, y1);
+ }
+ else
+ {
+ /*
+ * Frequent exit or something needing probing. Get state and call EMHistoryExec.
+ */
+ int rc2 = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc2, rc2);
+ STAM_COUNTER_INC(!fIOString ? fIOWrite ? &pVCpu->hm.s.StatExitIOWrite : &pVCpu->hm.s.StatExitIORead
+ : fIOWrite ? &pVCpu->hm.s.StatExitIOStringWrite : &pVCpu->hm.s.StatExitIOStringRead);
+ Log4(("IOExit/%u: %04x:%08RX64: %s%s%s %#x LB %u -> EMHistoryExec\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+ VMX_EXIT_QUAL_IO_IS_REP(pVmxTransient->uExitQual) ? "REP " : "",
+ fIOWrite ? "OUT" : "IN", fIOString ? "S" : "", uIOPort, uIOWidth));
+
+ rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+
+ Log4(("IOExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+ VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
+ }
+ return rcStrict;
+}
+
+
+/**
+ * VM-exit handler for task switches (VMX_EXIT_TASK_SWITCH). Unconditional
+ * VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitTaskSwitch(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /* Check if this task-switch occurred while delivering an event through the guest IDT. */
+ int rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ AssertRCReturn(rc, rc);
+ if (VMX_EXIT_QUAL_TASK_SWITCH_TYPE(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_TASK_SWITCH_TYPE_IDT)
+ {
+ rc = hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
+ AssertRCReturn(rc, rc);
+ if (VMX_IDT_VECTORING_INFO_IS_VALID(pVmxTransient->uIdtVectoringInfo))
+ {
+ uint32_t uErrCode;
+ RTGCUINTPTR GCPtrFaultAddress;
+ uint32_t const uIntType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo);
+ uint32_t const uVector = VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo);
+ bool const fErrorCodeValid = VMX_IDT_VECTORING_INFO_IS_ERROR_CODE_VALID(pVmxTransient->uIdtVectoringInfo);
+ if (fErrorCodeValid)
+ {
+ rc = hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
+ AssertRCReturn(rc, rc);
+ uErrCode = pVmxTransient->uIdtVectoringErrorCode;
+ }
+ else
+ uErrCode = 0;
+
+ if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
+ && uVector == X86_XCPT_PF)
+ GCPtrFaultAddress = pVCpu->cpum.GstCtx.cr2;
+ else
+ GCPtrFaultAddress = 0;
+
+ rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ AssertRCReturn(rc, rc);
+
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_IDT_INFO(pVmxTransient->uIdtVectoringInfo),
+ pVmxTransient->cbInstr, uErrCode, GCPtrFaultAddress);
+
+ Log4Func(("Pending event. uIntType=%#x uVector=%#x\n", uIntType, uVector));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+ }
+
+ /* Fall back to the interpreter to emulate the task-switch. */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
+ return VERR_EM_INTERPRETER;
+}
+
+
+/**
+ * VM-exit handler for monitor-trap-flag (VMX_EXIT_MTF). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMtf(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_MONITOR_TRAP_FLAG);
+ pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
+ AssertRCReturn(rc, rc);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMtf);
+ return VINF_EM_DBG_STEPPED;
+}
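+
+/* Illustrative sketch of the other half of this exit (assumed shape, not a verbatim quote of
+   the arming site): the single-stepping code arms the flag cleared above roughly as
+       pVCpu->hm.s.vmx.u32ProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
+       rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
+   so exactly one guest instruction executes before the CPU raises VMX_EXIT_MTF, which this
+   handler then surfaces to the debug loop as VINF_EM_DBG_STEPPED. */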
+
+
+/**
+ * VM-exit handler for APIC access (VMX_EXIT_APIC_ACCESS). Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitApicAccess(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitApicAccess);
+
+ /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
+ VBOXSTRICTRC rcStrict1 = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient);
+ if (RT_LIKELY(rcStrict1 == VINF_SUCCESS))
+ {
+ /* For some crazy guests, if event delivery causes an APIC-access VM-exit, go to instruction emulation. */
+ if (RT_UNLIKELY(pVCpu->hm.s.Event.fPending))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingInterpret);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+ }
+ else
+ {
+ if (rcStrict1 == VINF_HM_DOUBLE_FAULT)
+ rcStrict1 = VINF_SUCCESS;
+ return rcStrict1;
+ }
+
+ /* IOMMIOPhysHandler() below may call into IEM, save the necessary state. */
+ int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ AssertRCReturn(rc, rc);
+
+ /* See Intel spec. Table 27-6 "Exit Qualifications for APIC-access VM-exits from Linear Accesses & Guest-Physical Addresses". */
+ uint32_t uAccessType = VMX_EXIT_QUAL_APIC_ACCESS_TYPE(pVmxTransient->uExitQual);
+ VBOXSTRICTRC rcStrict2;
+ switch (uAccessType)
+ {
+ case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
+ case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
+ {
+ AssertMsg( !(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
+ || VMX_EXIT_QUAL_APIC_ACCESS_OFFSET(pVmxTransient->uExitQual) != XAPIC_OFF_TPR,
+ ("hmR0VmxExitApicAccess: can't access TPR offset while using TPR shadowing.\n"));
+
+ RTGCPHYS GCPhys = pVCpu->hm.s.vmx.u64MsrApicBase; /* Always up-to-date, u64MsrApicBase is not part of the VMCS. */
+ GCPhys &= PAGE_BASE_GC_MASK;
+ GCPhys += VMX_EXIT_QUAL_APIC_ACCESS_OFFSET(pVmxTransient->uExitQual);
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ Log4Func(("Linear access uAccessType=%#x GCPhys=%#RGp Off=%#x\n", uAccessType, GCPhys,
+ VMX_EXIT_QUAL_APIC_ACCESS_OFFSET(pVmxTransient->uExitQual)));
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ rcStrict2 = IOMMMIOPhysHandler(pVM, pVCpu,
+ uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ ? 0 : X86_TRAP_PF_RW,
+ CPUMCTX2CORE(pCtx), GCPhys);
+ Log4Func(("IOMMMIOPhysHandler returned %Rrc\n", VBOXSTRICTRC_VAL(rcStrict2)));
+ if ( rcStrict2 == VINF_SUCCESS
+ || rcStrict2 == VERR_PAGE_TABLE_NOT_PRESENT
+ || rcStrict2 == VERR_PAGE_NOT_PRESENT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS
+ | HM_CHANGED_GUEST_APIC_TPR);
+ rcStrict2 = VINF_SUCCESS;
+ }
+ break;
+ }
+
+ default:
+ Log4Func(("uAccessType=%#x\n", uAccessType));
+ rcStrict2 = VINF_EM_RAW_EMULATE_INSTR;
+ break;
+ }
+
+ if (rcStrict2 != VINF_SUCCESS)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchApicAccessToR3);
+ return rcStrict2;
+}
+
+
+/**
+ * VM-exit handler for debug-register accesses (VMX_EXIT_MOV_DRX). Conditional
+ * VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitMovDRx(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /* We should -not- get this VM-exit if the guest's debug registers were active. */
+ if (pVmxTransient->fWasGuestDebugStateActive)
+ {
+ AssertMsgFailed(("Unexpected MOV DRx exit\n"));
+ HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient);
+ }
+
+ if ( !pVCpu->hm.s.fSingleInstruction
+ && !pVmxTransient->fWasHyperDebugStateActive)
+ {
+ Assert(!DBGFIsStepping(pVCpu));
+ Assert(pVCpu->hm.s.vmx.u32XcptBitmap & RT_BIT_32(X86_XCPT_DB));
+
+ /* Don't intercept MOV DRx any more. */
+ pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
+ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
+ AssertRCReturn(rc, rc);
+
+ /* We're playing with the host CPU state here, make sure we can't preempt or longjmp. */
+ VMMRZCallRing3Disable(pVCpu);
+ HM_DISABLE_PREEMPT(pVCpu);
+
+ /* Save the host & load the guest debug state, restart execution of the MOV DRx instruction. */
+ CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
+ Assert(CPUMIsGuestDebugStateActive(pVCpu) || HC_ARCH_BITS == 32);
+
+ HM_RESTORE_PREEMPT();
+ VMMRZCallRing3Enable(pVCpu);
+
+#ifdef VBOX_WITH_STATISTICS
+ rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ AssertRCReturn(rc, rc);
+ if (VMX_EXIT_QUAL_DRX_DIRECTION(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_DRX_DIRECTION_WRITE)
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
+ else
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
+#endif
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * EMInterpretDRx[Write|Read]() calls CPUMIsGuestIn64BitCode(), which requires EFER and CS; EFER is always up-to-date.
+ * Update the segment registers and DR7 from the CPU.
+ */
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ int rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_DR7);
+ AssertRCReturn(rc, rc);
+ Log4Func(("CS:RIP=%04x:%08RX64\n", pCtx->cs.Sel, pCtx->rip));
+
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ if (VMX_EXIT_QUAL_DRX_DIRECTION(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_DRX_DIRECTION_WRITE)
+ {
+ rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
+ VMX_EXIT_QUAL_DRX_REGISTER(pVmxTransient->uExitQual),
+ VMX_EXIT_QUAL_DRX_GENREG(pVmxTransient->uExitQual));
+ if (RT_SUCCESS(rc))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
+ }
+ else
+ {
+ rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
+ VMX_EXIT_QUAL_DRX_GENREG(pVmxTransient->uExitQual),
+ VMX_EXIT_QUAL_DRX_REGISTER(pVmxTransient->uExitQual));
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
+ }
+
+ Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER);
+ if (RT_SUCCESS(rc))
+ {
+ int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+ AssertRCReturn(rc2, rc2);
+ return VINF_SUCCESS;
+ }
+ return rc;
+}
+
+
+/**
+ * VM-exit handler for EPT misconfiguration (VMX_EXIT_EPT_MISCONFIG).
+ * Conditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitEptMisconfig(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
+
+ /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
+ VBOXSTRICTRC rcStrict1 = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient);
+ if (RT_LIKELY(rcStrict1 == VINF_SUCCESS))
+ {
+ /* If event delivery causes an EPT misconfig (MMIO), go back to instruction emulation as otherwise
+ injecting the original pending event would most likely cause the same EPT misconfig VM-exit. */
+ if (RT_UNLIKELY(pVCpu->hm.s.Event.fPending))
+ {
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingInterpret);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+ }
+ else
+ {
+ if (rcStrict1 == VINF_HM_DOUBLE_FAULT)
+ rcStrict1 = VINF_SUCCESS;
+ return rcStrict1;
+ }
+
+ /*
+ * Get sufficient state and update the exit history entry.
+ */
+ RTGCPHYS GCPhys;
+ int rc = VMXReadVmcs64(VMX_VMCS64_RO_GUEST_PHYS_ADDR_FULL, &GCPhys);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict;
+ PCEMEXITREC pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
+ EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_MMIO),
+ pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
+ if (!pExitRec)
+ {
+ /*
+ * If we succeed, resume guest execution.
+ * If we fail to interpret the instruction because we couldn't get the guest physical address
+ * of the page containing the instruction via the guest's page tables (we would invalidate the guest page
+ * in the host TLB), resume execution anyway; the resulting guest page fault lets the guest handle this
+ * weird case. See @bugref{6043}.
+ */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ rcStrict = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX);
+ Log4Func(("At %#RGp RIP=%#RX64 rc=%Rrc\n", GCPhys, pCtx->rip, VBOXSTRICTRC_VAL(rcStrict)));
+ if ( rcStrict == VINF_SUCCESS
+ || rcStrict == VERR_PAGE_TABLE_NOT_PRESENT
+ || rcStrict == VERR_PAGE_NOT_PRESENT)
+ {
+ /* Successfully handled MMIO operation. */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS
+ | HM_CHANGED_GUEST_APIC_TPR);
+ rcStrict = VINF_SUCCESS;
+ }
+ }
+ else
+ {
+ /*
+ * Frequent exit or something needing probing. Get state and call EMHistoryExec.
+ */
+ int rc2 = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ AssertRCReturn(rc2, rc2);
+
+ Log4(("EptMisscfgExit/%u: %04x:%08RX64: %RGp -> EMHistoryExec\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhys));
+
+ rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+
+ Log4(("EptMisscfgExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
+ pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
+ VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
+ }
+ return VBOXSTRICTRC_TODO(rcStrict);
+}
+
+
+/**
+ * VM-exit handler for EPT violation (VMX_EXIT_EPT_VIOLATION). Conditional
+ * VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitEptViolation(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
+
+ /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
+ VBOXSTRICTRC rcStrict1 = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient);
+ if (RT_LIKELY(rcStrict1 == VINF_SUCCESS))
+ {
+ /* In the unlikely case that the EPT violation happened as a result of delivering an event, log it. */
+ if (RT_UNLIKELY(pVCpu->hm.s.Event.fPending))
+ Log4Func(("EPT violation with an event pending u64IntInfo=%#RX64\n", pVCpu->hm.s.Event.u64IntInfo));
+ }
+ else
+ {
+ if (rcStrict1 == VINF_HM_DOUBLE_FAULT)
+ rcStrict1 = VINF_SUCCESS;
+ return rcStrict1;
+ }
+
+ RTGCPHYS GCPhys;
+ int rc = VMXReadVmcs64(VMX_VMCS64_RO_GUEST_PHYS_ADDR_FULL, &GCPhys);
+ rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
+ AssertRCReturn(rc, rc);
+
+ /* Intel spec. Table 27-7 "Exit Qualifications for EPT violations". */
+ AssertMsg(((pVmxTransient->uExitQual >> 7) & 3) != 2, ("%#RX64", pVmxTransient->uExitQual));
+
+ RTGCUINT uErrorCode = 0;
+ if (pVmxTransient->uExitQual & VMX_EXIT_QUAL_EPT_INSTR_FETCH)
+ uErrorCode |= X86_TRAP_PF_ID;
+ if (pVmxTransient->uExitQual & VMX_EXIT_QUAL_EPT_DATA_WRITE)
+ uErrorCode |= X86_TRAP_PF_RW;
+ if (pVmxTransient->uExitQual & VMX_EXIT_QUAL_EPT_ENTRY_PRESENT)
+ uErrorCode |= X86_TRAP_PF_P;
+
+ TRPMAssertXcptPF(pVCpu, GCPhys, uErrorCode);
+
+ /* Handle the pagefault trap for the nested shadow table. */
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+
+ Log4Func(("EPT violation %#x at %#RX64 ErrorCode %#x CS:RIP=%04x:%08RX64\n", pVmxTransient->uExitQual, GCPhys, uErrorCode,
+ pCtx->cs.Sel, pCtx->rip));
+
+ VBOXSTRICTRC rcStrict2 = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, uErrorCode, CPUMCTX2CORE(pCtx), GCPhys);
+ TRPMResetTrap(pVCpu);
+
+ /* Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}. */
+ if ( rcStrict2 == VINF_SUCCESS
+ || rcStrict2 == VERR_PAGE_TABLE_NOT_PRESENT
+ || rcStrict2 == VERR_PAGE_NOT_PRESENT)
+ {
+ /* Successfully synced our nested page tables. */
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf);
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS);
+ return VINF_SUCCESS;
+ }
+
+ Log4Func(("EPT return to ring-3 rcStrict2=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict2)));
+ return rcStrict2;
+}
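+
+/* Worked example of the error-code synthesis above (illustrative only): a guest data write to
+   a guest-physical address whose EPT entry is not present sets only VMX_EXIT_QUAL_EPT_DATA_WRITE
+   in the qualification, so uErrorCode = X86_TRAP_PF_RW with X86_TRAP_PF_P clear, and
+   PGMR0Trap0eHandlerNestedPaging handles it as a not-present write fault at GCPhys, typically by
+   populating the nested paging structures. */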
+
+/** @} */
+
+/** @name VM-exit exception handlers.
+ * @{
+ */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= VM-exit exception handlers =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+
+/**
+ * VM-exit exception handler for \#MF (Math Fault: floating point exception).
+ */
+static int hmR0VmxExitXcptMF(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
+
+ int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR0);
+ AssertRCReturn(rc, rc);
+
+ if (!(pVCpu->cpum.GstCtx.cr0 & X86_CR0_NE))
+ {
+ /* Convert a #MF into a FERR -> IRQ 13. See @bugref{6117}. */
+ rc = PDMIsaSetIrq(pVCpu->CTX_SUFF(pVM), 13, 1, 0 /* uTagSrc */);
+
+ /** @todo r=ramshankar: The Intel spec. does -not- specify that this VM-exit
+ * provides the VM-exit instruction length. If this causes problems later,
+ * disassemble the instruction like it's done on AMD-V. */
+ int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+ AssertRCReturn(rc2, rc2);
+ return rc;
+ }
+
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), pVmxTransient->cbInstr,
+ pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
+ return rc;
+}
+
+
+/**
+ * VM-exit exception handler for \#BP (Breakpoint exception).
+ */
+static int hmR0VmxExitXcptBP(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP);
+
+ int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ rc = DBGFRZTrap03Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx));
+ if (rc == VINF_EM_RAW_GUEST_TRAP)
+ {
+ rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
+ AssertRCReturn(rc, rc);
+
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), pVmxTransient->cbInstr,
+ pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
+ }
+
+ Assert(rc == VINF_SUCCESS || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_EM_DBG_BREAKPOINT);
+ return rc;
+}
+
+
+/**
+ * VM-exit exception handler for \#AC (alignment check exception).
+ */
+static int hmR0VmxExitXcptAC(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+
+ /*
+ * Re-inject it. We'll detect any nesting before getting here.
+ */
+ int rc = hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ AssertRCReturn(rc, rc);
+ Assert(ASMAtomicUoReadU32(&pVmxTransient->fVmcsFieldsRead) & HMVMX_READ_EXIT_INTERRUPTION_INFO);
+
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), pVmxTransient->cbInstr,
+ pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit exception handler for \#DB (Debug exception).
+ */
+static int hmR0VmxExitXcptDB(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB);
+
+ /*
+ * Get the DR6-like values from the VM-exit qualification and pass it to DBGF
+ * for processing.
+ */
+ int rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+
+ /* Refer Intel spec. Table 27-1. "Exit Qualifications for debug exceptions" for the format. */
+ uint64_t uDR6 = X86_DR6_INIT_VAL;
+ uDR6 |= (pVmxTransient->uExitQual & (X86_DR6_B0 | X86_DR6_B1 | X86_DR6_B2 | X86_DR6_B3 | X86_DR6_BD | X86_DR6_BS));
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ rc = DBGFRZTrap01Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx), uDR6, pVCpu->hm.s.fSingleInstruction);
+ Log6Func(("rc=%Rrc\n", rc));
+ if (rc == VINF_EM_RAW_GUEST_TRAP)
+ {
+ /*
+ * The exception was for the guest. Update DR6, DR7.GD and
+ * IA32_DEBUGCTL.LBR before forwarding it.
+ * (See Intel spec. 27.1 "Architectural State before a VM-Exit".)
+ */
+ VMMRZCallRing3Disable(pVCpu);
+ HM_DISABLE_PREEMPT(pVCpu);
+
+ pCtx->dr[6] &= ~X86_DR6_B_MASK;
+ pCtx->dr[6] |= uDR6;
+ if (CPUMIsGuestDebugStateActive(pVCpu))
+ ASMSetDR6(pCtx->dr[6]);
+
+ HM_RESTORE_PREEMPT();
+ VMMRZCallRing3Enable(pVCpu);
+
+ rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_DR7);
+ AssertRCReturn(rc, rc);
+
+ /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
+ pCtx->dr[7] &= ~X86_DR7_GD;
+
+ /* Paranoia. */
+ pCtx->dr[7] &= ~X86_DR7_RAZ_MASK;
+ pCtx->dr[7] |= X86_DR7_RA1_MASK;
+
+ rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, (uint32_t)pCtx->dr[7]);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * Raise #DB in the guest.
+ *
+ * It is important to reflect exactly what the VM-exit gave us (preserving the
+ * interruption-type) rather than use hmR0VmxSetPendingXcptDB() as the #DB could've
+ * been raised while executing ICEBP (INT1) and not the regular #DB. Thus it may
+ * trigger different handling in the CPU (like skipping DPL checks), see @bugref{6398}.
+ *
+ * Intel re-documented ICEBP/INT1 in May 2018 (previously it was only documented as part of
+ * the Intel 386); see Intel spec. 24.8.3 "VM-Entry Controls for Event Injection".
+ */
+ rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
+ AssertRCReturn(rc, rc);
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), pVmxTransient->cbInstr,
+ pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Not a guest trap, must be a hypervisor related debug event then.
+ * Update DR6 in case someone is interested in it.
+ */
+ AssertMsg(rc == VINF_EM_DBG_STEPPED || rc == VINF_EM_DBG_BREAKPOINT, ("%Rrc\n", rc));
+ AssertReturn(pVmxTransient->fWasHyperDebugStateActive, VERR_HM_IPE_5);
+ CPUMSetHyperDR6(pVCpu, uDR6);
+
+ return rc;
+}
+
+
+/**
+ * Hacks its way around the lovely mesa driver's backdoor accesses.
+ *
+ * @sa hmR0SvmHandleMesaDrvGp
+ */
+static int hmR0VmxHandleMesaDrvGp(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, PCPUMCTX pCtx)
+{
+ Log(("hmR0VmxHandleMesaDrvGp: at %04x:%08RX64 rcx=%RX64 rbx=%RX64\n", pCtx->cs.Sel, pCtx->rip, pCtx->rcx, pCtx->rbx));
+ RT_NOREF(pCtx);
+
+ /* For now we'll just skip the instruction. */
+ return hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
+}
+
+
+/**
+ * Checks if the \#GP'ing instruction is the mesa driver doing its lovely
+ * backdoor logging w/o checking what it is running inside.
+ *
+ * This recognizes an "IN EAX,DX" instruction executed in flat ring-3, with the
+ * backdoor port and magic numbers loaded in registers.
+ *
+ * @returns true if it is, false if it isn't.
+ * @sa hmR0SvmIsMesaDrvGp
+ */
+DECLINLINE(bool) hmR0VmxIsMesaDrvGp(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, PCPUMCTX pCtx)
+{
+ /* 0xed: IN eAX,dx */
+ uint8_t abInstr[1];
+ if (pVmxTransient->cbInstr != sizeof(abInstr))
+ return false;
+
+ /* Check that it is #GP(0). */
+ if (pVmxTransient->uExitIntErrorCode != 0)
+ return false;
+
+ /* Check magic and port. */
+ Assert(!(pCtx->fExtrn & (CPUMCTX_EXTRN_RAX | CPUMCTX_EXTRN_RDX | CPUMCTX_EXTRN_RCX)));
+ /*Log(("hmR0VmxIsMesaDrvGp: rax=%RX64 rdx=%RX64\n", pCtx->rax, pCtx->rdx));*/
+ if (pCtx->rax != UINT32_C(0x564d5868))
+ return false;
+ if (pCtx->dx != UINT32_C(0x5658))
+ return false;
+
+ /* Flat ring-3 CS. */
+ AssertCompile(HMVMX_CPUMCTX_EXTRN_ALL & CPUMCTX_EXTRN_CS);
+ Assert(!(pCtx->fExtrn & CPUMCTX_EXTRN_CS));
+ /*Log(("hmR0VmxIsMesaDrvGp: cs.Attr.n.u2Dpl=%d base=%Rx64\n", pCtx->cs.Attr.n.u2Dpl, pCtx->cs.u64Base));*/
+ if (pCtx->cs.Attr.n.u2Dpl != 3)
+ return false;
+ if (pCtx->cs.u64Base != 0)
+ return false;
+
+ /* Check opcode. */
+ AssertCompile(HMVMX_CPUMCTX_EXTRN_ALL & CPUMCTX_EXTRN_RIP);
+ Assert(!(pCtx->fExtrn & CPUMCTX_EXTRN_RIP));
+ int rc = PGMPhysSimpleReadGCPtr(pVCpu, abInstr, pCtx->rip, sizeof(abInstr));
+ /*Log(("hmR0VmxIsMesaDrvGp: PGMPhysSimpleReadGCPtr -> %Rrc %#x\n", rc, abInstr[0]));*/
+ if (RT_FAILURE(rc))
+ return false;
+ if (abInstr[0] != 0xed)
+ return false;
+
+ return true;
+}
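+
+/* Illustrative guest-side sequence recognized above (assumed shape of the mesa vmwgfx backdoor
+   access, pieced together from the checks in this function):
+       mov eax, 0x564d5868     ; backdoor magic
+       mov dx,  0x5658         ; backdoor port
+       in  eax, dx             ; opcode 0xed, the single byte read above
+   Executed at CPL 3 without I/O permission the IN raises #GP(0); when
+   fTrapXcptGpForLovelyMesaDrv is set, hmR0VmxExitXcptGP routes such a #GP here and to
+   hmR0VmxHandleMesaDrvGp instead of reflecting it to the guest. */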
+
+
+/**
+ * VM-exit exception handler for \#GP (General-protection exception).
+ *
+ * @remarks Requires pVmxTransient->uExitIntInfo to be up-to-date.
+ */
+static int hmR0VmxExitXcptGP(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP);
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
+ { /* likely */ }
+ else
+ {
+#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
+ Assert(pVCpu->hm.s.fUsingDebugLoop || pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv);
+#endif
+ /* If the guest is not in real-mode or we have unrestricted execution support, reflect #GP to the guest. */
+ int rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+ Log4Func(("Gst: CS:RIP %04x:%08RX64 ErrorCode=%#x CR0=%#RX64 CPL=%u TR=%#04x\n", pCtx->cs.Sel, pCtx->rip,
+ pVmxTransient->uExitIntErrorCode, pCtx->cr0, CPUMGetGuestCPL(pVCpu), pCtx->tr.Sel));
+
+ if ( !pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv
+ || !hmR0VmxIsMesaDrvGp(pVCpu, pVmxTransient, pCtx))
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
+ pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
+ else
+ rc = hmR0VmxHandleMesaDrvGp(pVCpu, pVmxTransient, pCtx);
+ return rc;
+ }
+
+ Assert(CPUMIsGuestInRealModeEx(pCtx));
+ Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest);
+
+ int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+
+ VBOXSTRICTRC rcStrict = IEMExecOne(pVCpu);
+ if (rcStrict == VINF_SUCCESS)
+ {
+ if (!CPUMIsGuestInRealModeEx(pCtx))
+ {
+ /*
+ * The guest is no longer in real-mode, check if we can continue executing the
+ * guest using hardware-assisted VMX. Otherwise, fall back to emulation.
+ */
+ if (HMCanExecuteVmxGuest(pVCpu, pCtx))
+ {
+ Log4Func(("Mode changed but guest still suitable for executing using VT-x\n"));
+ pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = false;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ }
+ else
+ {
+ Log4Func(("Mode changed -> VINF_EM_RESCHEDULE\n"));
+ rcStrict = VINF_EM_RESCHEDULE;
+ }
+ }
+ else
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ rcStrict = VINF_SUCCESS;
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ }
+ return VBOXSTRICTRC_VAL(rcStrict);
+}
+
+
+/**
+ * VM-exit exception handler wrapper for generic exceptions. Simply re-injects
+ * the exception reported in the VMX transient structure back into the VM.
+ *
+ * @remarks Requires uExitIntInfo in the VMX transient structure to be
+ * up-to-date.
+ */
+static int hmR0VmxExitXcptGeneric(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
+ AssertMsg(pVCpu->hm.s.fUsingDebugLoop || pVCpu->hm.s.vmx.RealMode.fRealOnV86Active,
+ ("uVector=%#x u32XcptBitmap=%#X32\n",
+ VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo), pVCpu->hm.s.vmx.u32XcptBitmap));
+#endif
+
+ /* Re-inject the exception into the guest. This cannot be a double-fault condition which would have been handled in
+ hmR0VmxCheckExitDueToEventDelivery(). */
+ int rc = hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ AssertRCReturn(rc, rc);
+ Assert(ASMAtomicUoReadU32(&pVmxTransient->fVmcsFieldsRead) & HMVMX_READ_EXIT_INTERRUPTION_INFO);
+
+#ifdef DEBUG_ramshankar
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
+ uint8_t uVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo);
+ Log(("hmR0VmxExitXcptGeneric: Reinjecting Xcpt. uVector=%#x cs:rip=%#04x:%#RX64\n", uVector, pCtx->cs.Sel, pCtx->rip));
+#endif
+
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), pVmxTransient->cbInstr,
+ pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VM-exit exception handler for \#PF (Page-fault exception).
+ */
+static int hmR0VmxExitXcptPF(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+ PVM pVM = pVCpu->CTX_SUFF(pVM);
+ int rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ rc |= hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
+ AssertRCReturn(rc, rc);
+
+ if (!pVM->hm.s.fNestedPaging)
+ { /* likely */ }
+ else
+ {
+#if !defined(HMVMX_ALWAYS_TRAP_ALL_XCPTS) && !defined(HMVMX_ALWAYS_TRAP_PF)
+ Assert(pVCpu->hm.s.fUsingDebugLoop);
+#endif
+ pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */
+ if (RT_LIKELY(!pVmxTransient->fVectoringDoublePF))
+ {
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), 0 /* cbInstr */,
+ pVmxTransient->uExitIntErrorCode, pVmxTransient->uExitQual);
+ }
+ else
+ {
+ /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
+ hmR0VmxSetPendingXcptDF(pVCpu);
+ Log4Func(("Pending #DF due to vectoring #PF w/ NestedPaging\n"));
+ }
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
+ return rc;
+ }
+
+ /* If it's a vectoring #PF, emulate injecting the original event, as PGMTrap0eHandler() is incapable
+ of differentiating between instruction emulation and event injection that caused a #PF. See @bugref{6607}. */
+ if (pVmxTransient->fVectoringPF)
+ {
+ Assert(pVCpu->hm.s.Event.fPending);
+ return VINF_EM_RAW_INJECT_TRPM_EVENT;
+ }
+
+ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
+ rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL);
+ AssertRCReturn(rc, rc);
+
+ Log4Func(("#PF: cr2=%#RX64 cs:rip=%#04x:%#RX64 uErrCode %#RX32 cr3=%#RX64\n", pVmxTransient->uExitQual, pCtx->cs.Sel,
+ pCtx->rip, pVmxTransient->uExitIntErrorCode, pCtx->cr3));
+
+ TRPMAssertXcptPF(pVCpu, pVmxTransient->uExitQual, (RTGCUINT)pVmxTransient->uExitIntErrorCode);
+ rc = PGMTrap0eHandler(pVCpu, pVmxTransient->uExitIntErrorCode, CPUMCTX2CORE(pCtx), (RTGCPTR)pVmxTransient->uExitQual);
+
+ Log4Func(("#PF: rc=%Rrc\n", rc));
+ if (rc == VINF_SUCCESS)
+ {
+ /*
+ * This is typically a shadow page table sync or an MMIO instruction. But we may have
+ * emulated something like LTR or a far jump. Any part of the CPU context may have changed.
+ */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ TRPMResetTrap(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
+ return rc;
+ }
+
+ if (rc == VINF_EM_RAW_GUEST_TRAP)
+ {
+ if (!pVmxTransient->fVectoringDoublePF)
+ {
+ /* It's a guest page fault and needs to be reflected to the guest. */
+ uint32_t uGstErrorCode = TRPMGetErrorCode(pVCpu);
+ TRPMResetTrap(pVCpu);
+ pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory #PF. */
+ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), 0 /* cbInstr */,
+ uGstErrorCode, pVmxTransient->uExitQual);
+ }
+ else
+ {
+ /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
+ TRPMResetTrap(pVCpu);
+ pVCpu->hm.s.Event.fPending = false; /* Clear pending #PF to replace it with #DF. */
+ hmR0VmxSetPendingXcptDF(pVCpu);
+ Log4Func(("#PF: Pending #DF due to vectoring #PF\n"));
+ }
+
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
+ return VINF_SUCCESS;
+ }
+
+ TRPMResetTrap(pVCpu);
+ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM);
+ return rc;
+}
+
+/** @} */
+
+#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
+/** @name Nested-guest VM-exit handlers.
+ * @{
+ */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Nested-guest VM-exit handlers =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
+
+/**
+ * VM-exit handler for VMCLEAR (VMX_EXIT_VMCLEAR). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmclear(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+#ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ rc |= hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
+ ExitInfo.cbInstr = pVmxTransient->cbInstr;
+ HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmclear(pVCpu, &ExitInfo);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+#else
+ HMVMX_IEM_EXEC_VMX_INSTR_RET(pVCpu);
+#endif
+}
+
+
+/**
+ * VM-exit handler for VMLAUNCH (VMX_EXIT_VMLAUNCH). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmlaunch(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+#ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_VMX_VMENTRY_MASK);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmlaunchVmresume(pVCpu, pVmxTransient->cbInstr, VMXINSTRID_VMLAUNCH);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ Assert(rcStrict != VINF_IEM_RAISED_XCPT);
+ return rcStrict;
+#else
+ HMVMX_IEM_EXEC_VMX_INSTR_RET(pVCpu);
+#endif
+}
+
+
+/**
+ * VM-exit handler for VMPTRLD (VMX_EXIT_VMPTRLD). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmptrld(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+#ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ rc |= hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
+ ExitInfo.cbInstr = pVmxTransient->cbInstr;
+ HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmptrld(pVCpu, &ExitInfo);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+#else
+ HMVMX_IEM_EXEC_VMX_INSTR_RET(pVCpu);
+#endif
+}
+
+
+/**
+ * VM-exit handler for VMPTRST (VMX_EXIT_VMPTRST). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmptrst(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+#ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ rc |= hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
+ ExitInfo.cbInstr = pVmxTransient->cbInstr;
+ HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_WRITE, &ExitInfo.GCPtrEffAddr);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmptrst(pVCpu, &ExitInfo);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+#else
+ HMVMX_IEM_EXEC_VMX_INSTR_RET(pVCpu);
+#endif
+}
+
+
+/**
+ * VM-exit handler for VMREAD (VMX_EXIT_VMREAD). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmread(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+#ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ rc |= hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
+ ExitInfo.cbInstr = pVmxTransient->cbInstr;
+ if (!ExitInfo.InstrInfo.VmreadVmwrite.fIsRegOperand)
+ HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_WRITE, &ExitInfo.GCPtrEffAddr);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmread(pVCpu, &ExitInfo);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+#else
+ HMVMX_IEM_EXEC_VMX_INSTR_RET(pVCpu);
+#endif
+}
+
+
+/**
+ * VM-exit handler for VMRESUME (VMX_EXIT_VMRESUME). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmresume(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+#ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_VMX_VMENTRY_MASK);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmlaunchVmresume(pVCpu, pVmxTransient->cbInstr, VMXINSTRID_VMRESUME);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
+ Assert(rcStrict != VINF_IEM_RAISED_XCPT);
+ return rcStrict;
+#else
+ HMVMX_IEM_EXEC_VMX_INSTR_RET(pVCpu);
+#endif
+}
+
+
+/**
+ * VM-exit handler for VMWRITE (VMX_EXIT_VMWRITE). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmwrite(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+#ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ rc |= hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
+ ExitInfo.cbInstr = pVmxTransient->cbInstr;
+ if (!ExitInfo.InstrInfo.VmreadVmwrite.fIsRegOperand)
+ HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmwrite(pVCpu, &ExitInfo);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+#else
+ HMVMX_IEM_EXEC_VMX_INSTR_RET(pVCpu);
+#endif
+}
+
+
+/**
+ * VM-exit handler for VMXOFF (VMX_EXIT_VMXOFF). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmxoff(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+#ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR4 | IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmxoff(pVCpu, pVmxTransient->cbInstr);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ {
+ /* VMXOFF changes the internal hwvirt. state but not anything that's visible to the guest other than RIP. */
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_HWVIRT);
+ }
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+#else
+ HMVMX_IEM_EXEC_VMX_INSTR_RET(pVCpu);
+#endif
+}
+
+
+/**
+ * VM-exit handler for VMXON (VMX_EXIT_VMXON). Unconditional VM-exit.
+ */
+HMVMX_EXIT_DECL hmR0VmxExitVmxon(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
+{
+ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
+#ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
+ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
+ rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
+ | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
+ rc |= hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
+ rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient);
+ AssertRCReturn(rc, rc);
+
+ HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
+
+ VMXVEXITINFO ExitInfo;
+ RT_ZERO(ExitInfo);
+ ExitInfo.uReason = pVmxTransient->uExitReason;
+ ExitInfo.u64Qual = pVmxTransient->uExitQual;
+ ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
+ ExitInfo.cbInstr = pVmxTransient->cbInstr;
+ HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr);
+
+ VBOXSTRICTRC rcStrict = IEMExecDecodedVmxon(pVCpu, &ExitInfo);
+ if (RT_LIKELY(rcStrict == VINF_SUCCESS))
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT);
+ else if (rcStrict == VINF_IEM_RAISED_XCPT)
+ {
+ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
+ rcStrict = VINF_SUCCESS;
+ }
+ return rcStrict;
+#else
+ HMVMX_IEM_EXEC_VMX_INSTR_RET(pVCpu);
+#endif
+}
+
+/** @} */
+#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
+
diff --git a/src/VBox/VMM/VMMR0/HMVMXR0.h b/src/VBox/VMM/VMMR0/HMVMXR0.h
new file mode 100644
index 00000000..1094cceb
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/HMVMXR0.h
@@ -0,0 +1,85 @@
+/* $Id: HMVMXR0.h $ */
+/** @file
+ * HM VMX (VT-x) - Internal header file.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+#ifndef VMM_INCLUDED_SRC_VMMR0_HMVMXR0_h
+#define VMM_INCLUDED_SRC_VMMR0_HMVMXR0_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+RT_C_DECLS_BEGIN
+
+/** @defgroup grp_vmx_int Internal
+ * @ingroup grp_vmx
+ * @internal
+ * @{
+ */
+
+#ifdef IN_RING0
+
+VMMR0DECL(int) VMXR0Enter(PVMCPU pVCpu);
+VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit);
+VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVM pVM, void *pvPageCpu, RTHCPHYS pPageCpuPhys,
+ bool fEnabledBySystem, PCSUPHWVIRTMSRS pHwvirtMsrs);
+VMMR0DECL(int) VMXR0DisableCpu(void *pvPageCpu, RTHCPHYS pPageCpuPhys);
+VMMR0DECL(int) VMXR0GlobalInit(void);
+VMMR0DECL(void) VMXR0GlobalTerm(void);
+VMMR0DECL(int) VMXR0InitVM(PVM pVM);
+VMMR0DECL(int) VMXR0TermVM(PVM pVM);
+VMMR0DECL(int) VMXR0SetupVM(PVM pVM);
+VMMR0DECL(int) VMXR0ExportHostState(PVMCPU pVCpu);
+VMMR0DECL(int) VMXR0InvalidatePage(PVMCPU pVCpu, RTGCPTR GCVirt);
+VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPU pVCpu, uint64_t fWhat);
+VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPU pVCpu);
+DECLASM(int) VMXR0StartVM32(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu);
+DECLASM(int) VMXR0StartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu);
+
+# if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu);
+VMMR0DECL(int) VMXR0Execute64BitsHandler(PVMCPU pVCpu, HM64ON32OP enmOp, uint32_t cbParam, uint32_t *paParam);
+# endif
+
+/* Cached VMCS accesses -- defined only for 32-bit hosts (with 64-bit guest support). */
+# ifdef VMX_USE_CACHED_VMCS_ACCESSES
+VMMR0DECL(int) VMXWriteCachedVmcsEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val);
+
+DECLINLINE(int) VMXReadCachedVmcsEx(PVMCPU pVCpu, uint32_t idxCache, RTGCUINTREG *pVal)
+{
+ Assert(idxCache <= VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX);
+ *pVal = pVCpu->hm.s.vmx.VMCSCache.Read.aFieldVal[idxCache];
+ return VINF_SUCCESS;
+}
+# endif
+
+# if HC_ARCH_BITS == 32
+# define VMXReadVmcsHstN VMXReadVmcs32
+# define VMXReadVmcsGstN(idxField, pVal) VMXReadCachedVmcsEx(pVCpu, idxField##_CACHE_IDX, pVal)
+# define VMXReadVmcsGstNByIdxVal(idxField, pVal) VMXReadCachedVmcsEx(pVCpu, idxField, pVal)
+# else /* HC_ARCH_BITS == 64 */
+# define VMXReadVmcsHstN VMXReadVmcs64
+# define VMXReadVmcsGstN VMXReadVmcs64
+# define VMXReadVmcsGstNByIdxVal VMXReadVmcs64
+# endif
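+/* Illustrative use only -- the field name and its matching _CACHE_IDX constant are
+ assumed for the example and not defined in this header; pVCpu must be in scope:
+ RTGCUINTREG uGuestRip;
+ VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &uGuestRip);
+ On 64-bit hosts this expands to VMXReadVmcs64(), on 32-bit hosts to a lookup in
+ the per-VCPU VMCS read cache. */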
+
+#endif /* IN_RING0 */
+
+/** @} */
+
+RT_C_DECLS_END
+
+#endif /* !VMM_INCLUDED_SRC_VMMR0_HMVMXR0_h */
+
diff --git a/src/VBox/VMM/VMMR0/Makefile.kup b/src/VBox/VMM/VMMR0/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/Makefile.kup
diff --git a/src/VBox/VMM/VMMR0/NEMR0Native-win.cpp b/src/VBox/VMM/VMMR0/NEMR0Native-win.cpp
new file mode 100644
index 00000000..796428a8
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/NEMR0Native-win.cpp
@@ -0,0 +1,2628 @@
+/* $Id: NEMR0Native-win.cpp $ */
+/** @file
+ * NEM - Native execution manager, native ring-0 Windows backend.
+ */
+
+/*
+ * Copyright (C) 2018-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_NEM
+#define VMCPU_INCL_CPUM_GST_CTX
+#include <iprt/nt/nt.h>
+#include <iprt/nt/hyperv.h>
+#include <iprt/nt/vid.h>
+#include <winerror.h>
+
+#include <VBox/vmm/nem.h>
+#include <VBox/vmm/iem.h>
+#include <VBox/vmm/em.h>
+#include <VBox/vmm/apic.h>
+#include <VBox/vmm/pdm.h>
+#include <VBox/vmm/dbgftrace.h>
+#include "NEMInternal.h"
+#include <VBox/vmm/gvm.h>
+#include <VBox/vmm/vm.h>
+#include <VBox/vmm/gvmm.h>
+#include <VBox/param.h>
+
+#include <iprt/dbg.h>
+#include <iprt/memobj.h>
+#include <iprt/string.h>
+#include <iprt/time.h>
+
+
+/* Assert compile context sanity. */
+#ifndef RT_OS_WINDOWS
+# error "Windows only file!"
+#endif
+#ifndef RT_ARCH_AMD64
+# error "AMD64 only file!"
+#endif
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+typedef uint32_t DWORD; /* for winerror.h constants */
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+static uint64_t (*g_pfnHvlInvokeHypercall)(uint64_t uCallInfo, uint64_t HCPhysInput, uint64_t HCPhysOutput);
+
+/**
+ * WinHvr.sys!WinHvDepositMemory
+ *
+ * This API will try to allocate cPages on IdealNode and deposit them to the
+ * hypervisor for use with the given partition.  The memory will be freed when
+ * VID.SYS calls WinHvWithdrawAllMemory during partition cleanup.
+ *
+ * Apparently node numbers above 64 have a different meaning.
+ */
+static NTSTATUS (*g_pfnWinHvDepositMemory)(uintptr_t idPartition, size_t cPages, uintptr_t IdealNode, size_t *pcActuallyAdded);
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+NEM_TMPL_STATIC int nemR0WinMapPages(PGVM pGVM, PVM pVM, PGVMCPU pGVCpu, RTGCPHYS GCPhysSrc, RTGCPHYS GCPhysDst,
+ uint32_t cPages, uint32_t fFlags);
+NEM_TMPL_STATIC int nemR0WinUnmapPages(PGVM pGVM, PGVMCPU pGVCpu, RTGCPHYS GCPhys, uint32_t cPages);
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+NEM_TMPL_STATIC int nemR0WinExportState(PGVM pGVM, PGVMCPU pGVCpu, PCPUMCTX pCtx);
+NEM_TMPL_STATIC int nemR0WinImportState(PGVM pGVM, PGVMCPU pGVCpu, PCPUMCTX pCtx, uint64_t fWhat, bool fCanUpdateCr3);
+NEM_TMPL_STATIC int nemR0WinQueryCpuTick(PGVM pGVM, PGVMCPU pGVCpu, uint64_t *pcTicks, uint32_t *pcAux);
+NEM_TMPL_STATIC int nemR0WinResumeCpuTickOnAll(PGVM pGVM, PGVMCPU pGVCpu, uint64_t uPausedTscValue);
+#endif
+DECLINLINE(NTSTATUS) nemR0NtPerformIoControl(PGVM pGVM, uint32_t uFunction, void *pvInput, uint32_t cbInput,
+ void *pvOutput, uint32_t cbOutput);
+
+
+/*
+ * Instantiate the code we share with ring-3.
+ */
+#ifdef NEM_WIN_WITH_RING0_RUNLOOP
+# define NEM_WIN_TEMPLATE_MODE_OWN_RUN_API
+#else
+# undef NEM_WIN_TEMPLATE_MODE_OWN_RUN_API
+#endif
+#include "../VMMAll/NEMAllNativeTemplate-win.cpp.h"
+
+
+
+/**
+ * Worker for NEMR0InitVM that allocates a hypercall page.
+ *
+ * @returns VBox status code.
+ * @param pHypercallData The hypercall data page to initialize.
+ */
+static int nemR0InitHypercallData(PNEMR0HYPERCALLDATA pHypercallData)
+{
+ int rc = RTR0MemObjAllocPage(&pHypercallData->hMemObj, PAGE_SIZE, false /*fExecutable*/);
+ if (RT_SUCCESS(rc))
+ {
+ pHypercallData->HCPhysPage = RTR0MemObjGetPagePhysAddr(pHypercallData->hMemObj, 0 /*iPage*/);
+ AssertStmt(pHypercallData->HCPhysPage != NIL_RTHCPHYS, rc = VERR_INTERNAL_ERROR_3);
+ pHypercallData->pbPage = (uint8_t *)RTR0MemObjAddress(pHypercallData->hMemObj);
+ AssertStmt(pHypercallData->pbPage, rc = VERR_INTERNAL_ERROR_3);
+ if (RT_SUCCESS(rc))
+ return VINF_SUCCESS;
+
+ /* bail out */
+ RTR0MemObjFree(pHypercallData->hMemObj, true /*fFreeMappings*/);
+ }
+ pHypercallData->hMemObj = NIL_RTR0MEMOBJ;
+ pHypercallData->HCPhysPage = NIL_RTHCPHYS;
+ pHypercallData->pbPage = NULL;
+ return rc;
+}
+
+/**
+ * Worker for NEMR0CleanupVM and NEMR0InitVM that cleans up a hypercall page.
+ *
+ * @param pHypercallData The hypercall data page to uninitialize.
+ */
+static void nemR0DeleteHypercallData(PNEMR0HYPERCALLDATA pHypercallData)
+{
+ /* Check pbPage here since it is always NULL when not initialized, whereas the
+ hMemObj can be either NIL_RTR0MEMOBJ or 0 (they aren't necessarily the same value). */
+ if (pHypercallData->pbPage != NULL)
+ {
+ RTR0MemObjFree(pHypercallData->hMemObj, true /*fFreeMappings*/);
+ pHypercallData->pbPage = NULL;
+ }
+ pHypercallData->hMemObj = NIL_RTR0MEMOBJ;
+ pHypercallData->HCPhysPage = NIL_RTHCPHYS;
+}
+
+
+/**
+ * Called by NEMR3Init to make sure we've got what we need.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param pVM The cross context VM handle.
+ * @thread EMT(0)
+ */
+VMMR0_INT_DECL(int) NEMR0InitVM(PGVM pGVM, PVM pVM)
+{
+ AssertCompile(sizeof(pGVM->nem.s) <= sizeof(pGVM->nem.padding));
+ AssertCompile(sizeof(pGVM->aCpus[0].nem.s) <= sizeof(pGVM->aCpus[0].nem.padding));
+
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, 0);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * We want to perform hypercalls here. The NT kernel started to expose a very low
+ * level interface for doing this somewhere between builds 14271 and 16299. Since
+ * we need build 17134 to get anywhere at all, the exact build is not relevant here.
+ *
+ * We also need to deposit memory to the hypervisor for use with the partition
+ * (page mapping structures and the like).
+ */
+ RTDBGKRNLINFO hKrnlInfo;
+ rc = RTR0DbgKrnlInfoOpen(&hKrnlInfo, 0);
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTR0DbgKrnlInfoQuerySymbol(hKrnlInfo, NULL, "HvlInvokeHypercall", (void **)&g_pfnHvlInvokeHypercall);
+ if (RT_SUCCESS(rc))
+ rc = RTR0DbgKrnlInfoQuerySymbol(hKrnlInfo, "winhvr.sys", "WinHvDepositMemory", (void **)&g_pfnWinHvDepositMemory);
+ RTR0DbgKrnlInfoRelease(hKrnlInfo);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Allocate a page for non-EMT threads to use for hypercalls (update
+ * statistics and such) and a critical section protecting it.
+ */
+ rc = RTCritSectInit(&pGVM->nem.s.HypercallDataCritSect);
+ if (RT_SUCCESS(rc))
+ {
+ rc = nemR0InitHypercallData(&pGVM->nem.s.HypercallData);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Allocate a page for each VCPU to place hypercall data on.
+ */
+ for (VMCPUID i = 0; i < pGVM->cCpus; i++)
+ {
+ rc = nemR0InitHypercallData(&pGVM->aCpus[i].nem.s.HypercallData);
+ if (RT_FAILURE(rc))
+ {
+ while (i-- > 0)
+ nemR0DeleteHypercallData(&pGVM->aCpus[i].nem.s.HypercallData);
+ break;
+ }
+ }
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * So far, so good.
+ */
+ return rc;
+ }
+
+ /*
+ * Bail out.
+ */
+ nemR0DeleteHypercallData(&pGVM->nem.s.HypercallData);
+ }
+ RTCritSectDelete(&pGVM->nem.s.HypercallDataCritSect);
+ }
+ }
+ else
+ rc = VERR_NEM_MISSING_KERNEL_API;
+ }
+
+ RT_NOREF(pVM);
+ return rc;
+}
+
+
+/**
+ * Perform an I/O control operation on the partition handle (VID.SYS).
+ *
+ * @returns NT status code.
+ * @param pGVM The ring-0 VM structure.
+ * @param uFunction The function to perform.
+ * @param pvInput The input buffer. This must point within the VM
+ * structure so we can easily convert to a ring-3
+ * pointer if necessary.
+ * @param cbInput The size of the input. @a pvInput must be NULL when
+ * zero.
+ * @param pvOutput The output buffer. This must also point within the
+ * VM structure for ring-3 pointer magic.
+ * @param cbOutput The size of the output. @a pvOutput must be NULL
+ * when zero.
+ */
+DECLINLINE(NTSTATUS) nemR0NtPerformIoControl(PGVM pGVM, uint32_t uFunction, void *pvInput, uint32_t cbInput,
+ void *pvOutput, uint32_t cbOutput)
+{
+#ifdef RT_STRICT
+ /*
+ * Input and output parameters are part of the VM CPU structure.
+ */
+ PVM pVM = pGVM->pVM;
+ size_t const cbVM = RT_UOFFSETOF_DYN(VM, aCpus[pGVM->cCpus]);
+ if (pvInput)
+ AssertReturn(((uintptr_t)pvInput + cbInput) - (uintptr_t)pVM <= cbVM, VERR_INVALID_PARAMETER);
+ if (pvOutput)
+ AssertReturn(((uintptr_t)pvOutput + cbOutput) - (uintptr_t)pVM <= cbVM, VERR_INVALID_PARAMETER);
+#endif
+
+ int32_t rcNt = STATUS_UNSUCCESSFUL;
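+ /* Hand SUPR0IoCtlPerform both the ring-0 buffer addresses and their ring-3 aliases;
+ the alias is simply the ring-0 address + offRing3ConversionDelta, the delta between
+ the ring-3 and ring-0 mappings of the VM structure set up in NEMR0InitVMPart2. */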
+ int rc = SUPR0IoCtlPerform(pGVM->nem.s.pIoCtlCtx, uFunction,
+ pvInput,
+ pvInput ? (uintptr_t)pvInput + pGVM->nem.s.offRing3ConversionDelta : NIL_RTR3PTR,
+ cbInput,
+ pvOutput,
+ pvOutput ? (uintptr_t)pvOutput + pGVM->nem.s.offRing3ConversionDelta : NIL_RTR3PTR,
+ cbOutput,
+ &rcNt);
+ if (RT_SUCCESS(rc) || !NT_SUCCESS((NTSTATUS)rcNt))
+ return (NTSTATUS)rcNt;
+ return STATUS_UNSUCCESSFUL;
+}
+
+
+/**
+ * 2nd part of the initialization, after we've got a partition handle.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param pVM The cross context VM handle.
+ * @thread EMT(0)
+ */
+VMMR0_INT_DECL(int) NEMR0InitVMPart2(PGVM pGVM, PVM pVM)
+{
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, 0);
+ AssertRCReturn(rc, rc);
+ SUPR0Printf("NEMR0InitVMPart2\n"); LogRel(("2: NEMR0InitVMPart2\n"));
+ Assert(pGVM->nem.s.fMayUseRing0Runloop == false);
+
+ /*
+ * Copy and validate the I/O control information from ring-3.
+ */
+ NEMWINIOCTL Copy = pVM->nem.s.IoCtlGetHvPartitionId;
+ AssertLogRelReturn(Copy.uFunction != 0, VERR_NEM_INIT_FAILED);
+ AssertLogRelReturn(Copy.cbInput == 0, VERR_NEM_INIT_FAILED);
+ AssertLogRelReturn(Copy.cbOutput == sizeof(HV_PARTITION_ID), VERR_NEM_INIT_FAILED);
+ pGVM->nem.s.IoCtlGetHvPartitionId = Copy;
+
+ pGVM->nem.s.fMayUseRing0Runloop = pVM->nem.s.fUseRing0Runloop;
+
+ Copy = pVM->nem.s.IoCtlStartVirtualProcessor;
+ AssertLogRelStmt(Copy.uFunction != 0, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.cbInput == sizeof(HV_VP_INDEX), rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.cbOutput == 0, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.uFunction != pGVM->nem.s.IoCtlGetHvPartitionId.uFunction, rc = VERR_NEM_INIT_FAILED);
+ if (RT_SUCCESS(rc))
+ pGVM->nem.s.IoCtlStartVirtualProcessor = Copy;
+
+ Copy = pVM->nem.s.IoCtlStopVirtualProcessor;
+ AssertLogRelStmt(Copy.uFunction != 0, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.cbInput == sizeof(HV_VP_INDEX), rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.cbOutput == 0, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.uFunction != pGVM->nem.s.IoCtlGetHvPartitionId.uFunction, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.uFunction != pGVM->nem.s.IoCtlStartVirtualProcessor.uFunction, rc = VERR_NEM_INIT_FAILED);
+ if (RT_SUCCESS(rc))
+ pGVM->nem.s.IoCtlStopVirtualProcessor = Copy;
+
+ Copy = pVM->nem.s.IoCtlMessageSlotHandleAndGetNext;
+ AssertLogRelStmt(Copy.uFunction != 0, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt( Copy.cbInput == sizeof(VID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT)
+ || Copy.cbInput == RT_OFFSETOF(VID_IOCTL_INPUT_MESSAGE_SLOT_HANDLE_AND_GET_NEXT, cMillies),
+ rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.cbOutput == 0, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.uFunction != pGVM->nem.s.IoCtlGetHvPartitionId.uFunction, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.uFunction != pGVM->nem.s.IoCtlStartVirtualProcessor.uFunction, rc = VERR_NEM_INIT_FAILED);
+ AssertLogRelStmt(Copy.uFunction != pGVM->nem.s.IoCtlStopVirtualProcessor.uFunction, rc = VERR_NEM_INIT_FAILED);
+ if (RT_SUCCESS(rc))
+ pGVM->nem.s.IoCtlMessageSlotHandleAndGetNext = Copy;
+
+ if ( RT_SUCCESS(rc)
+ || !pVM->nem.s.fUseRing0Runloop)
+ {
+ /*
+ * Setup of an I/O control context for the partition handle for later use.
+ */
+ rc = SUPR0IoCtlSetupForHandle(pGVM->pSession, pVM->nem.s.hPartitionDevice, 0, &pGVM->nem.s.pIoCtlCtx);
+ AssertLogRelRCReturn(rc, rc);
+ pGVM->nem.s.offRing3ConversionDelta = (uintptr_t)pVM->pVMR3 - (uintptr_t)pGVM->pVM;
+
+ /*
+ * Get the partition ID.
+ */
+ PVMCPU pVCpu = &pGVM->pVM->aCpus[0];
+ NTSTATUS rcNt = nemR0NtPerformIoControl(pGVM, pGVM->nem.s.IoCtlGetHvPartitionId.uFunction, NULL, 0,
+ &pVCpu->nem.s.uIoCtlBuf.idPartition, sizeof(pVCpu->nem.s.uIoCtlBuf.idPartition));
+ AssertLogRelMsgReturn(NT_SUCCESS(rcNt), ("IoCtlGetHvPartitionId failed: %#x\n", rcNt), VERR_NEM_INIT_FAILED);
+ pGVM->nem.s.idHvPartition = pVCpu->nem.s.uIoCtlBuf.idPartition;
+ AssertLogRelMsgReturn(pGVM->nem.s.idHvPartition == pVM->nem.s.idHvPartition,
+ ("idHvPartition mismatch: r0=%#RX64, r3=%#RX64\n", pGVM->nem.s.idHvPartition, pVM->nem.s.idHvPartition),
+ VERR_NEM_INIT_FAILED);
+ }
+
+ return rc;
+}
+
+
+/**
+ * Cleanup the NEM parts of the VM in ring-0.
+ *
+ * This is always called and must deal with the state regardless of whether
+ * NEMR0InitVM() was called or not. So, take care here.
+ *
+ * @param pGVM The ring-0 VM handle.
+ */
+VMMR0_INT_DECL(void) NEMR0CleanupVM(PGVM pGVM)
+{
+ pGVM->nem.s.idHvPartition = HV_PARTITION_ID_INVALID;
+
+ /* Clean up I/O control context. */
+ if (pGVM->nem.s.pIoCtlCtx)
+ {
+ int rc = SUPR0IoCtlCleanup(pGVM->nem.s.pIoCtlCtx);
+ AssertRC(rc);
+ pGVM->nem.s.pIoCtlCtx = NULL;
+ }
+
+ /* Free the hypercall pages. */
+ VMCPUID i = pGVM->cCpus;
+ while (i-- > 0)
+ nemR0DeleteHypercallData(&pGVM->aCpus[i].nem.s.HypercallData);
+
+ /* The non-EMT one too. */
+ if (RTCritSectIsInitialized(&pGVM->nem.s.HypercallDataCritSect))
+ RTCritSectDelete(&pGVM->nem.s.HypercallDataCritSect);
+ nemR0DeleteHypercallData(&pGVM->nem.s.HypercallData);
+}
+
+
+#if 0 /* for debugging GPA unmapping. */
+static int nemR3WinDummyReadGpa(PGVM pGVM, PGVMCPU pGVCpu, RTGCPHYS GCPhys)
+{
+ PHV_INPUT_READ_GPA pIn = (PHV_INPUT_READ_GPA)pGVCpu->nem.s.pbHypercallData;
+ PHV_OUTPUT_READ_GPA pOut = (PHV_OUTPUT_READ_GPA)(pIn + 1);
+ pIn->PartitionId = pGVM->nem.s.idHvPartition;
+ pIn->VpIndex = pGVCpu->idCpu;
+ pIn->ByteCount = 0x10;
+ pIn->BaseGpa = GCPhys;
+ pIn->ControlFlags.AsUINT64 = 0;
+ pIn->ControlFlags.CacheType = HvCacheTypeX64WriteCombining;
+ memset(pOut, 0xfe, sizeof(*pOut));
+ uint64_t volatile uResult = g_pfnHvlInvokeHypercall(HvCallReadGpa, pGVCpu->nem.s.HCPhysHypercallData,
+ pGVCpu->nem.s.HCPhysHypercallData + sizeof(*pIn));
+ LogRel(("nemR3WinDummyReadGpa: %RGp -> %#RX64; code=%u rsvd=%u abData=%.16Rhxs\n",
+ GCPhys, uResult, pOut->AccessResult.ResultCode, pOut->AccessResult.Reserved, pOut->Data));
+ __debugbreak();
+
+ return uResult != 0 ? VERR_READ_ERROR : VINF_SUCCESS;
+}
+#endif
+
+
+/**
+ * Worker for NEMR0MapPages and others.
+ */
+NEM_TMPL_STATIC int nemR0WinMapPages(PGVM pGVM, PVM pVM, PGVMCPU pGVCpu, RTGCPHYS GCPhysSrc, RTGCPHYS GCPhysDst,
+ uint32_t cPages, uint32_t fFlags)
+{
+ /*
+ * Validate.
+ */
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API);
+
+ AssertReturn(cPages > 0, VERR_OUT_OF_RANGE);
+ AssertReturn(cPages <= NEM_MAX_MAP_PAGES, VERR_OUT_OF_RANGE);
+ AssertReturn(!(fFlags & ~(HV_MAP_GPA_MAYBE_ACCESS_MASK & ~HV_MAP_GPA_DUNNO_ACCESS)), VERR_INVALID_FLAGS);
+ AssertMsgReturn(!(GCPhysDst & X86_PAGE_OFFSET_MASK), ("GCPhysDst=%RGp\n", GCPhysDst), VERR_OUT_OF_RANGE);
+ AssertReturn(GCPhysDst < _1E, VERR_OUT_OF_RANGE);
+ if (GCPhysSrc != GCPhysDst)
+ {
+ AssertMsgReturn(!(GCPhysSrc & X86_PAGE_OFFSET_MASK), ("GCPhysSrc=%RGp\n", GCPhysSrc), VERR_OUT_OF_RANGE);
+ AssertReturn(GCPhysSrc < _1E, VERR_OUT_OF_RANGE);
+ }
+
+ /*
+ * Compose and make the hypercall.
+ * Ring-3 is not allowed to fill in the host physical addresses of the call.
+ */
+ for (uint32_t iTries = 0;; iTries++)
+ {
+ HV_INPUT_MAP_GPA_PAGES *pMapPages = (HV_INPUT_MAP_GPA_PAGES *)pGVCpu->nem.s.HypercallData.pbPage;
+ AssertPtrReturn(pMapPages, VERR_INTERNAL_ERROR_3);
+ pMapPages->TargetPartitionId = pGVM->nem.s.idHvPartition;
+ pMapPages->TargetGpaBase = GCPhysDst >> X86_PAGE_SHIFT;
+ pMapPages->MapFlags = fFlags;
+ pMapPages->u32ExplicitPadding = 0;
+ for (uint32_t iPage = 0; iPage < cPages; iPage++, GCPhysSrc += X86_PAGE_SIZE)
+ {
+ RTHCPHYS HCPhys = NIL_RTGCPHYS;
+ int rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysSrc, &HCPhys);
+ AssertRCReturn(rc, rc);
+ pMapPages->PageList[iPage] = HCPhys >> X86_PAGE_SHIFT;
+ }
+
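+ /* The rep count rides in the upper 32 bits of the hypercall control word; on full
+ success the hypervisor echoes that count back in the upper half of the result with
+ a zero status in the low bits, which is what the comparison below checks for. */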
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HvCallMapGpaPages | ((uint64_t)cPages << 32),
+ pGVCpu->nem.s.HypercallData.HCPhysPage, 0);
+ Log6(("NEMR0MapPages: %RGp/%RGp L %u prot %#x -> %#RX64\n",
+ GCPhysDst, GCPhysSrc - cPages * X86_PAGE_SIZE, cPages, fFlags, uResult));
+ if (uResult == ((uint64_t)cPages << 32))
+ return VINF_SUCCESS;
+
+ /*
+ * If the partition is out of memory, try to donate another 512 pages
+ * (2 MB) to it. VID.SYS does multiples of 512 pages, nothing smaller.
+ */
+ if ( uResult != HV_STATUS_INSUFFICIENT_MEMORY
+ || iTries > 16
+ || g_pfnWinHvDepositMemory == NULL)
+ {
+ LogRel(("g_pfnHvlInvokeHypercall/MapGpaPages -> %#RX64\n", uResult));
+ return VERR_NEM_MAP_PAGES_FAILED;
+ }
+
+ size_t cPagesAdded = 0;
+ NTSTATUS rcNt = g_pfnWinHvDepositMemory(pGVM->nem.s.idHvPartition, 512, 0, &cPagesAdded);
+ if (!cPagesAdded)
+ {
+ LogRel(("g_pfnWinHvDepositMemory -> %#x / %#RX64\n", rcNt, uResult));
+ return VERR_NEM_MAP_PAGES_FAILED;
+ }
+ }
+}
+
+
+/**
+ * Maps pages into the guest physical address space.
+ *
+ * Generally the caller will be under the PGM lock already, so no extra effort
+ * is needed to make sure all changes happens under it.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param pVM The cross context VM handle.
+ * @param idCpu The calling EMT. Necessary for getting the
+ * hypercall page and arguments.
+ * @thread EMT(idCpu)
+ */
+VMMR0_INT_DECL(int) NEMR0MapPages(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+ /*
+ * Unpack the call.
+ */
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ PVMCPU pVCpu = &pVM->aCpus[idCpu];
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+
+ RTGCPHYS const GCPhysSrc = pVCpu->nem.s.Hypercall.MapPages.GCPhysSrc;
+ RTGCPHYS const GCPhysDst = pVCpu->nem.s.Hypercall.MapPages.GCPhysDst;
+ uint32_t const cPages = pVCpu->nem.s.Hypercall.MapPages.cPages;
+ HV_MAP_GPA_FLAGS const fFlags = pVCpu->nem.s.Hypercall.MapPages.fFlags;
+
+ /*
+ * Do the work.
+ */
+ rc = nemR0WinMapPages(pGVM, pVM, pGVCpu, GCPhysSrc, GCPhysDst, cPages, fFlags);
+ }
+ return rc;
+}
+
+
+/**
+ * Worker for NEMR0UnmapPages and others.
+ */
+NEM_TMPL_STATIC int nemR0WinUnmapPages(PGVM pGVM, PGVMCPU pGVCpu, RTGCPHYS GCPhys, uint32_t cPages)
+{
+ /*
+ * Validate input.
+ */
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API);
+
+ AssertReturn(cPages > 0, VERR_OUT_OF_RANGE);
+ AssertReturn(cPages <= NEM_MAX_UNMAP_PAGES, VERR_OUT_OF_RANGE);
+ AssertMsgReturn(!(GCPhys & X86_PAGE_OFFSET_MASK), ("%RGp\n", GCPhys), VERR_OUT_OF_RANGE);
+ AssertReturn(GCPhys < _1E, VERR_OUT_OF_RANGE);
+
+ /*
+ * Compose and make the hypercall.
+ */
+ HV_INPUT_UNMAP_GPA_PAGES *pUnmapPages = (HV_INPUT_UNMAP_GPA_PAGES *)pGVCpu->nem.s.HypercallData.pbPage;
+ AssertPtrReturn(pUnmapPages, VERR_INTERNAL_ERROR_3);
+ pUnmapPages->TargetPartitionId = pGVM->nem.s.idHvPartition;
+ pUnmapPages->TargetGpaBase = GCPhys >> X86_PAGE_SHIFT;
+ pUnmapPages->fFlags = 0;
+
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HvCallUnmapGpaPages | ((uint64_t)cPages << 32),
+ pGVCpu->nem.s.HypercallData.HCPhysPage, 0);
+ Log6(("NEMR0UnmapPages: %RGp L %u -> %#RX64\n", GCPhys, cPages, uResult));
+ if (uResult == ((uint64_t)cPages << 32))
+ {
+#if 1 /* Do we need to do this? Hopefully not... */
+ uint64_t volatile uR = g_pfnHvlInvokeHypercall(HvCallUncommitGpaPages | ((uint64_t)cPages << 32),
+ pGVCpu->nem.s.HypercallData.HCPhysPage, 0);
+ AssertMsg(uR == ((uint64_t)cPages << 32), ("uR=%#RX64\n", uR)); NOREF(uR);
+#endif
+ return VINF_SUCCESS;
+ }
+
+ LogRel(("g_pfnHvlInvokeHypercall/UnmapGpaPages -> %#RX64\n", uResult));
+ return VERR_NEM_UNMAP_PAGES_FAILED;
+}
+
+
+/**
+ * Unmaps pages from the guest physical address space.
+ *
+ * Generally the caller will be under the PGM lock already, so no extra effort
+ * is needed to make sure all changes happens under it.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param pVM The cross context VM handle.
+ * @param idCpu The calling EMT. Necessary for getting the
+ * hypercall page and arguments.
+ * @thread EMT(idCpu)
+ */
+VMMR0_INT_DECL(int) NEMR0UnmapPages(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+ /*
+ * Unpack the call.
+ */
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ PVMCPU pVCpu = &pVM->aCpus[idCpu];
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+
+ RTGCPHYS const GCPhys = pVCpu->nem.s.Hypercall.UnmapPages.GCPhys;
+ uint32_t const cPages = pVCpu->nem.s.Hypercall.UnmapPages.cPages;
+
+ /*
+ * Do the work.
+ */
+ rc = nemR0WinUnmapPages(pGVM, pGVCpu, GCPhys, cPages);
+ }
+ return rc;
+}
+
+
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+/**
+ * Worker for NEMR0ExportState.
+ *
+ * The intention is to use it internally later.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param pGVCpu The ring-0 VCPU handle.
+ * @param pCtx The CPU context structure to export from.
+ */
+NEM_TMPL_STATIC int nemR0WinExportState(PGVM pGVM, PGVMCPU pGVCpu, PCPUMCTX pCtx)
+{
+ PVMCPU pVCpu = &pGVM->pVM->aCpus[pGVCpu->idCpu];
+ HV_INPUT_SET_VP_REGISTERS *pInput = (HV_INPUT_SET_VP_REGISTERS *)pGVCpu->nem.s.HypercallData.pbPage;
+ AssertPtrReturn(pInput, VERR_INTERNAL_ERROR_3);
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API);
+
+ pInput->PartitionId = pGVM->nem.s.idHvPartition;
+ pInput->VpIndex = pGVCpu->idCpu;
+ pInput->RsvdZ = 0;
+
+ uint64_t const fWhat = ~pCtx->fExtrn & (CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_NEM_WIN_MASK);
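+ /* Bits set in fExtrn mark state that still lives externally (with Hyper-V), so the
+ inverted mask selects what is currently valid in CPUMCTX and needs pushing out. */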
+ if ( !fWhat
+ && pVCpu->nem.s.fCurrentInterruptWindows == pVCpu->nem.s.fDesiredInterruptWindows)
+ return VINF_SUCCESS;
+ uintptr_t iReg = 0;
+
+ /* GPRs */
+ if (fWhat & CPUMCTX_EXTRN_GPRS_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_RAX)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRax;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rax;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RCX)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRcx;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rcx;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RDX)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRdx;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rdx;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RBX)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRbx;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rbx;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RSP)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRsp;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rsp;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RBP)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRbp;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rbp;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RSI)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRsi;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rsi;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RDI)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRdi;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rdi;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_R8_R15)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR8;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r8;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR9;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r9;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR10;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r10;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR11;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r11;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR12;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r12;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR13;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r13;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR14;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r14;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterR15;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->r15;
+ iReg++;
+ }
+ }
+
+ /* RIP & Flags */
+ if (fWhat & CPUMCTX_EXTRN_RIP)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRip;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rip;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RFLAGS)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterRflags;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->rflags.u;
+ iReg++;
+ }
+
+ /* Segments */
+# define COPY_OUT_SEG(a_idx, a_enmName, a_SReg) \
+ do { \
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[a_idx]); \
+ pInput->Elements[a_idx].Name = a_enmName; \
+ pInput->Elements[a_idx].Value.Segment.Base = (a_SReg).u64Base; \
+ pInput->Elements[a_idx].Value.Segment.Limit = (a_SReg).u32Limit; \
+ pInput->Elements[a_idx].Value.Segment.Selector = (a_SReg).Sel; \
+ pInput->Elements[a_idx].Value.Segment.Attributes = (a_SReg).Attr.u; \
+ } while (0)
+ if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CS)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterCs, pCtx->cs);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_ES)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterEs, pCtx->es);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_SS)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterSs, pCtx->ss);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DS)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterDs, pCtx->ds);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_FS)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterFs, pCtx->fs);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_GS)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterGs, pCtx->gs);
+ iReg++;
+ }
+ }
+
+ /* Descriptor tables & task segment. */
+ if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_LDTR)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterLdtr, pCtx->ldtr);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_TR)
+ {
+ COPY_OUT_SEG(iReg, HvX64RegisterTr, pCtx->tr);
+ iReg++;
+ }
+
+ if (fWhat & CPUMCTX_EXTRN_IDTR)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Value.Table.Pad[0] = 0;
+ pInput->Elements[iReg].Value.Table.Pad[1] = 0;
+ pInput->Elements[iReg].Value.Table.Pad[2] = 0;
+ pInput->Elements[iReg].Name = HvX64RegisterIdtr;
+ pInput->Elements[iReg].Value.Table.Limit = pCtx->idtr.cbIdt;
+ pInput->Elements[iReg].Value.Table.Base = pCtx->idtr.pIdt;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_GDTR)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Value.Table.Pad[0] = 0;
+ pInput->Elements[iReg].Value.Table.Pad[1] = 0;
+ pInput->Elements[iReg].Value.Table.Pad[2] = 0;
+ pInput->Elements[iReg].Name = HvX64RegisterGdtr;
+ pInput->Elements[iReg].Value.Table.Limit = pCtx->gdtr.cbGdt;
+ pInput->Elements[iReg].Value.Table.Base = pCtx->gdtr.pGdt;
+ iReg++;
+ }
+ }
+
+ /* Control registers. */
+ if (fWhat & CPUMCTX_EXTRN_CR_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CR0)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterCr0;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->cr0;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_CR2)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterCr2;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->cr2;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_CR3)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterCr3;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->cr3;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_CR4)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterCr4;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->cr4;
+ iReg++;
+ }
+ }
+ if (fWhat & CPUMCTX_EXTRN_APIC_TPR)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterCr8;
+ pInput->Elements[iReg].Value.Reg64 = CPUMGetGuestCR8(pVCpu);
+ iReg++;
+ }
+
+ /** @todo does HvX64RegisterXfem mean XCR0? What about the related MSR. */
+
+ /* Debug registers. */
+/** @todo fixme. Figure out what the hyper-v version of KVM_SET_GUEST_DEBUG would be. */
+ if (fWhat & CPUMCTX_EXTRN_DR0_DR3)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterDr0;
+ //pInput->Elements[iReg].Value.Reg64 = CPUMGetHyperDR0(pVCpu);
+ pInput->Elements[iReg].Value.Reg64 = pCtx->dr[0];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterDr1;
+ //pInput->Elements[iReg].Value.Reg64 = CPUMGetHyperDR1(pVCpu);
+ pInput->Elements[iReg].Value.Reg64 = pCtx->dr[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterDr2;
+ //pInput->Elements[iReg].Value.Reg64 = CPUMGetHyperDR2(pVCpu);
+ pInput->Elements[iReg].Value.Reg64 = pCtx->dr[2];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterDr3;
+ //pInput->Elements[iReg].Value.Reg64 = CPUMGetHyperDR3(pVCpu);
+ pInput->Elements[iReg].Value.Reg64 = pCtx->dr[3];
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DR6)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterDr6;
+ //pInput->Elements[iReg].Value.Reg64 = CPUMGetHyperDR6(pVCpu);
+ pInput->Elements[iReg].Value.Reg64 = pCtx->dr[6];
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DR7)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterDr7;
+ //pInput->Elements[iReg].Value.Reg64 = CPUMGetHyperDR7(pVCpu);
+ pInput->Elements[iReg].Value.Reg64 = pCtx->dr[7];
+ iReg++;
+ }
+
+ /* Floating point state. */
+ if (fWhat & CPUMCTX_EXTRN_X87)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx0;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[0].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[0].au64[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx1;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[1].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[1].au64[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx2;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[2].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[2].au64[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx3;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[3].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[3].au64[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx4;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[4].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[4].au64[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx5;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[5].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[5].au64[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx6;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[6].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[6].au64[1];
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpMmx7;
+ pInput->Elements[iReg].Value.Fp.AsUINT128.Low64 = pCtx->pXStateR0->x87.aRegs[7].au64[0];
+ pInput->Elements[iReg].Value.Fp.AsUINT128.High64 = pCtx->pXStateR0->x87.aRegs[7].au64[1];
+ iReg++;
+
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterFpControlStatus;
+ pInput->Elements[iReg].Value.FpControlStatus.FpControl = pCtx->pXStateR0->x87.FCW;
+ pInput->Elements[iReg].Value.FpControlStatus.FpStatus = pCtx->pXStateR0->x87.FSW;
+ pInput->Elements[iReg].Value.FpControlStatus.FpTag = pCtx->pXStateR0->x87.FTW;
+ pInput->Elements[iReg].Value.FpControlStatus.Reserved = pCtx->pXStateR0->x87.FTW >> 8;
+ pInput->Elements[iReg].Value.FpControlStatus.LastFpOp = pCtx->pXStateR0->x87.FOP;
+ pInput->Elements[iReg].Value.FpControlStatus.LastFpRip = (pCtx->pXStateR0->x87.FPUIP)
+ | ((uint64_t)pCtx->pXStateR0->x87.CS << 32)
+ | ((uint64_t)pCtx->pXStateR0->x87.Rsrvd1 << 48);
+ iReg++;
+/** @todo we've got trouble if we try to write just SSE w/o X87. */
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmmControlStatus;
+ pInput->Elements[iReg].Value.XmmControlStatus.LastFpRdp = (pCtx->pXStateR0->x87.FPUDP)
+ | ((uint64_t)pCtx->pXStateR0->x87.DS << 32)
+ | ((uint64_t)pCtx->pXStateR0->x87.Rsrvd2 << 48);
+ pInput->Elements[iReg].Value.XmmControlStatus.XmmStatusControl = pCtx->pXStateR0->x87.MXCSR;
+ pInput->Elements[iReg].Value.XmmControlStatus.XmmStatusControlMask = pCtx->pXStateR0->x87.MXCSR_MASK; /** @todo ??? (Isn't this an output field?) */
+ iReg++;
+ }
+
+ /* Vector state. */
+ if (fWhat & CPUMCTX_EXTRN_SSE_AVX)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm0;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[0].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[0].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm1;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[1].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[1].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm2;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[2].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[2].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm3;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[3].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[3].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm4;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[4].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[4].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm5;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[5].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[5].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm6;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[6].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[6].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm7;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[7].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[7].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm8;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[8].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[8].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm9;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[9].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[9].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm10;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[10].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[10].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm11;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[11].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[11].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm12;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[12].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[12].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm13;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[13].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[13].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm14;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[14].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[14].uXmm.s.Hi;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterXmm15;
+ pInput->Elements[iReg].Value.Reg128.Low64 = pCtx->pXStateR0->x87.aXMM[15].uXmm.s.Lo;
+ pInput->Elements[iReg].Value.Reg128.High64 = pCtx->pXStateR0->x87.aXMM[15].uXmm.s.Hi;
+ iReg++;
+ }
+
+ /* MSRs */
+ // HvX64RegisterTsc - don't touch
+ if (fWhat & CPUMCTX_EXTRN_EFER)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterEfer;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->msrEFER;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterKernelGsBase;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->msrKERNELGSBASE;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterSysenterCs;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->SysEnter.cs;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterSysenterEip;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->SysEnter.eip;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterSysenterEsp;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->SysEnter.esp;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterStar;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->msrSTAR;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterLstar;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->msrLSTAR;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterCstar;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->msrCSTAR;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterSfmask;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->msrSFMASK;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_OTHER_MSRS)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterApicBase;
+ pInput->Elements[iReg].Value.Reg64 = APICGetBaseMsrNoCheck(pVCpu);
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterPat;
+ pInput->Elements[iReg].Value.Reg64 = pCtx->msrPAT;
+ iReg++;
+# if 0 /** @todo HvX64RegisterMtrrCap is read only? Seems it's not even readable. */
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrCap;
+ pInput->Elements[iReg].Value.Reg64 = CPUMGetGuestIa32MtrrCap(pVCpu);
+ iReg++;
+# endif
+
+ PCPUMCTXMSRS pCtxMsrs = CPUMQueryGuestCtxMsrsPtr(pVCpu);
+
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrDefType;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrDefType;
+ iReg++;
+
+ /** @todo we don't keep state for HvX64RegisterMtrrPhysBaseX and HvX64RegisterMtrrPhysMaskX */
+
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix64k00000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix64K_00000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix16k80000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix16K_80000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix16kA0000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix16K_A0000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kC0000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_C0000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kC8000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_C8000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kD0000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_D0000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kD8000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_D8000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kE0000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_E0000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kE8000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_E8000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kF0000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_F0000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterMtrrFix4kF8000;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MtrrFix4K_F8000;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterTscAux;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.TscAux;
+ iReg++;
+
+# if 0 /** @todo Why can't we write these on Intel systems? Not that we really care... */
+ const CPUMCPUVENDOR enmCpuVendor = CPUMGetHostCpuVendor(pGVM->pVM);
+ if (enmCpuVendor != CPUMCPUVENDOR_AMD)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterIa32MiscEnable;
+ pInput->Elements[iReg].Value.Reg64 = pCtxMsrs->msr.MiscEnable;
+ iReg++;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterIa32FeatureControl;
+ pInput->Elements[iReg].Value.Reg64 = CPUMGetGuestIa32FeatureControl(pVCpu);
+ iReg++;
+ }
+# endif
+ }
+
+ /* event injection (clear it). */
+ if (fWhat & CPUMCTX_EXTRN_NEM_WIN_EVENT_INJECT)
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvRegisterPendingInterruption;
+ pInput->Elements[iReg].Value.Reg64 = 0;
+ iReg++;
+ }
+
+ /* Interruptibility state. This can get a little complicated since we get
+ half of the state via HV_X64_VP_EXECUTION_STATE. */
+ if ( (fWhat & (CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI))
+ == (CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI) )
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvRegisterInterruptState;
+ pInput->Elements[iReg].Value.Reg64 = 0;
+ if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
+ && EMGetInhibitInterruptsPC(pVCpu) == pCtx->rip)
+ pInput->Elements[iReg].Value.InterruptState.InterruptShadow = 1;
+ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ pInput->Elements[iReg].Value.InterruptState.NmiMasked = 1;
+ iReg++;
+ }
+ else if (fWhat & CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT)
+ {
+ if ( pVCpu->nem.s.fLastInterruptShadow
+ || ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
+ && EMGetInhibitInterruptsPC(pVCpu) == pCtx->rip))
+ {
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvRegisterInterruptState;
+ pInput->Elements[iReg].Value.Reg64 = 0;
+ if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
+ && EMGetInhibitInterruptsPC(pVCpu) == pCtx->rip)
+ pInput->Elements[iReg].Value.InterruptState.InterruptShadow = 1;
+ /** @todo Retrieve NMI state, currently assuming it's zero. (yes this may happen on I/O) */
+ //if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
+ // pInput->Elements[iReg].Value.InterruptState.NmiMasked = 1;
+ iReg++;
+ }
+ }
+ else
+ Assert(!(fWhat & CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI));
+
+ /* Interrupt windows. Always set if active as Hyper-V seems to be forgetful. */
+ uint8_t const fDesiredIntWin = pVCpu->nem.s.fDesiredInterruptWindows;
+ if ( fDesiredIntWin
+ || pVCpu->nem.s.fCurrentInterruptWindows != fDesiredIntWin)
+ {
+ pVCpu->nem.s.fCurrentInterruptWindows = pVCpu->nem.s.fDesiredInterruptWindows;
+ HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
+ pInput->Elements[iReg].Name = HvX64RegisterDeliverabilityNotifications;
+ pInput->Elements[iReg].Value.DeliverabilityNotifications.AsUINT64 = fDesiredIntWin;
+ Assert(pInput->Elements[iReg].Value.DeliverabilityNotifications.NmiNotification == RT_BOOL(fDesiredIntWin & NEM_WIN_INTW_F_NMI));
+ Assert(pInput->Elements[iReg].Value.DeliverabilityNotifications.InterruptNotification == RT_BOOL(fDesiredIntWin & NEM_WIN_INTW_F_REGULAR));
+ Assert(pInput->Elements[iReg].Value.DeliverabilityNotifications.InterruptPriority == (fDesiredIntWin & NEM_WIN_INTW_F_PRIO_MASK) >> NEM_WIN_INTW_F_PRIO_SHIFT);
+ iReg++;
+ }
+
+ /// @todo HvRegisterPendingEvent0
+ /// @todo HvRegisterPendingEvent1
+
+ /*
+ * Set the registers.
+ */
+ Assert((uintptr_t)&pInput->Elements[iReg] - (uintptr_t)pGVCpu->nem.s.HypercallData.pbPage < PAGE_SIZE); /* max is 127 */
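+ /* (A PAGE_SIZE input page less the 16-byte fixed header leaves room for 127 of the
+ 32-byte HV_REGISTER_ASSOC elements -- sizes assumed, not taken from the headers.) */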
+
+ /*
+ * Make the hypercall.
+ */
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallSetVpRegisters, iReg),
+ pGVCpu->nem.s.HypercallData.HCPhysPage, 0 /*GCPhysOutput*/);
+ AssertLogRelMsgReturn(uResult == HV_MAKE_CALL_REP_RET(iReg),
+ ("uResult=%RX64 iRegs=%#x\n", uResult, iReg),
+ VERR_NEM_SET_REGISTERS_FAILED);
+ //LogFlow(("nemR0WinExportState: uResult=%#RX64 iReg=%zu fWhat=%#018RX64 fExtrn=%#018RX64 -> %#018RX64\n", uResult, iReg, fWhat, pCtx->fExtrn,
+ // pCtx->fExtrn | CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_NEM_WIN_MASK | CPUMCTX_EXTRN_KEEPER_NEM ));
+ pCtx->fExtrn |= CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_NEM_WIN_MASK | CPUMCTX_EXTRN_KEEPER_NEM;
+ return VINF_SUCCESS;
+}
+#endif /* NEM_WIN_WITH_RING0_RUNLOOP || NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS */
+
+
+/**
+ * Export the state to the native API (out of CPUMCTX).
+ *
+ * @returns VBox status code
+ * @param pGVM The ring-0 VM handle.
+ * @param pVM The cross context VM handle.
+ * @param idCpu The calling EMT. Necessary for getting the
+ * hypercall page and arguments.
+ */
+VMMR0_INT_DECL(int) NEMR0ExportState(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+ /*
+ * Validate the call.
+ */
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ PVMCPU pVCpu = &pVM->aCpus[idCpu];
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API);
+
+ /*
+ * Call worker.
+ */
+ rc = nemR0WinExportState(pGVM, pGVCpu, &pVCpu->cpum.GstCtx);
+ }
+ return rc;
+#else
+ RT_NOREF(pGVM, pVM, idCpu);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+/**
+ * Worker for NEMR0ImportState.
+ *
+ * The intention is to use it internally later.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param pGVCpu The ring-0 VCPU handle.
+ * @param pCtx The CPU context structure to import into.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
+ * @param fCanUpdateCr3 Whether it's safe to update CR3 or not.
+ */
+NEM_TMPL_STATIC int nemR0WinImportState(PGVM pGVM, PGVMCPU pGVCpu, PCPUMCTX pCtx, uint64_t fWhat, bool fCanUpdateCr3)
+{
+ HV_INPUT_GET_VP_REGISTERS *pInput = (HV_INPUT_GET_VP_REGISTERS *)pGVCpu->nem.s.HypercallData.pbPage;
+ AssertPtrReturn(pInput, VERR_INTERNAL_ERROR_3);
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API);
+ Assert(pCtx == &pGVCpu->pVCpu->cpum.GstCtx);
+
+ fWhat &= pCtx->fExtrn;
+
+ pInput->PartitionId = pGVM->nem.s.idHvPartition;
+ pInput->VpIndex = pGVCpu->idCpu;
+ pInput->fFlags = 0;
+
+ /* GPRs */
+ uintptr_t iReg = 0;
+ if (fWhat & CPUMCTX_EXTRN_GPRS_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_RAX)
+ pInput->Names[iReg++] = HvX64RegisterRax;
+ if (fWhat & CPUMCTX_EXTRN_RCX)
+ pInput->Names[iReg++] = HvX64RegisterRcx;
+ if (fWhat & CPUMCTX_EXTRN_RDX)
+ pInput->Names[iReg++] = HvX64RegisterRdx;
+ if (fWhat & CPUMCTX_EXTRN_RBX)
+ pInput->Names[iReg++] = HvX64RegisterRbx;
+ if (fWhat & CPUMCTX_EXTRN_RSP)
+ pInput->Names[iReg++] = HvX64RegisterRsp;
+ if (fWhat & CPUMCTX_EXTRN_RBP)
+ pInput->Names[iReg++] = HvX64RegisterRbp;
+ if (fWhat & CPUMCTX_EXTRN_RSI)
+ pInput->Names[iReg++] = HvX64RegisterRsi;
+ if (fWhat & CPUMCTX_EXTRN_RDI)
+ pInput->Names[iReg++] = HvX64RegisterRdi;
+ if (fWhat & CPUMCTX_EXTRN_R8_R15)
+ {
+ pInput->Names[iReg++] = HvX64RegisterR8;
+ pInput->Names[iReg++] = HvX64RegisterR9;
+ pInput->Names[iReg++] = HvX64RegisterR10;
+ pInput->Names[iReg++] = HvX64RegisterR11;
+ pInput->Names[iReg++] = HvX64RegisterR12;
+ pInput->Names[iReg++] = HvX64RegisterR13;
+ pInput->Names[iReg++] = HvX64RegisterR14;
+ pInput->Names[iReg++] = HvX64RegisterR15;
+ }
+ }
+
+ /* RIP & Flags */
+ if (fWhat & CPUMCTX_EXTRN_RIP)
+ pInput->Names[iReg++] = HvX64RegisterRip;
+ if (fWhat & CPUMCTX_EXTRN_RFLAGS)
+ pInput->Names[iReg++] = HvX64RegisterRflags;
+
+ /* Segments */
+ if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CS)
+ pInput->Names[iReg++] = HvX64RegisterCs;
+ if (fWhat & CPUMCTX_EXTRN_ES)
+ pInput->Names[iReg++] = HvX64RegisterEs;
+ if (fWhat & CPUMCTX_EXTRN_SS)
+ pInput->Names[iReg++] = HvX64RegisterSs;
+ if (fWhat & CPUMCTX_EXTRN_DS)
+ pInput->Names[iReg++] = HvX64RegisterDs;
+ if (fWhat & CPUMCTX_EXTRN_FS)
+ pInput->Names[iReg++] = HvX64RegisterFs;
+ if (fWhat & CPUMCTX_EXTRN_GS)
+ pInput->Names[iReg++] = HvX64RegisterGs;
+ }
+
+ /* Descriptor tables and the task segment. */
+ if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_LDTR)
+ pInput->Names[iReg++] = HvX64RegisterLdtr;
+ if (fWhat & CPUMCTX_EXTRN_TR)
+ pInput->Names[iReg++] = HvX64RegisterTr;
+ if (fWhat & CPUMCTX_EXTRN_IDTR)
+ pInput->Names[iReg++] = HvX64RegisterIdtr;
+ if (fWhat & CPUMCTX_EXTRN_GDTR)
+ pInput->Names[iReg++] = HvX64RegisterGdtr;
+ }
+
+ /* Control registers. */
+ if (fWhat & CPUMCTX_EXTRN_CR_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CR0)
+ pInput->Names[iReg++] = HvX64RegisterCr0;
+ if (fWhat & CPUMCTX_EXTRN_CR2)
+ pInput->Names[iReg++] = HvX64RegisterCr2;
+ if (fWhat & CPUMCTX_EXTRN_CR3)
+ pInput->Names[iReg++] = HvX64RegisterCr3;
+ if (fWhat & CPUMCTX_EXTRN_CR4)
+ pInput->Names[iReg++] = HvX64RegisterCr4;
+ }
+ if (fWhat & CPUMCTX_EXTRN_APIC_TPR)
+ pInput->Names[iReg++] = HvX64RegisterCr8;
+
+ /* Debug registers. */
+ if (fWhat & CPUMCTX_EXTRN_DR7)
+ pInput->Names[iReg++] = HvX64RegisterDr7;
+ if (fWhat & CPUMCTX_EXTRN_DR0_DR3)
+ {
+ if (!(fWhat & CPUMCTX_EXTRN_DR7) && (pCtx->fExtrn & CPUMCTX_EXTRN_DR7))
+ {
+ fWhat |= CPUMCTX_EXTRN_DR7;
+ pInput->Names[iReg++] = HvX64RegisterDr7;
+ }
+ pInput->Names[iReg++] = HvX64RegisterDr0;
+ pInput->Names[iReg++] = HvX64RegisterDr1;
+ pInput->Names[iReg++] = HvX64RegisterDr2;
+ pInput->Names[iReg++] = HvX64RegisterDr3;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DR6)
+ pInput->Names[iReg++] = HvX64RegisterDr6;
+
+ /* Floating point state. */
+ if (fWhat & CPUMCTX_EXTRN_X87)
+ {
+ pInput->Names[iReg++] = HvX64RegisterFpMmx0;
+ pInput->Names[iReg++] = HvX64RegisterFpMmx1;
+ pInput->Names[iReg++] = HvX64RegisterFpMmx2;
+ pInput->Names[iReg++] = HvX64RegisterFpMmx3;
+ pInput->Names[iReg++] = HvX64RegisterFpMmx4;
+ pInput->Names[iReg++] = HvX64RegisterFpMmx5;
+ pInput->Names[iReg++] = HvX64RegisterFpMmx6;
+ pInput->Names[iReg++] = HvX64RegisterFpMmx7;
+ pInput->Names[iReg++] = HvX64RegisterFpControlStatus;
+ }
+ if (fWhat & (CPUMCTX_EXTRN_X87 | CPUMCTX_EXTRN_SSE_AVX))
+ pInput->Names[iReg++] = HvX64RegisterXmmControlStatus;
+
+ /* Vector state. */
+ if (fWhat & CPUMCTX_EXTRN_SSE_AVX)
+ {
+ pInput->Names[iReg++] = HvX64RegisterXmm0;
+ pInput->Names[iReg++] = HvX64RegisterXmm1;
+ pInput->Names[iReg++] = HvX64RegisterXmm2;
+ pInput->Names[iReg++] = HvX64RegisterXmm3;
+ pInput->Names[iReg++] = HvX64RegisterXmm4;
+ pInput->Names[iReg++] = HvX64RegisterXmm5;
+ pInput->Names[iReg++] = HvX64RegisterXmm6;
+ pInput->Names[iReg++] = HvX64RegisterXmm7;
+ pInput->Names[iReg++] = HvX64RegisterXmm8;
+ pInput->Names[iReg++] = HvX64RegisterXmm9;
+ pInput->Names[iReg++] = HvX64RegisterXmm10;
+ pInput->Names[iReg++] = HvX64RegisterXmm11;
+ pInput->Names[iReg++] = HvX64RegisterXmm12;
+ pInput->Names[iReg++] = HvX64RegisterXmm13;
+ pInput->Names[iReg++] = HvX64RegisterXmm14;
+ pInput->Names[iReg++] = HvX64RegisterXmm15;
+ }
+
+ /* MSRs */
+ // HvX64RegisterTsc - don't touch
+ if (fWhat & CPUMCTX_EXTRN_EFER)
+ pInput->Names[iReg++] = HvX64RegisterEfer;
+ if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
+ pInput->Names[iReg++] = HvX64RegisterKernelGsBase;
+ if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
+ {
+ pInput->Names[iReg++] = HvX64RegisterSysenterCs;
+ pInput->Names[iReg++] = HvX64RegisterSysenterEip;
+ pInput->Names[iReg++] = HvX64RegisterSysenterEsp;
+ }
+ if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
+ {
+ pInput->Names[iReg++] = HvX64RegisterStar;
+ pInput->Names[iReg++] = HvX64RegisterLstar;
+ pInput->Names[iReg++] = HvX64RegisterCstar;
+ pInput->Names[iReg++] = HvX64RegisterSfmask;
+ }
+
+# ifdef LOG_ENABLED
+ const CPUMCPUVENDOR enmCpuVendor = CPUMGetHostCpuVendor(pGVM->pVM);
+# endif
+ if (fWhat & CPUMCTX_EXTRN_OTHER_MSRS)
+ {
+ pInput->Names[iReg++] = HvX64RegisterApicBase; /// @todo APIC BASE
+ pInput->Names[iReg++] = HvX64RegisterPat;
+# if 0 /*def LOG_ENABLED*/ /** @todo something's wrong with HvX64RegisterMtrrCap? (AMD) */
+ pInput->Names[iReg++] = HvX64RegisterMtrrCap;
+# endif
+ pInput->Names[iReg++] = HvX64RegisterMtrrDefType;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix64k00000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix16k80000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix16kA0000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kC0000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kC8000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kD0000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kD8000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kE0000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kE8000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kF0000;
+ pInput->Names[iReg++] = HvX64RegisterMtrrFix4kF8000;
+ pInput->Names[iReg++] = HvX64RegisterTscAux;
+# if 0 /** @todo why can't we read HvX64RegisterIa32MiscEnable? */
+ if (enmCpuVendor != CPUMCPUVENDOR_AMD)
+ pInput->Names[iReg++] = HvX64RegisterIa32MiscEnable;
+# endif
+# ifdef LOG_ENABLED
+ if (enmCpuVendor != CPUMCPUVENDOR_AMD)
+ pInput->Names[iReg++] = HvX64RegisterIa32FeatureControl;
+# endif
+ }
+
+ /* Interruptibility. */
+ if (fWhat & (CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI))
+ {
+ pInput->Names[iReg++] = HvRegisterInterruptState;
+ pInput->Names[iReg++] = HvX64RegisterRip;
+ }
+
+ /* event injection */
+ pInput->Names[iReg++] = HvRegisterPendingInterruption;
+ pInput->Names[iReg++] = HvRegisterPendingEvent0;
+ pInput->Names[iReg++] = HvRegisterPendingEvent1;
+ size_t const cRegs = iReg;
+ size_t const cbInput = RT_ALIGN_Z(RT_UOFFSETOF_DYN(HV_INPUT_GET_VP_REGISTERS, Names[cRegs]), 32);
+
+ HV_REGISTER_VALUE *paValues = (HV_REGISTER_VALUE *)((uint8_t *)pInput + cbInput);
+ Assert((uintptr_t)&paValues[cRegs] - (uintptr_t)pGVCpu->nem.s.HypercallData.pbPage < PAGE_SIZE); /* (max is around 168 registers) */
+ RT_BZERO(paValues, cRegs * sizeof(paValues[0]));
+
+ /*
+ * Make the hypercall.
+ */
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallGetVpRegisters, cRegs),
+ pGVCpu->nem.s.HypercallData.HCPhysPage,
+ pGVCpu->nem.s.HypercallData.HCPhysPage + cbInput);
+ AssertLogRelMsgReturn(uResult == HV_MAKE_CALL_REP_RET(cRegs),
+ ("uResult=%RX64 cRegs=%#x\n", uResult, cRegs),
+ VERR_NEM_GET_REGISTERS_FAILED);
+ //LogFlow(("nemR0WinImportState: uResult=%#RX64 iReg=%zu fWhat=%#018RX64 fExtr=%#018RX64\n", uResult, cRegs, fWhat, pCtx->fExtrn));
+
+ /*
+ * Copy information to the CPUM context.
+ */
+ PVMCPU pVCpu = &pGVM->pVM->aCpus[pGVCpu->idCpu];
+ iReg = 0;
+
+ /* GPRs */
+ if (fWhat & CPUMCTX_EXTRN_GPRS_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_RAX)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRax);
+ pCtx->rax = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RCX)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRcx);
+ pCtx->rcx = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RDX)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRdx);
+ pCtx->rdx = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RBX)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRbx);
+ pCtx->rbx = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RSP)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRsp);
+ pCtx->rsp = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RBP)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRbp);
+ pCtx->rbp = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RSI)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRsi);
+ pCtx->rsi = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RDI)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRdi);
+ pCtx->rdi = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_R8_R15)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterR8);
+ Assert(pInput->Names[iReg + 7] == HvX64RegisterR15);
+ pCtx->r8 = paValues[iReg++].Reg64;
+ pCtx->r9 = paValues[iReg++].Reg64;
+ pCtx->r10 = paValues[iReg++].Reg64;
+ pCtx->r11 = paValues[iReg++].Reg64;
+ pCtx->r12 = paValues[iReg++].Reg64;
+ pCtx->r13 = paValues[iReg++].Reg64;
+ pCtx->r14 = paValues[iReg++].Reg64;
+ pCtx->r15 = paValues[iReg++].Reg64;
+ }
+ }
+
+ /* RIP & Flags */
+ if (fWhat & CPUMCTX_EXTRN_RIP)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRip);
+ pCtx->rip = paValues[iReg++].Reg64;
+ }
+ if (fWhat & CPUMCTX_EXTRN_RFLAGS)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterRflags);
+ pCtx->rflags.u = paValues[iReg++].Reg64;
+ }
+
+ /* Segments */
+# define COPY_BACK_SEG(a_idx, a_enmName, a_SReg) \
+ do { \
+ Assert(pInput->Names[a_idx] == a_enmName); \
+ (a_SReg).u64Base = paValues[a_idx].Segment.Base; \
+ (a_SReg).u32Limit = paValues[a_idx].Segment.Limit; \
+ (a_SReg).ValidSel = (a_SReg).Sel = paValues[a_idx].Segment.Selector; \
+ (a_SReg).Attr.u = paValues[a_idx].Segment.Attributes; \
+ (a_SReg).fFlags = CPUMSELREG_FLAGS_VALID; \
+ } while (0)
+ if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CS)
+ {
+ COPY_BACK_SEG(iReg, HvX64RegisterCs, pCtx->cs);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_ES)
+ {
+ COPY_BACK_SEG(iReg, HvX64RegisterEs, pCtx->es);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_SS)
+ {
+ COPY_BACK_SEG(iReg, HvX64RegisterSs, pCtx->ss);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DS)
+ {
+ COPY_BACK_SEG(iReg, HvX64RegisterDs, pCtx->ds);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_FS)
+ {
+ COPY_BACK_SEG(iReg, HvX64RegisterFs, pCtx->fs);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_GS)
+ {
+ COPY_BACK_SEG(iReg, HvX64RegisterGs, pCtx->gs);
+ iReg++;
+ }
+ }
+ /* Descriptor tables and the task segment. */
+ if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_LDTR)
+ {
+ COPY_BACK_SEG(iReg, HvX64RegisterLdtr, pCtx->ldtr);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_TR)
+ {
+ /* AMD-V likes loading TR with in AVAIL state, whereas intel insists on BUSY. So,
+ avoid to trigger sanity assertions around the code, always fix this. */
+ COPY_BACK_SEG(iReg, HvX64RegisterTr, pCtx->tr);
+ switch (pCtx->tr.Attr.n.u4Type)
+ {
+ case X86_SEL_TYPE_SYS_386_TSS_BUSY:
+ case X86_SEL_TYPE_SYS_286_TSS_BUSY:
+ break;
+ case X86_SEL_TYPE_SYS_386_TSS_AVAIL:
+ pCtx->tr.Attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
+ break;
+ case X86_SEL_TYPE_SYS_286_TSS_AVAIL:
+ pCtx->tr.Attr.n.u4Type = X86_SEL_TYPE_SYS_286_TSS_BUSY;
+ break;
+ }
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_IDTR)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterIdtr);
+ pCtx->idtr.cbIdt = paValues[iReg].Table.Limit;
+ pCtx->idtr.pIdt = paValues[iReg].Table.Base;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_GDTR)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterGdtr);
+ pCtx->gdtr.cbGdt = paValues[iReg].Table.Limit;
+ pCtx->gdtr.pGdt = paValues[iReg].Table.Base;
+ iReg++;
+ }
+ }
+
+ /* Control registers. */
+ bool fMaybeChangedMode = false;
+ bool fUpdateCr3 = false;
+ if (fWhat & CPUMCTX_EXTRN_CR_MASK)
+ {
+ if (fWhat & CPUMCTX_EXTRN_CR0)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterCr0);
+ if (pCtx->cr0 != paValues[iReg].Reg64)
+ {
+ CPUMSetGuestCR0(pVCpu, paValues[iReg].Reg64);
+ fMaybeChangedMode = true;
+ }
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_CR2)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterCr2);
+ pCtx->cr2 = paValues[iReg].Reg64;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_CR3)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterCr3);
+ if (pCtx->cr3 != paValues[iReg].Reg64)
+ {
+ CPUMSetGuestCR3(pVCpu, paValues[iReg].Reg64);
+ fUpdateCr3 = true;
+ }
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_CR4)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterCr4);
+ if (pCtx->cr4 != paValues[iReg].Reg64)
+ {
+ CPUMSetGuestCR4(pVCpu, paValues[iReg].Reg64);
+ fMaybeChangedMode = true;
+ }
+ iReg++;
+ }
+ }
+ if (fWhat & CPUMCTX_EXTRN_APIC_TPR)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterCr8);
+ APICSetTpr(pVCpu, (uint8_t)paValues[iReg].Reg64 << 4);
+ iReg++;
+ }
+
+ /* Debug registers. */
+ if (fWhat & CPUMCTX_EXTRN_DR7)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterDr7);
+ if (pCtx->dr[7] != paValues[iReg].Reg64)
+ CPUMSetGuestDR7(pVCpu, paValues[iReg].Reg64);
+ pCtx->fExtrn &= ~CPUMCTX_EXTRN_DR7; /* Hack alert! Avoids asserting when processing CPUMCTX_EXTRN_DR0_DR3. */
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DR0_DR3)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterDr0);
+ Assert(pInput->Names[iReg+3] == HvX64RegisterDr3);
+ if (pCtx->dr[0] != paValues[iReg].Reg64)
+ CPUMSetGuestDR0(pVCpu, paValues[iReg].Reg64);
+ iReg++;
+ if (pCtx->dr[1] != paValues[iReg].Reg64)
+ CPUMSetGuestDR1(pVCpu, paValues[iReg].Reg64);
+ iReg++;
+ if (pCtx->dr[2] != paValues[iReg].Reg64)
+ CPUMSetGuestDR2(pVCpu, paValues[iReg].Reg64);
+ iReg++;
+ if (pCtx->dr[3] != paValues[iReg].Reg64)
+ CPUMSetGuestDR3(pVCpu, paValues[iReg].Reg64);
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_DR6)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterDr6);
+ if (pCtx->dr[6] != paValues[iReg].Reg64)
+ CPUMSetGuestDR6(pVCpu, paValues[iReg].Reg64);
+ iReg++;
+ }
+
+ /* Floating point state. */
+ if (fWhat & CPUMCTX_EXTRN_X87)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterFpMmx0);
+ Assert(pInput->Names[iReg + 7] == HvX64RegisterFpMmx7);
+ pCtx->pXStateR0->x87.aRegs[0].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[0].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aRegs[1].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[1].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aRegs[2].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[2].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aRegs[3].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[3].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aRegs[4].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[4].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aRegs[5].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[5].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aRegs[6].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[6].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aRegs[7].au64[0] = paValues[iReg].Fp.AsUINT128.Low64;
+ pCtx->pXStateR0->x87.aRegs[7].au64[1] = paValues[iReg].Fp.AsUINT128.High64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterFpControlStatus);
+ pCtx->pXStateR0->x87.FCW = paValues[iReg].FpControlStatus.FpControl;
+ pCtx->pXStateR0->x87.FSW = paValues[iReg].FpControlStatus.FpStatus;
+ pCtx->pXStateR0->x87.FTW = paValues[iReg].FpControlStatus.FpTag
+ /*| (paValues[iReg].FpControlStatus.Reserved << 8)*/;
+ pCtx->pXStateR0->x87.FOP = paValues[iReg].FpControlStatus.LastFpOp;
+ pCtx->pXStateR0->x87.FPUIP = (uint32_t)paValues[iReg].FpControlStatus.LastFpRip;
+ pCtx->pXStateR0->x87.CS = (uint16_t)(paValues[iReg].FpControlStatus.LastFpRip >> 32);
+ pCtx->pXStateR0->x87.Rsrvd1 = (uint16_t)(paValues[iReg].FpControlStatus.LastFpRip >> 48);
+ iReg++;
+ }
+
+ if (fWhat & (CPUMCTX_EXTRN_X87 | CPUMCTX_EXTRN_SSE_AVX))
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterXmmControlStatus);
+ if (fWhat & CPUMCTX_EXTRN_X87)
+ {
+ pCtx->pXStateR0->x87.FPUDP = (uint32_t)paValues[iReg].XmmControlStatus.LastFpRdp;
+ pCtx->pXStateR0->x87.DS = (uint16_t)(paValues[iReg].XmmControlStatus.LastFpRdp >> 32);
+ pCtx->pXStateR0->x87.Rsrvd2 = (uint16_t)(paValues[iReg].XmmControlStatus.LastFpRdp >> 48);
+ }
+ pCtx->pXStateR0->x87.MXCSR = paValues[iReg].XmmControlStatus.XmmStatusControl;
+ pCtx->pXStateR0->x87.MXCSR_MASK = paValues[iReg].XmmControlStatus.XmmStatusControlMask; /** @todo ??? (Isn't this an output field?) */
+ iReg++;
+ }
+
+ /* Vector state. */
+ if (fWhat & CPUMCTX_EXTRN_SSE_AVX)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterXmm0);
+ Assert(pInput->Names[iReg+15] == HvX64RegisterXmm15);
+ pCtx->pXStateR0->x87.aXMM[0].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[0].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[1].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[1].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[2].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[2].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[3].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[3].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[4].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[4].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[5].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[5].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[6].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[6].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[7].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[7].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[8].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[8].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[9].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[9].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[10].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[10].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[11].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[11].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[12].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[12].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[13].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[13].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[14].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[14].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ pCtx->pXStateR0->x87.aXMM[15].uXmm.s.Lo = paValues[iReg].Reg128.Low64;
+ pCtx->pXStateR0->x87.aXMM[15].uXmm.s.Hi = paValues[iReg].Reg128.High64;
+ iReg++;
+ }
+
+
+ /* MSRs */
+ // HvX64RegisterTsc - don't touch
+ if (fWhat & CPUMCTX_EXTRN_EFER)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterEfer);
+ if (paValues[iReg].Reg64 != pCtx->msrEFER)
+ {
+ Log7(("NEM/%u: MSR EFER changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtx->msrEFER, paValues[iReg].Reg64));
+ if ((paValues[iReg].Reg64 ^ pCtx->msrEFER) & MSR_K6_EFER_NXE)
+ PGMNotifyNxeChanged(pVCpu, RT_BOOL(paValues[iReg].Reg64 & MSR_K6_EFER_NXE));
+ pCtx->msrEFER = paValues[iReg].Reg64;
+ fMaybeChangedMode = true;
+ }
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterKernelGsBase);
+ if (pCtx->msrKERNELGSBASE != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR KERNELGSBASE changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtx->msrKERNELGSBASE, paValues[iReg].Reg64));
+ pCtx->msrKERNELGSBASE = paValues[iReg].Reg64;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterSysenterCs);
+ if (pCtx->SysEnter.cs != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR SYSENTER.CS changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtx->SysEnter.cs, paValues[iReg].Reg64));
+ pCtx->SysEnter.cs = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterSysenterEip);
+ if (pCtx->SysEnter.eip != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR SYSENTER.EIP changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtx->SysEnter.eip, paValues[iReg].Reg64));
+ pCtx->SysEnter.eip = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterSysenterEsp);
+ if (pCtx->SysEnter.esp != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR SYSENTER.ESP changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtx->SysEnter.esp, paValues[iReg].Reg64));
+ pCtx->SysEnter.esp = paValues[iReg].Reg64;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterStar);
+ if (pCtx->msrSTAR != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR STAR changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtx->msrSTAR, paValues[iReg].Reg64));
+ pCtx->msrSTAR = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterLstar);
+ if (pCtx->msrLSTAR != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR LSTAR changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtx->msrLSTAR, paValues[iReg].Reg64));
+ pCtx->msrLSTAR = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterCstar);
+ if (pCtx->msrCSTAR != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR CSTAR changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtx->msrCSTAR, paValues[iReg].Reg64));
+ pCtx->msrCSTAR = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterSfmask);
+ if (pCtx->msrSFMASK != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR SFMASK changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtx->msrSFMASK, paValues[iReg].Reg64));
+ pCtx->msrSFMASK = paValues[iReg].Reg64;
+ iReg++;
+ }
+ if (fWhat & CPUMCTX_EXTRN_OTHER_MSRS)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterApicBase);
+ const uint64_t uOldBase = APICGetBaseMsrNoCheck(pVCpu);
+ if (paValues[iReg].Reg64 != uOldBase)
+ {
+ Log7(("NEM/%u: MSR APICBase changed %RX64 -> %RX64 (%RX64)\n",
+ pVCpu->idCpu, uOldBase, paValues[iReg].Reg64, paValues[iReg].Reg64 ^ uOldBase));
+ int rc2 = APICSetBaseMsr(pVCpu, paValues[iReg].Reg64);
+ AssertLogRelMsg(rc2 == VINF_SUCCESS, ("rc2=%Rrc [%#RX64]\n", rc2, paValues[iReg].Reg64));
+ }
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterPat);
+ if (pCtx->msrPAT != paValues[iReg].Reg64)
+ Log7(("NEM/%u: MSR PAT changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtx->msrPAT, paValues[iReg].Reg64));
+ pCtx->msrPAT = paValues[iReg].Reg64;
+ iReg++;
+
+# if 0 /*def LOG_ENABLED*/ /** @todo something's wrong with HvX64RegisterMtrrCap? (AMD) */
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrCap);
+ if (paValues[iReg].Reg64 != CPUMGetGuestIa32MtrrCap(pVCpu))
+ Log7(("NEM/%u: MSR MTRR_CAP changed %RX64 -> %RX64 (!!)\n", pVCpu->idCpu, CPUMGetGuestIa32MtrrCap(pVCpu), paValues[iReg].Reg64));
+ iReg++;
+# endif
+
+ PCPUMCTXMSRS pCtxMsrs = CPUMQueryGuestCtxMsrsPtr(pVCpu);
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrDefType);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrDefType )
+ Log7(("NEM/%u: MSR MTRR_DEF_TYPE changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtxMsrs->msr.MtrrDefType, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrDefType = paValues[iReg].Reg64;
+ iReg++;
+
+ /** @todo we dont keep state for HvX64RegisterMtrrPhysBaseX and HvX64RegisterMtrrPhysMaskX */
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix64k00000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix64K_00000 )
+ Log7(("NEM/%u: MSR MTRR_FIX16K_00000 changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtxMsrs->msr.MtrrFix64K_00000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix64K_00000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix16k80000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix16K_80000 )
+ Log7(("NEM/%u: MSR MTRR_FIX16K_80000 changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtxMsrs->msr.MtrrFix16K_80000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix16K_80000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix16kA0000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix16K_A0000 )
+ Log7(("NEM/%u: MSR MTRR_FIX16K_A0000 changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtxMsrs->msr.MtrrFix16K_A0000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix16K_A0000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kC0000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_C0000 )
+ Log7(("NEM/%u: MSR MTRR_FIX16K_C0000 changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_C0000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_C0000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kC8000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_C8000 )
+ Log7(("NEM/%u: MSR MTRR_FIX16K_C8000 changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_C8000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_C8000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kD0000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_D0000 )
+ Log7(("NEM/%u: MSR MTRR_FIX16K_D0000 changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_D0000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_D0000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kD8000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_D8000 )
+ Log7(("NEM/%u: MSR MTRR_FIX16K_D8000 changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_D8000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_D8000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kE0000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_E0000 )
+ Log7(("NEM/%u: MSR MTRR_FIX16K_E0000 changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_E0000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_E0000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kE8000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_E8000 )
+ Log7(("NEM/%u: MSR MTRR_FIX16K_E8000 changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_E8000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_E8000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kF0000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_F0000 )
+ Log7(("NEM/%u: MSR MTRR_FIX16K_F0000 changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_F0000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_F0000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterMtrrFix4kF8000);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MtrrFix4K_F8000 )
+ Log7(("NEM/%u: MSR MTRR_FIX16K_F8000 changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtxMsrs->msr.MtrrFix4K_F8000, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MtrrFix4K_F8000 = paValues[iReg].Reg64;
+ iReg++;
+
+ Assert(pInput->Names[iReg] == HvX64RegisterTscAux);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.TscAux )
+ Log7(("NEM/%u: MSR TSC_AUX changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtxMsrs->msr.TscAux, paValues[iReg].Reg64));
+ pCtxMsrs->msr.TscAux = paValues[iReg].Reg64;
+ iReg++;
+
+# if 0 /** @todo why can't we even read HvX64RegisterIa32MiscEnable? */
+ if (enmCpuVendor != CPUMCPUVENDOR_AMD)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterIa32MiscEnable);
+ if (paValues[iReg].Reg64 != pCtxMsrs->msr.MiscEnable)
+ Log7(("NEM/%u: MSR MISC_ENABLE changed %RX64 -> %RX64\n", pVCpu->idCpu, pCtxMsrs->msr.MiscEnable, paValues[iReg].Reg64));
+ pCtxMsrs->msr.MiscEnable = paValues[iReg].Reg64;
+ iReg++;
+ }
+# endif
+# ifdef LOG_ENABLED
+ if (enmCpuVendor != CPUMCPUVENDOR_AMD)
+ {
+ Assert(pInput->Names[iReg] == HvX64RegisterIa32FeatureControl);
+ if (paValues[iReg].Reg64 != pCtx->hwvirt.vmx.Msrs.u64FeatCtrl)
+ Log7(("NEM/%u: MSR FEATURE_CONTROL changed %RX64 -> %RX64 (!!)\n", pVCpu->idCpu, pCtx->hwvirt.vmx.Msrs.u64FeatCtrl, paValues[iReg].Reg64));
+ iReg++;
+ }
+# endif
+ }
+
+ /* Interruptibility. */
+ if (fWhat & (CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI))
+ {
+ Assert(pInput->Names[iReg] == HvRegisterInterruptState);
+ Assert(pInput->Names[iReg + 1] == HvX64RegisterRip);
+
+ if (!(pCtx->fExtrn & CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT))
+ {
+ pVCpu->nem.s.fLastInterruptShadow = paValues[iReg].InterruptState.InterruptShadow;
+ if (paValues[iReg].InterruptState.InterruptShadow)
+ EMSetInhibitInterruptsPC(pVCpu, paValues[iReg + 1].Reg64);
+ else
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
+ }
+
+ if (!(pCtx->fExtrn & CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI))
+ {
+ if (paValues[iReg].InterruptState.NmiMasked)
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_BLOCK_NMIS);
+ else
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS);
+ }
+
+ fWhat |= CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI;
+ iReg += 2;
+ }
+
+ /* Event injection. */
+ /// @todo HvRegisterPendingInterruption
+ Assert(pInput->Names[iReg] == HvRegisterPendingInterruption);
+ if (paValues[iReg].PendingInterruption.InterruptionPending)
+ {
+ Log7(("PendingInterruption: type=%u vector=%#x errcd=%RTbool/%#x instr-len=%u nested=%u\n",
+ paValues[iReg].PendingInterruption.InterruptionType, paValues[iReg].PendingInterruption.InterruptionVector,
+ paValues[iReg].PendingInterruption.DeliverErrorCode, paValues[iReg].PendingInterruption.ErrorCode,
+ paValues[iReg].PendingInterruption.InstructionLength, paValues[iReg].PendingInterruption.NestedEvent));
+ AssertMsg((paValues[iReg].PendingInterruption.AsUINT64 & UINT64_C(0xfc00)) == 0,
+ ("%#RX64\n", paValues[iReg].PendingInterruption.AsUINT64));
+ }
+
+ /// @todo HvRegisterPendingEvent0
+ /// @todo HvRegisterPendingEvent1
+
+ /* Almost done, just update extrn flags and maybe change PGM mode. */
+ pCtx->fExtrn &= ~fWhat;
+ if (!(pCtx->fExtrn & (CPUMCTX_EXTRN_ALL | (CPUMCTX_EXTRN_NEM_WIN_MASK & ~CPUMCTX_EXTRN_NEM_WIN_EVENT_INJECT))))
+ pCtx->fExtrn = 0;
+
+ /* Typical. */
+ if (!fMaybeChangedMode && !fUpdateCr3)
+ return VINF_SUCCESS;
+
+ /*
+ * Slow.
+ */
+ int rc = VINF_SUCCESS;
+ if (fMaybeChangedMode)
+ {
+ rc = PGMChangeMode(pVCpu, pCtx->cr0, pCtx->cr4, pCtx->msrEFER);
+ AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_NEM_IPE_1);
+ }
+
+ if (fUpdateCr3)
+ {
+ if (fCanUpdateCr3)
+ {
+ LogFlow(("nemR0WinImportState: -> PGMUpdateCR3!\n"));
+ rc = PGMUpdateCR3(pVCpu, pCtx->cr3);
+ AssertMsgReturn(rc == VINF_SUCCESS, ("rc=%Rrc\n", rc), RT_FAILURE_NP(rc) ? rc : VERR_NEM_IPE_2);
+ }
+ else
+ {
+ LogFlow(("nemR0WinImportState: -> VERR_NEM_FLUSH_TLB!\n"));
+ rc = VERR_NEM_FLUSH_TLB; /* Calling PGMFlushTLB w/o long jump setup doesn't work, ring-3 does it. */
+ }
+ }
+
+ return rc;
+}
+#endif /* NEM_WIN_WITH_RING0_RUNLOOP || NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS */
+
+
+/**
+ * Import the state from the native API (back to CPUMCTX).
+ *
+ * @returns VBox status code
+ * @param pGVM The ring-0 VM handle.
+ * @param pVM The cross context VM handle.
+ * @param idCpu The calling EMT. Necessary for getting the
+ * hypercall page and arguments.
+ * @param fWhat What to import, CPUMCTX_EXTRN_XXX. Set
+ * CPUMCTX_EXTERN_ALL for everything.
+ */
+VMMR0_INT_DECL(int) NEMR0ImportState(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t fWhat)
+{
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+ /*
+ * Validate the call.
+ */
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ PVMCPU pVCpu = &pVM->aCpus[idCpu];
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API);
+
+ /*
+ * Call worker.
+ */
+ rc = nemR0WinImportState(pGVM, pGVCpu, &pVCpu->cpum.GstCtx, fWhat, false /*fCanUpdateCr3*/);
+ }
+ return rc;
+#else
+ RT_NOREF(pGVM, pVM, idCpu, fWhat);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+/**
+ * Worker for NEMR0QueryCpuTick and the ring-0 NEMHCQueryCpuTick.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param pGVCpu The ring-0 VCPU handle.
+ * @param pcTicks Where to return the current CPU tick count.
+ * @param pcAux Where to return the hyper-V TSC_AUX value. Optional.
+ */
+NEM_TMPL_STATIC int nemR0WinQueryCpuTick(PGVM pGVM, PGVMCPU pGVCpu, uint64_t *pcTicks, uint32_t *pcAux)
+{
+ /*
+ * Hypercall parameters.
+ */
+ HV_INPUT_GET_VP_REGISTERS *pInput = (HV_INPUT_GET_VP_REGISTERS *)pGVCpu->nem.s.HypercallData.pbPage;
+ AssertPtrReturn(pInput, VERR_INTERNAL_ERROR_3);
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API);
+
+ pInput->PartitionId = pGVM->nem.s.idHvPartition;
+ pInput->VpIndex = pGVCpu->idCpu;
+ pInput->fFlags = 0;
+ pInput->Names[0] = HvX64RegisterTsc;
+ pInput->Names[1] = HvX64RegisterTscAux;
+
+ size_t const cbInput = RT_ALIGN_Z(RT_UOFFSETOF(HV_INPUT_GET_VP_REGISTERS, Names[2]), 32);
+ HV_REGISTER_VALUE *paValues = (HV_REGISTER_VALUE *)((uint8_t *)pInput + cbInput);
+ RT_BZERO(paValues, sizeof(paValues[0]) * 2);
+
+ /*
+ * Make the hypercall.
+ */
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallGetVpRegisters, 2),
+ pGVCpu->nem.s.HypercallData.HCPhysPage,
+ pGVCpu->nem.s.HypercallData.HCPhysPage + cbInput);
+ AssertLogRelMsgReturn(uResult == HV_MAKE_CALL_REP_RET(2), ("uResult=%RX64 cRegs=%#x\n", uResult, 2),
+ VERR_NEM_GET_REGISTERS_FAILED);
+
+ /*
+ * Get results.
+ */
+ *pcTicks = paValues[0].Reg64;
+ if (pcAux)
+ *pcAux = paValues[0].Reg32;
+ return VINF_SUCCESS;
+}
+#endif /* NEM_WIN_WITH_RING0_RUNLOOP || NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS */
+
+
+/**
+ * Queries the TSC and TSC_AUX values, putting the results in .
+ *
+ * @returns VBox status code
+ * @param pGVM The ring-0 VM handle.
+ * @param pVM The cross context VM handle.
+ * @param idCpu The calling EMT. Necessary for getting the
+ * hypercall page and arguments.
+ */
+VMMR0_INT_DECL(int) NEMR0QueryCpuTick(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+ /*
+ * Validate the call.
+ */
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ PVMCPU pVCpu = &pVM->aCpus[idCpu];
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API);
+
+ /*
+ * Call worker.
+ */
+ pVCpu->nem.s.Hypercall.QueryCpuTick.cTicks = 0;
+ pVCpu->nem.s.Hypercall.QueryCpuTick.uAux = 0;
+ rc = nemR0WinQueryCpuTick(pGVM, pGVCpu, &pVCpu->nem.s.Hypercall.QueryCpuTick.cTicks,
+ &pVCpu->nem.s.Hypercall.QueryCpuTick.uAux);
+ }
+ return rc;
+#else
+ RT_NOREF(pGVM, pVM, idCpu);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+/**
+ * Worker for NEMR0ResumeCpuTickOnAll and the ring-0 NEMHCResumeCpuTickOnAll.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param pGVCpu The ring-0 VCPU handle.
+ * @param uPausedTscValue The TSC value at the time of pausing.
+ */
+NEM_TMPL_STATIC int nemR0WinResumeCpuTickOnAll(PGVM pGVM, PGVMCPU pGVCpu, uint64_t uPausedTscValue)
+{
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API);
+
+ /*
+ * Set up the hypercall parameters.
+ */
+ HV_INPUT_SET_VP_REGISTERS *pInput = (HV_INPUT_SET_VP_REGISTERS *)pGVCpu->nem.s.HypercallData.pbPage;
+ AssertPtrReturn(pInput, VERR_INTERNAL_ERROR_3);
+
+ pInput->PartitionId = pGVM->nem.s.idHvPartition;
+ pInput->VpIndex = 0;
+ pInput->RsvdZ = 0;
+ pInput->Elements[0].Name = HvX64RegisterTsc;
+ pInput->Elements[0].Pad0 = 0;
+ pInput->Elements[0].Pad1 = 0;
+ pInput->Elements[0].Value.Reg128.High64 = 0;
+ pInput->Elements[0].Value.Reg64 = uPausedTscValue;
+
+ /*
+ * Disable interrupts and do the first virtual CPU.
+ */
+ RTCCINTREG const fSavedFlags = ASMIntDisableFlags();
+ uint64_t const uFirstTsc = ASMReadTSC();
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallSetVpRegisters, 1),
+ pGVCpu->nem.s.HypercallData.HCPhysPage, 0 /* no output */);
+ AssertLogRelMsgReturnStmt(uResult == HV_MAKE_CALL_REP_RET(1), ("uResult=%RX64 uTsc=%#RX64\n", uResult, uPausedTscValue),
+ ASMSetFlags(fSavedFlags), VERR_NEM_SET_TSC);
+
+ /*
+ * Do secondary processors, adjusting for elapsed TSC and keeping finger crossed
+ * that we don't introduce too much drift here.
+ */
+ for (VMCPUID iCpu = 1; iCpu < pGVM->cCpus; iCpu++)
+ {
+ Assert(pInput->PartitionId == pGVM->nem.s.idHvPartition);
+ Assert(pInput->RsvdZ == 0);
+ Assert(pInput->Elements[0].Name == HvX64RegisterTsc);
+ Assert(pInput->Elements[0].Pad0 == 0);
+ Assert(pInput->Elements[0].Pad1 == 0);
+ Assert(pInput->Elements[0].Value.Reg128.High64 == 0);
+
+ pInput->VpIndex = iCpu;
+ const uint64_t offDelta = (ASMReadTSC() - uFirstTsc);
+ pInput->Elements[0].Value.Reg64 = uPausedTscValue + offDelta;
+
+ uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallSetVpRegisters, 1),
+ pGVCpu->nem.s.HypercallData.HCPhysPage, 0 /* no output */);
+ AssertLogRelMsgReturnStmt(uResult == HV_MAKE_CALL_REP_RET(1),
+ ("uResult=%RX64 uTsc=%#RX64 + %#RX64\n", uResult, uPausedTscValue, offDelta),
+ ASMSetFlags(fSavedFlags), VERR_NEM_SET_TSC);
+ }
+
+ /*
+ * Done.
+ */
+ ASMSetFlags(fSavedFlags);
+ return VINF_SUCCESS;
+}
+#endif /* NEM_WIN_WITH_RING0_RUNLOOP || NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS */
+
+
+/**
+ * Sets the TSC register to @a uPausedTscValue on all CPUs.
+ *
+ * @returns VBox status code
+ * @param pGVM The ring-0 VM handle.
+ * @param pVM The cross context VM handle.
+ * @param idCpu The calling EMT. Necessary for getting the
+ * hypercall page and arguments.
+ * @param uPausedTscValue The TSC value at the time of pausing.
+ */
+VMMR0_INT_DECL(int) NEMR0ResumeCpuTickOnAll(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t uPausedTscValue)
+{
+#if defined(NEM_WIN_WITH_RING0_RUNLOOP) || defined(NEM_WIN_USE_HYPERCALLS_FOR_REGISTERS)
+ /*
+ * Validate the call.
+ */
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ PVMCPU pVCpu = &pVM->aCpus[idCpu];
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API);
+
+ /*
+ * Call worker.
+ */
+ pVCpu->nem.s.Hypercall.QueryCpuTick.cTicks = 0;
+ pVCpu->nem.s.Hypercall.QueryCpuTick.uAux = 0;
+ rc = nemR0WinResumeCpuTickOnAll(pGVM, pGVCpu, uPausedTscValue);
+ }
+ return rc;
+#else
+ RT_NOREF(pGVM, pVM, idCpu, uPausedTscValue);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+VMMR0_INT_DECL(VBOXSTRICTRC) NEMR0RunGuestCode(PGVM pGVM, VMCPUID idCpu)
+{
+#ifdef NEM_WIN_WITH_RING0_RUNLOOP
+ if (pGVM->nem.s.fMayUseRing0Runloop)
+ {
+ PVM pVM = pGVM->pVM;
+ return nemHCWinRunGC(pVM, &pVM->aCpus[idCpu], pGVM, &pGVM->aCpus[idCpu]);
+ }
+ return VERR_NEM_RING3_ONLY;
+#else
+ RT_NOREF(pGVM, idCpu);
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+/**
+ * Updates statistics in the VM structure.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM handle.
+ * @param pVM The cross context VM handle.
+ * @param idCpu The calling EMT, or NIL. Necessary for getting the hypercall
+ * page and arguments.
+ */
+VMMR0_INT_DECL(int) NEMR0UpdateStatistics(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+ /*
+ * Validate the call.
+ */
+ int rc;
+ if (idCpu == NIL_VMCPUID)
+ rc = GVMMR0ValidateGVMandVM(pGVM, pVM);
+ else
+ rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API);
+
+ PNEMR0HYPERCALLDATA pHypercallData = idCpu != NIL_VMCPUID
+ ? &pGVM->aCpus[idCpu].nem.s.HypercallData
+ : &pGVM->nem.s.HypercallData;
+ if ( RT_VALID_PTR(pHypercallData->pbPage)
+ && pHypercallData->HCPhysPage != NIL_RTHCPHYS)
+ {
+ if (idCpu == NIL_VMCPUID)
+ rc = RTCritSectEnter(&pGVM->nem.s.HypercallDataCritSect);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Query the memory statistics for the partition.
+ */
+ HV_INPUT_GET_MEMORY_BALANCE *pInput = (HV_INPUT_GET_MEMORY_BALANCE *)pHypercallData->pbPage;
+ pInput->TargetPartitionId = pGVM->nem.s.idHvPartition;
+ pInput->ProximityDomainInfo.Flags.ProximityPreferred = 0;
+ pInput->ProximityDomainInfo.Flags.ProxyimityInfoValid = 0;
+ pInput->ProximityDomainInfo.Flags.Reserved = 0;
+ pInput->ProximityDomainInfo.Id = 0;
+
+ HV_OUTPUT_GET_MEMORY_BALANCE *pOutput = (HV_OUTPUT_GET_MEMORY_BALANCE *)(pInput + 1);
+ RT_ZERO(*pOutput);
+
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HvCallGetMemoryBalance,
+ pHypercallData->HCPhysPage,
+ pHypercallData->HCPhysPage + sizeof(*pInput));
+ if (uResult == HV_STATUS_SUCCESS)
+ {
+ pVM->nem.s.R0Stats.cPagesAvailable = pOutput->PagesAvailable;
+ pVM->nem.s.R0Stats.cPagesInUse = pOutput->PagesInUse;
+ rc = VINF_SUCCESS;
+ }
+ else
+ {
+ LogRel(("HvCallGetMemoryBalance -> %#RX64 (%#RX64 %#RX64)!!\n",
+ uResult, pOutput->PagesAvailable, pOutput->PagesInUse));
+ rc = VERR_NEM_IPE_0;
+ }
+
+ if (idCpu == NIL_VMCPUID)
+ RTCritSectLeave(&pGVM->nem.s.HypercallDataCritSect);
+ }
+ }
+ else
+ rc = VERR_WRONG_ORDER;
+ }
+ return rc;
+}
+
+
+#if 1 && defined(DEBUG_bird)
+/**
+ * Debug only interface for poking around and exploring Hyper-V stuff.
+ *
+ * @param pGVM The ring-0 VM handle.
+ * @param pVM The cross context VM handle.
+ * @param idCpu The calling EMT.
+ * @param u64Arg What to query. 0 == registers.
+ */
+VMMR0_INT_DECL(int) NEMR0DoExperiment(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t u64Arg)
+{
+ /*
+ * Resolve CPU structures.
+ */
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_SUCCESS(rc))
+ {
+ AssertReturn(g_pfnHvlInvokeHypercall, VERR_NEM_MISSING_KERNEL_API);
+
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ PVMCPU pVCpu = &pVM->aCpus[idCpu];
+ if (u64Arg == 0)
+ {
+ /*
+ * Query register.
+ */
+ HV_INPUT_GET_VP_REGISTERS *pInput = (HV_INPUT_GET_VP_REGISTERS *)pGVCpu->nem.s.HypercallData.pbPage;
+ AssertPtrReturn(pInput, VERR_INTERNAL_ERROR_3);
+
+ size_t const cbInput = RT_ALIGN_Z(RT_UOFFSETOF(HV_INPUT_GET_VP_REGISTERS, Names[1]), 32);
+ HV_REGISTER_VALUE *paValues = (HV_REGISTER_VALUE *)((uint8_t *)pInput + cbInput);
+ RT_BZERO(paValues, sizeof(paValues[0]) * 1);
+
+ pInput->PartitionId = pGVM->nem.s.idHvPartition;
+ pInput->VpIndex = pGVCpu->idCpu;
+ pInput->fFlags = 0;
+ pInput->Names[0] = (HV_REGISTER_NAME)pVCpu->nem.s.Hypercall.Experiment.uItem;
+
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallGetVpRegisters, 1),
+ pGVCpu->nem.s.HypercallData.HCPhysPage,
+ pGVCpu->nem.s.HypercallData.HCPhysPage + cbInput);
+ pVCpu->nem.s.Hypercall.Experiment.fSuccess = uResult == HV_MAKE_CALL_REP_RET(1);
+ pVCpu->nem.s.Hypercall.Experiment.uStatus = uResult;
+ pVCpu->nem.s.Hypercall.Experiment.uLoValue = paValues[0].Reg128.Low64;
+ pVCpu->nem.s.Hypercall.Experiment.uHiValue = paValues[0].Reg128.High64;
+ rc = VINF_SUCCESS;
+ }
+ else if (u64Arg == 1)
+ {
+ /*
+ * Query partition property.
+ */
+ HV_INPUT_GET_PARTITION_PROPERTY *pInput = (HV_INPUT_GET_PARTITION_PROPERTY *)pGVCpu->nem.s.HypercallData.pbPage;
+ AssertPtrReturn(pInput, VERR_INTERNAL_ERROR_3);
+
+ size_t const cbInput = RT_ALIGN_Z(sizeof(*pInput), 32);
+ HV_OUTPUT_GET_PARTITION_PROPERTY *pOutput = (HV_OUTPUT_GET_PARTITION_PROPERTY *)((uint8_t *)pInput + cbInput);
+ pOutput->PropertyValue = 0;
+
+ pInput->PartitionId = pGVM->nem.s.idHvPartition;
+ pInput->PropertyCode = (HV_PARTITION_PROPERTY_CODE)pVCpu->nem.s.Hypercall.Experiment.uItem;
+ pInput->uPadding = 0;
+
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HvCallGetPartitionProperty,
+ pGVCpu->nem.s.HypercallData.HCPhysPage,
+ pGVCpu->nem.s.HypercallData.HCPhysPage + cbInput);
+ pVCpu->nem.s.Hypercall.Experiment.fSuccess = uResult == HV_STATUS_SUCCESS;
+ pVCpu->nem.s.Hypercall.Experiment.uStatus = uResult;
+ pVCpu->nem.s.Hypercall.Experiment.uLoValue = pOutput->PropertyValue;
+ pVCpu->nem.s.Hypercall.Experiment.uHiValue = 0;
+ rc = VINF_SUCCESS;
+ }
+ else if (u64Arg == 2)
+ {
+ /*
+ * Set register.
+ */
+ HV_INPUT_SET_VP_REGISTERS *pInput = (HV_INPUT_SET_VP_REGISTERS *)pGVCpu->nem.s.HypercallData.pbPage;
+ AssertPtrReturn(pInput, VERR_INTERNAL_ERROR_3);
+ RT_BZERO(pInput, RT_UOFFSETOF(HV_INPUT_SET_VP_REGISTERS, Elements[1]));
+
+ pInput->PartitionId = pGVM->nem.s.idHvPartition;
+ pInput->VpIndex = pGVCpu->idCpu;
+ pInput->RsvdZ = 0;
+ pInput->Elements[0].Name = (HV_REGISTER_NAME)pVCpu->nem.s.Hypercall.Experiment.uItem;
+ pInput->Elements[0].Value.Reg128.High64 = pVCpu->nem.s.Hypercall.Experiment.uHiValue;
+ pInput->Elements[0].Value.Reg128.Low64 = pVCpu->nem.s.Hypercall.Experiment.uLoValue;
+
+ uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallSetVpRegisters, 1),
+ pGVCpu->nem.s.HypercallData.HCPhysPage, 0);
+ pVCpu->nem.s.Hypercall.Experiment.fSuccess = uResult == HV_MAKE_CALL_REP_RET(1);
+ pVCpu->nem.s.Hypercall.Experiment.uStatus = uResult;
+ rc = VINF_SUCCESS;
+ }
+ else
+ rc = VERR_INVALID_FUNCTION;
+ }
+ return rc;
+}
+#endif /* DEBUG_bird */
+
diff --git a/src/VBox/VMM/VMMR0/PDMR0Device.cpp b/src/VBox/VMM/VMMR0/PDMR0Device.cpp
new file mode 100644
index 00000000..e905f1d6
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/PDMR0Device.cpp
@@ -0,0 +1,861 @@
+/* $Id: PDMR0Device.cpp $ */
+/** @file
+ * PDM - Pluggable Device and Driver Manager, R0 Device parts.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_PDM_DEVICE
+#define PDMPCIDEV_INCLUDE_PRIVATE /* Hack to get pdmpcidevint.h included at the right point. */
+#include "PDMInternal.h"
+#include <VBox/vmm/pdm.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/mm.h>
+#include <VBox/vmm/vm.h>
+#include <VBox/vmm/vmm.h>
+#include <VBox/vmm/patm.h>
+#include <VBox/vmm/hm.h>
+#include <VBox/vmm/apic.h>
+
+#include <VBox/log.h>
+#include <VBox/err.h>
+#include <VBox/vmm/gvmm.h>
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+
+#include "dtrace/VBoxVMM.h"
+#include "PDMInline.h"
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+RT_C_DECLS_BEGIN
+extern DECLEXPORT(const PDMDEVHLPR0) g_pdmR0DevHlp;
+extern DECLEXPORT(const PDMPICHLPR0) g_pdmR0PicHlp;
+extern DECLEXPORT(const PDMIOAPICHLPR0) g_pdmR0IoApicHlp;
+extern DECLEXPORT(const PDMPCIHLPR0) g_pdmR0PciHlp;
+extern DECLEXPORT(const PDMHPETHLPR0) g_pdmR0HpetHlp;
+extern DECLEXPORT(const PDMPCIRAWHLPR0) g_pdmR0PciRawHlp;
+extern DECLEXPORT(const PDMDRVHLPR0) g_pdmR0DrvHlp;
+RT_C_DECLS_END
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static bool pdmR0IsaSetIrq(PVM pVM, int iIrq, int iLevel, uint32_t uTagSrc);
+
+
+
+/** @name Ring-0 Device Helpers
+ * @{
+ */
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnPCIPhysRead} */
+static DECLCALLBACK(int) pdmR0DevHlp_PCIPhysRead(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, RTGCPHYS GCPhys,
+ void *pvBuf, size_t cbRead)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ if (!pPciDev) /* NULL is an alias for the default PCI device. */
+ pPciDev = pDevIns->Internal.s.pHeadPciDevR0;
+ AssertReturn(pPciDev, VERR_PDM_NOT_PCI_DEVICE);
+
+#ifndef PDM_DO_NOT_RESPECT_PCI_BM_BIT
+ /*
+ * Just check the busmaster setting here and forward the request to the generic read helper.
+ */
+ if (PCIDevIsBusmaster(pPciDev))
+ { /* likely */ }
+ else
+ {
+ Log(("pdmRCDevHlp_PCIPhysRead: caller=%p/%d: returns %Rrc - Not bus master! GCPhys=%RGp cbRead=%#zx\n",
+ pDevIns, pDevIns->iInstance, VERR_PDM_NOT_PCI_BUS_MASTER, GCPhys, cbRead));
+ memset(pvBuf, 0xff, cbRead);
+ return VERR_PDM_NOT_PCI_BUS_MASTER;
+ }
+#endif
+
+ return pDevIns->pHlpR0->pfnPhysRead(pDevIns, GCPhys, pvBuf, cbRead);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnPCIPhysWrite} */
+static DECLCALLBACK(int) pdmR0DevHlp_PCIPhysWrite(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, RTGCPHYS GCPhys,
+ const void *pvBuf, size_t cbWrite)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ if (!pPciDev) /* NULL is an alias for the default PCI device. */
+ pPciDev = pDevIns->Internal.s.pHeadPciDevR0;
+ AssertReturn(pPciDev, VERR_PDM_NOT_PCI_DEVICE);
+
+#ifndef PDM_DO_NOT_RESPECT_PCI_BM_BIT
+ /*
+ * Just check the busmaster setting here and forward the request to the generic read helper.
+ */
+ if (PCIDevIsBusmaster(pPciDev))
+ { /* likely */ }
+ else
+ {
+ Log(("pdmRCDevHlp_PCIPhysWrite: caller=%p/%d: returns %Rrc - Not bus master! GCPhys=%RGp cbWrite=%#zx\n",
+ pDevIns, pDevIns->iInstance, VERR_PDM_NOT_PCI_BUS_MASTER, GCPhys, cbWrite));
+ return VERR_PDM_NOT_PCI_BUS_MASTER;
+ }
+#endif
+
+ return pDevIns->pHlpR0->pfnPhysWrite(pDevIns, GCPhys, pvBuf, cbWrite);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnPCISetIrq} */
+static DECLCALLBACK(void) pdmR0DevHlp_PCISetIrq(PPDMDEVINS pDevIns, PPDMPCIDEV pPciDev, int iIrq, int iLevel)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ if (!pPciDev) /* NULL is an alias for the default PCI device. */
+ pPciDev = pDevIns->Internal.s.pHeadPciDevR0;
+ AssertReturnVoid(pPciDev);
+ LogFlow(("pdmR0DevHlp_PCISetIrq: caller=%p/%d: pPciDev=%p:{%#x} iIrq=%d iLevel=%d\n",
+ pDevIns, pDevIns->iInstance, pPciDev, pPciDev->uDevFn, iIrq, iLevel));
+ PVM pVM = pDevIns->Internal.s.pVMR0;
+ PPDMPCIBUS pPciBus = pPciDev->Int.s.pPdmBusR0;
+
+ pdmLock(pVM);
+ uint32_t uTagSrc;
+ if (iLevel & PDM_IRQ_LEVEL_HIGH)
+ {
+ pDevIns->Internal.s.uLastIrqTag = uTagSrc = pdmCalcIrqTag(pVM, pDevIns->idTracing);
+ if (iLevel == PDM_IRQ_LEVEL_HIGH)
+ VBOXVMM_PDM_IRQ_HIGH(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc));
+ else
+ VBOXVMM_PDM_IRQ_HILO(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc));
+ }
+ else
+ uTagSrc = pDevIns->Internal.s.uLastIrqTag;
+
+ if ( pPciBus
+ && pPciBus->pDevInsR0)
+ {
+ pPciBus->pfnSetIrqR0(pPciBus->pDevInsR0, pPciDev, iIrq, iLevel, uTagSrc);
+
+ pdmUnlock(pVM);
+
+ if (iLevel == PDM_IRQ_LEVEL_LOW)
+ VBOXVMM_PDM_IRQ_LOW(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc));
+ }
+ else
+ {
+ pdmUnlock(pVM);
+
+ /* queue for ring-3 execution. */
+ PPDMDEVHLPTASK pTask = (PPDMDEVHLPTASK)PDMQueueAlloc(pVM->pdm.s.pDevHlpQueueR0);
+ AssertReturnVoid(pTask);
+
+ pTask->enmOp = PDMDEVHLPTASKOP_PCI_SET_IRQ;
+ pTask->pDevInsR3 = PDMDEVINS_2_R3PTR(pDevIns);
+ pTask->u.PciSetIRQ.iIrq = iIrq;
+ pTask->u.PciSetIRQ.iLevel = iLevel;
+ pTask->u.PciSetIRQ.uTagSrc = uTagSrc;
+ pTask->u.PciSetIRQ.pPciDevR3 = MMHyperR0ToR3(pVM, pPciDev);
+
+ PDMQueueInsertEx(pVM->pdm.s.pDevHlpQueueR0, &pTask->Core, 0);
+ }
+
+ LogFlow(("pdmR0DevHlp_PCISetIrq: caller=%p/%d: returns void; uTagSrc=%#x\n", pDevIns, pDevIns->iInstance, uTagSrc));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnISASetIrq} */
+static DECLCALLBACK(void) pdmR0DevHlp_ISASetIrq(PPDMDEVINS pDevIns, int iIrq, int iLevel)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_ISASetIrq: caller=%p/%d: iIrq=%d iLevel=%d\n", pDevIns, pDevIns->iInstance, iIrq, iLevel));
+ PVM pVM = pDevIns->Internal.s.pVMR0;
+
+ pdmLock(pVM);
+ uint32_t uTagSrc;
+ if (iLevel & PDM_IRQ_LEVEL_HIGH)
+ {
+ pDevIns->Internal.s.uLastIrqTag = uTagSrc = pdmCalcIrqTag(pVM, pDevIns->idTracing);
+ if (iLevel == PDM_IRQ_LEVEL_HIGH)
+ VBOXVMM_PDM_IRQ_HIGH(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc));
+ else
+ VBOXVMM_PDM_IRQ_HILO(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc));
+ }
+ else
+ uTagSrc = pDevIns->Internal.s.uLastIrqTag;
+
+ bool fRc = pdmR0IsaSetIrq(pVM, iIrq, iLevel, uTagSrc);
+
+ if (iLevel == PDM_IRQ_LEVEL_LOW && fRc)
+ VBOXVMM_PDM_IRQ_LOW(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc));
+ pdmUnlock(pVM);
+ LogFlow(("pdmR0DevHlp_ISASetIrq: caller=%p/%d: returns void; uTagSrc=%#x\n", pDevIns, pDevIns->iInstance, uTagSrc));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnIoApicSendMsi} */
+static DECLCALLBACK(void) pdmR0DevHlp_IoApicSendMsi(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, uint32_t uValue)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_IoApicSendMsi: caller=%p/%d: GCPhys=%RGp uValue=%#x\n", pDevIns, pDevIns->iInstance, GCPhys, uValue));
+ PVM pVM = pDevIns->Internal.s.pVMR0;
+
+ uint32_t uTagSrc;
+ pDevIns->Internal.s.uLastIrqTag = uTagSrc = pdmCalcIrqTag(pVM, pDevIns->idTracing);
+ VBOXVMM_PDM_IRQ_HILO(VMMGetCpu(pVM), RT_LOWORD(uTagSrc), RT_HIWORD(uTagSrc));
+
+ if (pVM->pdm.s.IoApic.pDevInsR0)
+ pVM->pdm.s.IoApic.pfnSendMsiR0(pVM->pdm.s.IoApic.pDevInsR0, GCPhys, uValue, uTagSrc);
+ else
+ AssertFatalMsgFailed(("Lazy bastards!"));
+
+ LogFlow(("pdmR0DevHlp_IoApicSendMsi: caller=%p/%d: returns void; uTagSrc=%#x\n", pDevIns, pDevIns->iInstance, uTagSrc));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnPhysRead} */
+static DECLCALLBACK(int) pdmR0DevHlp_PhysRead(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, void *pvBuf, size_t cbRead)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_PhysRead: caller=%p/%d: GCPhys=%RGp pvBuf=%p cbRead=%#x\n",
+ pDevIns, pDevIns->iInstance, GCPhys, pvBuf, cbRead));
+
+ VBOXSTRICTRC rcStrict = PGMPhysRead(pDevIns->Internal.s.pVMR0, GCPhys, pvBuf, cbRead, PGMACCESSORIGIN_DEVICE);
+ AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); /** @todo track down the users for this bugger. */
+
+ Log(("pdmR0DevHlp_PhysRead: caller=%p/%d: returns %Rrc\n", pDevIns, pDevIns->iInstance, VBOXSTRICTRC_VAL(rcStrict) ));
+ return VBOXSTRICTRC_VAL(rcStrict);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnPhysWrite} */
+static DECLCALLBACK(int) pdmR0DevHlp_PhysWrite(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, const void *pvBuf, size_t cbWrite)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_PhysWrite: caller=%p/%d: GCPhys=%RGp pvBuf=%p cbWrite=%#x\n",
+ pDevIns, pDevIns->iInstance, GCPhys, pvBuf, cbWrite));
+
+ VBOXSTRICTRC rcStrict = PGMPhysWrite(pDevIns->Internal.s.pVMR0, GCPhys, pvBuf, cbWrite, PGMACCESSORIGIN_DEVICE);
+ AssertMsg(rcStrict == VINF_SUCCESS, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); /** @todo track down the users for this bugger. */
+
+ Log(("pdmR0DevHlp_PhysWrite: caller=%p/%d: returns %Rrc\n", pDevIns, pDevIns->iInstance, VBOXSTRICTRC_VAL(rcStrict) ));
+ return VBOXSTRICTRC_VAL(rcStrict);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnA20IsEnabled} */
+static DECLCALLBACK(bool) pdmR0DevHlp_A20IsEnabled(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_A20IsEnabled: caller=%p/%d:\n", pDevIns, pDevIns->iInstance));
+
+ bool fEnabled = PGMPhysIsA20Enabled(VMMGetCpu(pDevIns->Internal.s.pVMR0));
+
+ Log(("pdmR0DevHlp_A20IsEnabled: caller=%p/%d: returns %RTbool\n", pDevIns, pDevIns->iInstance, fEnabled));
+ return fEnabled;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnVMState} */
+static DECLCALLBACK(VMSTATE) pdmR0DevHlp_VMState(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+
+ VMSTATE enmVMState = pDevIns->Internal.s.pVMR0->enmVMState;
+
+ LogFlow(("pdmR0DevHlp_VMState: caller=%p/%d: returns %d\n", pDevIns, pDevIns->iInstance, enmVMState));
+ return enmVMState;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnVMSetError} */
+static DECLCALLBACK(int) pdmR0DevHlp_VMSetError(PPDMDEVINS pDevIns, int rc, RT_SRC_POS_DECL, const char *pszFormat, ...)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ va_list args;
+ va_start(args, pszFormat);
+ int rc2 = VMSetErrorV(pDevIns->Internal.s.pVMR0, rc, RT_SRC_POS_ARGS, pszFormat, args); Assert(rc2 == rc); NOREF(rc2);
+ va_end(args);
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnVMSetErrorV} */
+static DECLCALLBACK(int) pdmR0DevHlp_VMSetErrorV(PPDMDEVINS pDevIns, int rc, RT_SRC_POS_DECL, const char *pszFormat, va_list va)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ int rc2 = VMSetErrorV(pDevIns->Internal.s.pVMR0, rc, RT_SRC_POS_ARGS, pszFormat, va); Assert(rc2 == rc); NOREF(rc2);
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnVMSetRuntimeError} */
+static DECLCALLBACK(int) pdmR0DevHlp_VMSetRuntimeError(PPDMDEVINS pDevIns, uint32_t fFlags, const char *pszErrorId, const char *pszFormat, ...)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ va_list va;
+ va_start(va, pszFormat);
+ int rc = VMSetRuntimeErrorV(pDevIns->Internal.s.pVMR0, fFlags, pszErrorId, pszFormat, va);
+ va_end(va);
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnVMSetRuntimeErrorV} */
+static DECLCALLBACK(int) pdmR0DevHlp_VMSetRuntimeErrorV(PPDMDEVINS pDevIns, uint32_t fFlags, const char *pszErrorId, const char *pszFormat, va_list va)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ int rc = VMSetRuntimeErrorV(pDevIns->Internal.s.pVMR0, fFlags, pszErrorId, pszFormat, va);
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnPATMSetMMIOPatchInfo} */
+static DECLCALLBACK(int) pdmR0DevHlp_PATMSetMMIOPatchInfo(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, RTGCPTR pCachedData)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_PATMSetMMIOPatchInfo: caller=%p/%d:\n", pDevIns, pDevIns->iInstance));
+
+ AssertFailed();
+ NOREF(GCPhys); NOREF(pCachedData); NOREF(pDevIns);
+
+/* return PATMSetMMIOPatchInfo(pDevIns->Internal.s.pVMR0, GCPhys, pCachedData); */
+ return VINF_SUCCESS;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnGetVM} */
+static DECLCALLBACK(PVM) pdmR0DevHlp_GetVM(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_GetVM: caller='%p'/%d\n", pDevIns, pDevIns->iInstance));
+ return pDevIns->Internal.s.pVMR0;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnGetVMCPU} */
+static DECLCALLBACK(PVMCPU) pdmR0DevHlp_GetVMCPU(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_GetVMCPU: caller='%p'/%d\n", pDevIns, pDevIns->iInstance));
+ return VMMGetCpu(pDevIns->Internal.s.pVMR0);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPRC,pfnGetCurrentCpuId} */
+static DECLCALLBACK(VMCPUID) pdmR0DevHlp_GetCurrentCpuId(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ VMCPUID idCpu = VMMGetCpuId(pDevIns->Internal.s.pVMR0);
+ LogFlow(("pdmR0DevHlp_GetCurrentCpuId: caller='%p'/%d for CPU %u\n", pDevIns, pDevIns->iInstance, idCpu));
+ return idCpu;
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTMTimeVirtGet} */
+static DECLCALLBACK(uint64_t) pdmR0DevHlp_TMTimeVirtGet(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_TMTimeVirtGet: caller='%p'/%d\n", pDevIns, pDevIns->iInstance));
+ return TMVirtualGet(pDevIns->Internal.s.pVMR0);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTMTimeVirtGetFreq} */
+static DECLCALLBACK(uint64_t) pdmR0DevHlp_TMTimeVirtGetFreq(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_TMTimeVirtGetFreq: caller='%p'/%d\n", pDevIns, pDevIns->iInstance));
+ return TMVirtualGetFreq(pDevIns->Internal.s.pVMR0);
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnTMTimeVirtGetNano} */
+static DECLCALLBACK(uint64_t) pdmR0DevHlp_TMTimeVirtGetNano(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ LogFlow(("pdmR0DevHlp_TMTimeVirtGetNano: caller='%p'/%d\n", pDevIns, pDevIns->iInstance));
+ return TMVirtualToNano(pDevIns->Internal.s.pVMR0, TMVirtualGet(pDevIns->Internal.s.pVMR0));
+}
+
+
+/** @interface_method_impl{PDMDEVHLPR0,pfnDBGFTraceBuf} */
+static DECLCALLBACK(RTTRACEBUF) pdmR0DevHlp_DBGFTraceBuf(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ RTTRACEBUF hTraceBuf = pDevIns->Internal.s.pVMR0->hTraceBufR0;
+ LogFlow(("pdmR3DevHlp_DBGFTraceBuf: caller='%p'/%d: returns %p\n", pDevIns, pDevIns->iInstance, hTraceBuf));
+ return hTraceBuf;
+}
+
+
+/**
+ * The Ring-0 Device Helper Callbacks.
+ */
+extern DECLEXPORT(const PDMDEVHLPR0) g_pdmR0DevHlp =
+{
+ PDM_DEVHLPR0_VERSION,
+ pdmR0DevHlp_PCIPhysRead,
+ pdmR0DevHlp_PCIPhysWrite,
+ pdmR0DevHlp_PCISetIrq,
+ pdmR0DevHlp_ISASetIrq,
+ pdmR0DevHlp_IoApicSendMsi,
+ pdmR0DevHlp_PhysRead,
+ pdmR0DevHlp_PhysWrite,
+ pdmR0DevHlp_A20IsEnabled,
+ pdmR0DevHlp_VMState,
+ pdmR0DevHlp_VMSetError,
+ pdmR0DevHlp_VMSetErrorV,
+ pdmR0DevHlp_VMSetRuntimeError,
+ pdmR0DevHlp_VMSetRuntimeErrorV,
+ pdmR0DevHlp_PATMSetMMIOPatchInfo,
+ pdmR0DevHlp_GetVM,
+ pdmR0DevHlp_GetVMCPU,
+ pdmR0DevHlp_GetCurrentCpuId,
+ pdmR0DevHlp_TMTimeVirtGet,
+ pdmR0DevHlp_TMTimeVirtGetFreq,
+ pdmR0DevHlp_TMTimeVirtGetNano,
+ pdmR0DevHlp_DBGFTraceBuf,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ PDM_DEVHLPR0_VERSION
+};
+
+/** @} */
+
+
+
+
+/** @name PIC Ring-0 Helpers
+ * @{
+ */
+
+/** @interface_method_impl{PDMPICHLPR0,pfnSetInterruptFF} */
+static DECLCALLBACK(void) pdmR0PicHlp_SetInterruptFF(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ PVM pVM = pDevIns->Internal.s.pVMR0;
+ PVMCPU pVCpu = &pVM->aCpus[0]; /* for PIC we always deliver to CPU 0, MP use APIC */
+ /** @todo r=ramshankar: Propagating rcRZ and make all callers handle it? */
+ APICLocalInterrupt(pVCpu, 0 /* u8Pin */, 1 /* u8Level */, VINF_SUCCESS /* rcRZ */);
+}
+
+
+/** @interface_method_impl{PDMPICHLPR0,pfnClearInterruptFF} */
+static DECLCALLBACK(void) pdmR0PicHlp_ClearInterruptFF(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ PVM pVM = pDevIns->Internal.s.pVMR0;
+ PVMCPU pVCpu = &pVM->aCpus[0]; /* for PIC we always deliver to CPU 0, MP use APIC */
+ /** @todo r=ramshankar: Propagating rcRZ and make all callers handle it? */
+ APICLocalInterrupt(pVCpu, 0 /* u8Pin */, 0 /* u8Level */, VINF_SUCCESS /* rcRZ */);
+}
+
+
+/** @interface_method_impl{PDMPICHLPR0,pfnLock} */
+static DECLCALLBACK(int) pdmR0PicHlp_Lock(PPDMDEVINS pDevIns, int rc)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ return pdmLockEx(pDevIns->Internal.s.pVMR0, rc);
+}
+
+
+/** @interface_method_impl{PDMPICHLPR0,pfnUnlock} */
+static DECLCALLBACK(void) pdmR0PicHlp_Unlock(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ pdmUnlock(pDevIns->Internal.s.pVMR0);
+}
+
+
+/**
+ * The Ring-0 PIC Helper Callbacks.
+ */
+extern DECLEXPORT(const PDMPICHLPR0) g_pdmR0PicHlp =
+{
+ PDM_PICHLPR0_VERSION,
+ pdmR0PicHlp_SetInterruptFF,
+ pdmR0PicHlp_ClearInterruptFF,
+ pdmR0PicHlp_Lock,
+ pdmR0PicHlp_Unlock,
+ PDM_PICHLPR0_VERSION
+};
+
+/** @} */
+
+
+/** @name I/O APIC Ring-0 Helpers
+ * @{
+ */
+
+/** @interface_method_impl{PDMIOAPICHLPR0,pfnApicBusDeliver} */
+static DECLCALLBACK(int) pdmR0IoApicHlp_ApicBusDeliver(PPDMDEVINS pDevIns, uint8_t u8Dest, uint8_t u8DestMode,
+ uint8_t u8DeliveryMode, uint8_t uVector, uint8_t u8Polarity,
+ uint8_t u8TriggerMode, uint32_t uTagSrc)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ PVM pVM = pDevIns->Internal.s.pVMR0;
+ LogFlow(("pdmR0IoApicHlp_ApicBusDeliver: caller=%p/%d: u8Dest=%RX8 u8DestMode=%RX8 u8DeliveryMode=%RX8 uVector=%RX8 u8Polarity=%RX8 u8TriggerMode=%RX8 uTagSrc=%#x\n",
+ pDevIns, pDevIns->iInstance, u8Dest, u8DestMode, u8DeliveryMode, uVector, u8Polarity, u8TriggerMode, uTagSrc));
+ return APICBusDeliver(pVM, u8Dest, u8DestMode, u8DeliveryMode, uVector, u8Polarity, u8TriggerMode, uTagSrc);
+}
+
+
+/** @interface_method_impl{PDMIOAPICHLPR0,pfnLock} */
+static DECLCALLBACK(int) pdmR0IoApicHlp_Lock(PPDMDEVINS pDevIns, int rc)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ return pdmLockEx(pDevIns->Internal.s.pVMR0, rc);
+}
+
+
+/** @interface_method_impl{PDMIOAPICHLPR0,pfnUnlock} */
+static DECLCALLBACK(void) pdmR0IoApicHlp_Unlock(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ pdmUnlock(pDevIns->Internal.s.pVMR0);
+}
+
+
+/**
+ * The Ring-0 I/O APIC Helper Callbacks.
+ */
+extern DECLEXPORT(const PDMIOAPICHLPR0) g_pdmR0IoApicHlp =
+{
+ PDM_IOAPICHLPR0_VERSION,
+ pdmR0IoApicHlp_ApicBusDeliver,
+ pdmR0IoApicHlp_Lock,
+ pdmR0IoApicHlp_Unlock,
+ PDM_IOAPICHLPR0_VERSION
+};
+
+/** @} */
+
+
+
+
+/** @name PCI Bus Ring-0 Helpers
+ * @{
+ */
+
+/** @interface_method_impl{PDMPCIHLPR0,pfnIsaSetIrq} */
+static DECLCALLBACK(void) pdmR0PciHlp_IsaSetIrq(PPDMDEVINS pDevIns, int iIrq, int iLevel, uint32_t uTagSrc)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ Log4(("pdmR0PciHlp_IsaSetIrq: iIrq=%d iLevel=%d uTagSrc=%#x\n", iIrq, iLevel, uTagSrc));
+ PVM pVM = pDevIns->Internal.s.pVMR0;
+
+ pdmLock(pVM);
+ pdmR0IsaSetIrq(pVM, iIrq, iLevel, uTagSrc);
+ pdmUnlock(pVM);
+}
+
+
+/** @interface_method_impl{PDMPCIHLPR0,pfnIoApicSetIrq} */
+static DECLCALLBACK(void) pdmR0PciHlp_IoApicSetIrq(PPDMDEVINS pDevIns, int iIrq, int iLevel, uint32_t uTagSrc)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ Log4(("pdmR0PciHlp_IoApicSetIrq: iIrq=%d iLevel=%d uTagSrc=%#x\n", iIrq, iLevel, uTagSrc));
+ PVM pVM = pDevIns->Internal.s.pVMR0;
+
+ if (pVM->pdm.s.IoApic.pDevInsR0)
+ pVM->pdm.s.IoApic.pfnSetIrqR0(pVM->pdm.s.IoApic.pDevInsR0, iIrq, iLevel, uTagSrc);
+ else if (pVM->pdm.s.IoApic.pDevInsR3)
+ {
+ /* queue for ring-3 execution. */
+ PPDMDEVHLPTASK pTask = (PPDMDEVHLPTASK)PDMQueueAlloc(pVM->pdm.s.pDevHlpQueueR0);
+ if (pTask)
+ {
+ pTask->enmOp = PDMDEVHLPTASKOP_IOAPIC_SET_IRQ;
+ pTask->pDevInsR3 = NIL_RTR3PTR; /* not required */
+ pTask->u.IoApicSetIRQ.iIrq = iIrq;
+ pTask->u.IoApicSetIRQ.iLevel = iLevel;
+ pTask->u.IoApicSetIRQ.uTagSrc = uTagSrc;
+
+ PDMQueueInsertEx(pVM->pdm.s.pDevHlpQueueR0, &pTask->Core, 0);
+ }
+ else
+ AssertMsgFailed(("We're out of devhlp queue items!!!\n"));
+ }
+}
+
+
+/** @interface_method_impl{PDMPCIHLPR0,pfnIoApicSendMsi} */
+static DECLCALLBACK(void) pdmR0PciHlp_IoApicSendMsi(PPDMDEVINS pDevIns, RTGCPHYS GCPhys, uint32_t uValue, uint32_t uTagSrc)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ Log4(("pdmR0PciHlp_IoApicSendMsi: GCPhys=%p uValue=%d uTagSrc=%#x\n", GCPhys, uValue, uTagSrc));
+ PVM pVM = pDevIns->Internal.s.pVMR0;
+ if (pVM->pdm.s.IoApic.pDevInsR0)
+ pVM->pdm.s.IoApic.pfnSendMsiR0(pVM->pdm.s.IoApic.pDevInsR0, GCPhys, uValue, uTagSrc);
+ else
+ AssertFatalMsgFailed(("Lazy bastards!"));
+}
+
+
+/** @interface_method_impl{PDMPCIHLPR0,pfnLock} */
+static DECLCALLBACK(int) pdmR0PciHlp_Lock(PPDMDEVINS pDevIns, int rc)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ return pdmLockEx(pDevIns->Internal.s.pVMR0, rc);
+}
+
+
+/** @interface_method_impl{PDMPCIHLPR0,pfnUnlock} */
+static DECLCALLBACK(void) pdmR0PciHlp_Unlock(PPDMDEVINS pDevIns)
+{
+ PDMDEV_ASSERT_DEVINS(pDevIns);
+ pdmUnlock(pDevIns->Internal.s.pVMR0);
+}
+
+
+/**
+ * The Ring-0 PCI Bus Helper Callbacks.
+ */
+extern DECLEXPORT(const PDMPCIHLPR0) g_pdmR0PciHlp =
+{
+ PDM_PCIHLPR0_VERSION,
+ pdmR0PciHlp_IsaSetIrq,
+ pdmR0PciHlp_IoApicSetIrq,
+ pdmR0PciHlp_IoApicSendMsi,
+ pdmR0PciHlp_Lock,
+ pdmR0PciHlp_Unlock,
+ PDM_PCIHLPR0_VERSION, /* the end */
+};
+
+/** @} */
+
+
+
+
+/** @name HPET Ring-0 Helpers
+ * @{
+ */
+/* none */
+
+/**
+ * The Ring-0 HPET Helper Callbacks.
+ */
+extern DECLEXPORT(const PDMHPETHLPR0) g_pdmR0HpetHlp =
+{
+ PDM_HPETHLPR0_VERSION,
+ PDM_HPETHLPR0_VERSION, /* the end */
+};
+
+/** @} */
+
+
+/** @name Raw PCI Ring-0 Helpers
+ * @{
+ */
+/* none */
+
+/**
+ * The Ring-0 PCI raw Helper Callbacks.
+ */
+extern DECLEXPORT(const PDMPCIRAWHLPR0) g_pdmR0PciRawHlp =
+{
+ PDM_PCIRAWHLPR0_VERSION,
+ PDM_PCIRAWHLPR0_VERSION, /* the end */
+};
+
+/** @} */
+
+
+/** @name Ring-0 Context Driver Helpers
+ * @{
+ */
+
+/** @interface_method_impl{PDMDRVHLPR0,pfnVMSetError} */
+static DECLCALLBACK(int) pdmR0DrvHlp_VMSetError(PPDMDRVINS pDrvIns, int rc, RT_SRC_POS_DECL, const char *pszFormat, ...)
+{
+ PDMDRV_ASSERT_DRVINS(pDrvIns);
+ va_list args;
+ va_start(args, pszFormat);
+ int rc2 = VMSetErrorV(pDrvIns->Internal.s.pVMR0, rc, RT_SRC_POS_ARGS, pszFormat, args); Assert(rc2 == rc); NOREF(rc2);
+ va_end(args);
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDRVHLPR0,pfnVMSetErrorV} */
+static DECLCALLBACK(int) pdmR0DrvHlp_VMSetErrorV(PPDMDRVINS pDrvIns, int rc, RT_SRC_POS_DECL, const char *pszFormat, va_list va)
+{
+ PDMDRV_ASSERT_DRVINS(pDrvIns);
+ int rc2 = VMSetErrorV(pDrvIns->Internal.s.pVMR0, rc, RT_SRC_POS_ARGS, pszFormat, va); Assert(rc2 == rc); NOREF(rc2);
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDRVHLPR0,pfnVMSetRuntimeError} */
+static DECLCALLBACK(int) pdmR0DrvHlp_VMSetRuntimeError(PPDMDRVINS pDrvIns, uint32_t fFlags, const char *pszErrorId,
+ const char *pszFormat, ...)
+{
+ PDMDRV_ASSERT_DRVINS(pDrvIns);
+ va_list va;
+ va_start(va, pszFormat);
+ int rc = VMSetRuntimeErrorV(pDrvIns->Internal.s.pVMR0, fFlags, pszErrorId, pszFormat, va);
+ va_end(va);
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDRVHLPR0,pfnVMSetRuntimeErrorV} */
+static DECLCALLBACK(int) pdmR0DrvHlp_VMSetRuntimeErrorV(PPDMDRVINS pDrvIns, uint32_t fFlags, const char *pszErrorId,
+ const char *pszFormat, va_list va)
+{
+ PDMDRV_ASSERT_DRVINS(pDrvIns);
+ int rc = VMSetRuntimeErrorV(pDrvIns->Internal.s.pVMR0, fFlags, pszErrorId, pszFormat, va);
+ return rc;
+}
+
+
+/** @interface_method_impl{PDMDRVHLPR0,pfnAssertEMT} */
+static DECLCALLBACK(bool) pdmR0DrvHlp_AssertEMT(PPDMDRVINS pDrvIns, const char *pszFile, unsigned iLine, const char *pszFunction)
+{
+ PDMDRV_ASSERT_DRVINS(pDrvIns);
+ if (VM_IS_EMT(pDrvIns->Internal.s.pVMR0))
+ return true;
+
+ RTAssertMsg1Weak("AssertEMT", iLine, pszFile, pszFunction);
+ RTAssertPanic();
+ return false;
+}
+
+
+/** @interface_method_impl{PDMDRVHLPR0,pfnAssertOther} */
+static DECLCALLBACK(bool) pdmR0DrvHlp_AssertOther(PPDMDRVINS pDrvIns, const char *pszFile, unsigned iLine, const char *pszFunction)
+{
+ PDMDRV_ASSERT_DRVINS(pDrvIns);
+ if (!VM_IS_EMT(pDrvIns->Internal.s.pVMR0))
+ return true;
+
+ RTAssertMsg1Weak("AssertOther", iLine, pszFile, pszFunction);
+ RTAssertPanic();
+ return false;
+}
+
+
+/** @interface_method_impl{PDMDRVHLPR0,pfnFTSetCheckpoint} */
+static DECLCALLBACK(int) pdmR0DrvHlp_FTSetCheckpoint(PPDMDRVINS pDrvIns, FTMCHECKPOINTTYPE enmType)
+{
+ PDMDRV_ASSERT_DRVINS(pDrvIns);
+ return FTMSetCheckpoint(pDrvIns->Internal.s.pVMR0, enmType);
+}
+
+
+/**
+ * The Ring-0 Context Driver Helper Callbacks.
+ */
+extern DECLEXPORT(const PDMDRVHLPR0) g_pdmR0DrvHlp =
+{
+ PDM_DRVHLPRC_VERSION,
+ pdmR0DrvHlp_VMSetError,
+ pdmR0DrvHlp_VMSetErrorV,
+ pdmR0DrvHlp_VMSetRuntimeError,
+ pdmR0DrvHlp_VMSetRuntimeErrorV,
+ pdmR0DrvHlp_AssertEMT,
+ pdmR0DrvHlp_AssertOther,
+ pdmR0DrvHlp_FTSetCheckpoint,
+ PDM_DRVHLPRC_VERSION
+};
+
+/** @} */
+
+
+
+
+/**
+ * Sets an irq on the PIC and I/O APIC.
+ *
+ * @returns true if delivered, false if postponed.
+ * @param pVM The cross context VM structure.
+ * @param iIrq The irq.
+ * @param iLevel The new level.
+ * @param uTagSrc The IRQ tag and source.
+ *
+ * @remarks The caller holds the PDM lock.
+ */
+static bool pdmR0IsaSetIrq(PVM pVM, int iIrq, int iLevel, uint32_t uTagSrc)
+{
+ if (RT_LIKELY( ( pVM->pdm.s.IoApic.pDevInsR0
+ || !pVM->pdm.s.IoApic.pDevInsR3)
+ && ( pVM->pdm.s.Pic.pDevInsR0
+ || !pVM->pdm.s.Pic.pDevInsR3)))
+ {
+ if (pVM->pdm.s.Pic.pDevInsR0)
+ pVM->pdm.s.Pic.pfnSetIrqR0(pVM->pdm.s.Pic.pDevInsR0, iIrq, iLevel, uTagSrc);
+ if (pVM->pdm.s.IoApic.pDevInsR0)
+ pVM->pdm.s.IoApic.pfnSetIrqR0(pVM->pdm.s.IoApic.pDevInsR0, iIrq, iLevel, uTagSrc);
+ return true;
+ }
+
+ /* queue for ring-3 execution. */
+ PPDMDEVHLPTASK pTask = (PPDMDEVHLPTASK)PDMQueueAlloc(pVM->pdm.s.pDevHlpQueueR0);
+ AssertReturn(pTask, false);
+
+ pTask->enmOp = PDMDEVHLPTASKOP_ISA_SET_IRQ;
+ pTask->pDevInsR3 = NIL_RTR3PTR; /* not required */
+ pTask->u.IsaSetIRQ.iIrq = iIrq;
+ pTask->u.IsaSetIRQ.iLevel = iLevel;
+ pTask->u.IsaSetIRQ.uTagSrc = uTagSrc;
+
+ PDMQueueInsertEx(pVM->pdm.s.pDevHlpQueueR0, &pTask->Core, 0);
+ return false;
+}
+
+
+/**
+ * PDMDevHlpCallR0 helper.
+ *
+ * @returns See PFNPDMDEVREQHANDLERR0.
+ * @param pGVM The global (ring-0) VM structure. (For validation.)
+ * @param pVM The cross context VM structure. (For validation.)
+ * @param pReq Pointer to the request buffer.
+ */
+VMMR0_INT_DECL(int) PDMR0DeviceCallReqHandler(PGVM pGVM, PVM pVM, PPDMDEVICECALLREQHANDLERREQ pReq)
+{
+ /*
+ * Validate input and make the call.
+ */
+ int rc = GVMMR0ValidateGVMandVM(pGVM, pVM);
+ if (RT_SUCCESS(rc))
+ {
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ PPDMDEVINS pDevIns = pReq->pDevInsR0;
+ AssertPtrReturn(pDevIns, VERR_INVALID_POINTER);
+ AssertReturn(pDevIns->Internal.s.pVMR0 == pVM, VERR_INVALID_PARAMETER);
+
+ PFNPDMDEVREQHANDLERR0 pfnReqHandlerR0 = pReq->pfnReqHandlerR0;
+ AssertPtrReturn(pfnReqHandlerR0, VERR_INVALID_POINTER);
+
+ rc = pfnReqHandlerR0(pDevIns, pReq->uOperation, pReq->u64Arg);
+ }
+ return rc;
+}
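+
+/*
+ * A minimal sketch of how a ring-3 caller might fill in the request that ends
+ * up in PDMR0DeviceCallReqHandler above.  The field names come from the
+ * validation code above; pMyDevInsR0, pfnMyReqHandlerR0, uMyOperation and
+ * u64MyArg are hypothetical placeholders, and any further request header
+ * fields are omitted here.
+ *
+ *    PDMDEVICECALLREQHANDLERREQ Req;
+ *    Req.Hdr.cbReq       = sizeof(Req);        // checked against sizeof(*pReq) above
+ *    Req.pDevInsR0       = pMyDevInsR0;        // ring-0 address of the device instance
+ *    Req.pfnReqHandlerR0 = pfnMyReqHandlerR0;  // PFNPDMDEVREQHANDLERR0 callback
+ *    Req.uOperation      = uMyOperation;       // device private operation code
+ *    Req.u64Arg          = u64MyArg;           // device private argument
+ *    // The request is then handed to ring-0, which dispatches it as shown above.
+ */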
+
diff --git a/src/VBox/VMM/VMMR0/PDMR0Driver.cpp b/src/VBox/VMM/VMMR0/PDMR0Driver.cpp
new file mode 100644
index 00000000..bd6a528e
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/PDMR0Driver.cpp
@@ -0,0 +1,64 @@
+/* $Id: PDMR0Driver.cpp $ */
+/** @file
+ * PDM - Pluggable Device and Driver Manager, R0 Driver parts.
+ */
+
+/*
+ * Copyright (C) 2010-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_PDM_DRIVER
+#include "PDMInternal.h"
+#include <VBox/vmm/pdm.h>
+#include <VBox/vmm/vm.h>
+#include <VBox/vmm/gvmm.h>
+
+#include <VBox/log.h>
+#include <iprt/errcore.h>
+#include <iprt/assert.h>
+
+
+
+/**
+ * PDMDrvHlpCallR0 helper.
+ *
+ * @returns See PFNPDMDRVREQHANDLERR0.
+ * @param pGVM The global (ring-0) VM structure. (For validation.)
+ * @param pVM The cross context VM structure. (For validation.)
+ * @param pReq Pointer to the request buffer.
+ */
+VMMR0_INT_DECL(int) PDMR0DriverCallReqHandler(PGVM pGVM, PVM pVM, PPDMDRIVERCALLREQHANDLERREQ pReq)
+{
+ /*
+ * Validate input and make the call.
+ */
+ int rc = GVMMR0ValidateGVMandVM(pGVM, pVM);
+ if (RT_SUCCESS(rc))
+ {
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ PPDMDRVINS pDrvIns = pReq->pDrvInsR0;
+ AssertPtrReturn(pDrvIns, VERR_INVALID_POINTER);
+ AssertReturn(pDrvIns->Internal.s.pVMR0 == pVM, VERR_INVALID_PARAMETER);
+
+ PFNPDMDRVREQHANDLERR0 pfnReqHandlerR0 = pDrvIns->Internal.s.pfnReqHandlerR0;
+ AssertPtrReturn(pfnReqHandlerR0, VERR_INVALID_POINTER);
+
+ rc = pfnReqHandlerR0(pDrvIns, pReq->uOperation, pReq->u64Arg);
+ }
+ return rc;
+}
+
diff --git a/src/VBox/VMM/VMMR0/PGMR0.cpp b/src/VBox/VMM/VMMR0/PGMR0.cpp
new file mode 100644
index 00000000..1cf8c74c
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/PGMR0.cpp
@@ -0,0 +1,660 @@
+/* $Id: PGMR0.cpp $ */
+/** @file
+ * PGM - Page Manager and Monitor, Ring-0.
+ */
+
+/*
+ * Copyright (C) 2007-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_PGM
+#include <VBox/rawpci.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/gmm.h>
+#include <VBox/vmm/gvm.h>
+#include "PGMInternal.h"
+#include <VBox/vmm/vm.h>
+#include "PGMInline.h"
+#include <VBox/log.h>
+#include <VBox/err.h>
+#include <iprt/assert.h>
+#include <iprt/mem.h>
+
+
+/*
+ * Instantiate the ring-0 header/code templates.
+ */
+/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
+#define PGM_BTH_NAME(name) PGM_BTH_NAME_32BIT_PROT(name)
+#include "PGMR0Bth.h"
+#undef PGM_BTH_NAME
+
+#define PGM_BTH_NAME(name) PGM_BTH_NAME_PAE_PROT(name)
+#include "PGMR0Bth.h"
+#undef PGM_BTH_NAME
+
+#define PGM_BTH_NAME(name) PGM_BTH_NAME_AMD64_PROT(name)
+#include "PGMR0Bth.h"
+#undef PGM_BTH_NAME
+
+#define PGM_BTH_NAME(name) PGM_BTH_NAME_EPT_PROT(name)
+#include "PGMR0Bth.h"
+#undef PGM_BTH_NAME
+
+
+/**
+ * Worker function for PGMR3PhysAllocateHandyPages and pgmPhysEnsureHandyPage.
+ *
+ * @returns The following VBox status codes.
+ * @retval VINF_SUCCESS on success. FF cleared.
+ * @retval VINF_EM_NO_MEMORY if we're out of memory. The FF is set in this case.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The ID of the calling EMT.
+ *
+ * @thread EMT(idCpu)
+ *
+ * @remarks Must be called from within the PGM critical section. The caller
+ * must clear the new pages.
+ */
+VMMR0_INT_DECL(int) PGMR0PhysAllocateHandyPages(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+ /*
+ * Validate inputs.
+ */
+ AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
+ AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
+ PGM_LOCK_ASSERT_OWNER_EX(pVM, &pVM->aCpus[idCpu]);
+
+ /*
+ * Check for error injection.
+ */
+ if (RT_UNLIKELY(pVM->pgm.s.fErrInjHandyPages))
+ return VERR_NO_MEMORY;
+
+ /*
+     * Try to allocate a full set of handy pages.
+ */
+ uint32_t iFirst = pVM->pgm.s.cHandyPages;
+ AssertReturn(iFirst <= RT_ELEMENTS(pVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
+ uint32_t cPages = RT_ELEMENTS(pVM->pgm.s.aHandyPages) - iFirst;
+ if (!cPages)
+ return VINF_SUCCESS;
+ int rc = GMMR0AllocateHandyPages(pGVM, pVM, idCpu, cPages, cPages, &pVM->pgm.s.aHandyPages[iFirst]);
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_STRICT
+ for (uint32_t i = 0; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
+ {
+ Assert(pVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
+ Assert(pVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
+ Assert(pVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
+ Assert(pVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
+ Assert(!(pVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
+ }
+#endif
+
+ pVM->pgm.s.cHandyPages = RT_ELEMENTS(pVM->pgm.s.aHandyPages);
+ }
+ else if (rc != VERR_GMM_SEED_ME)
+ {
+ if ( ( rc == VERR_GMM_HIT_GLOBAL_LIMIT
+ || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
+ && iFirst < PGM_HANDY_PAGES_MIN)
+ {
+
+#ifdef VBOX_STRICT
+        /* We're ASSUMING that GMM has updated all the entries before failing us. */
+ uint32_t i;
+ for (i = iFirst; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
+ {
+ Assert(pVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
+ Assert(pVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
+ Assert(pVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
+ }
+#endif
+
+ /*
+ * Reduce the number of pages until we hit the minimum limit.
+ */
+ do
+ {
+ cPages >>= 1;
+ if (cPages + iFirst < PGM_HANDY_PAGES_MIN)
+ cPages = PGM_HANDY_PAGES_MIN - iFirst;
+ rc = GMMR0AllocateHandyPages(pGVM, pVM, idCpu, 0, cPages, &pVM->pgm.s.aHandyPages[iFirst]);
+ } while ( ( rc == VERR_GMM_HIT_GLOBAL_LIMIT
+ || rc == VERR_GMM_HIT_VM_ACCOUNT_LIMIT)
+ && cPages + iFirst > PGM_HANDY_PAGES_MIN);
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_STRICT
+ i = iFirst + cPages;
+ while (i-- > 0)
+ {
+ Assert(pVM->pgm.s.aHandyPages[i].idPage != NIL_GMM_PAGEID);
+ Assert(pVM->pgm.s.aHandyPages[i].idPage <= GMM_PAGEID_LAST);
+ Assert(pVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
+ Assert(pVM->pgm.s.aHandyPages[i].HCPhysGCPhys != NIL_RTHCPHYS);
+ Assert(!(pVM->pgm.s.aHandyPages[i].HCPhysGCPhys & ~X86_PTE_PAE_PG_MASK));
+ }
+
+ for (i = cPages + iFirst; i < RT_ELEMENTS(pVM->pgm.s.aHandyPages); i++)
+ {
+ Assert(pVM->pgm.s.aHandyPages[i].idPage == NIL_GMM_PAGEID);
+ Assert(pVM->pgm.s.aHandyPages[i].idSharedPage == NIL_GMM_PAGEID);
+ Assert(pVM->pgm.s.aHandyPages[i].HCPhysGCPhys == NIL_RTHCPHYS);
+ }
+#endif
+
+ pVM->pgm.s.cHandyPages = iFirst + cPages;
+ }
+ }
+
+ if (RT_FAILURE(rc) && rc != VERR_GMM_SEED_ME)
+ {
+ LogRel(("PGMR0PhysAllocateHandyPages: rc=%Rrc iFirst=%d cPages=%d\n", rc, iFirst, cPages));
+ VM_FF_SET(pVM, VM_FF_PGM_NO_MEMORY);
+ }
+ }
+
+
+ LogFlow(("PGMR0PhysAllocateHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
+ return rc;
+}
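+
+/*
+ * A worked example of the fallback path above, with purely illustrative
+ * numbers (the real RT_ELEMENTS(aHandyPages) and PGM_HANDY_PAGES_MIN values
+ * are defined elsewhere): with 128 array entries, iFirst = 8 and a minimum of
+ * 32, the initial request asks for 120 pages.  If GMM keeps returning
+ * VERR_GMM_HIT_VM_ACCOUNT_LIMIT, the halving loop retries with 60, then 30,
+ * then 24 (clamped so that cPages + iFirst equals the minimum) and then gives
+ * up, after which VM_FF_PGM_NO_MEMORY is set.
+ */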
+
+
+/**
+ * Flushes any changes pending in the handy page array.
+ *
+ * It is very important that this gets done when page sharing is enabled.
+ *
+ * @returns The following VBox status codes.
+ * @retval VINF_SUCCESS on success. FF cleared.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The ID of the calling EMT.
+ *
+ * @thread EMT(idCpu)
+ *
+ * @remarks Must be called from within the PGM critical section.
+ */
+VMMR0_INT_DECL(int) PGMR0PhysFlushHandyPages(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+ /*
+ * Validate inputs.
+ */
+ AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
+ AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
+ PGM_LOCK_ASSERT_OWNER_EX(pVM, &pVM->aCpus[idCpu]);
+
+ /*
+     * Push any pending updates in the handy page array to GMM without allocating replacement pages.
+ */
+ uint32_t iFirst = pVM->pgm.s.cHandyPages;
+ AssertReturn(iFirst <= RT_ELEMENTS(pVM->pgm.s.aHandyPages), VERR_PGM_HANDY_PAGE_IPE);
+ uint32_t cPages = RT_ELEMENTS(pVM->pgm.s.aHandyPages) - iFirst;
+ if (!cPages)
+ return VINF_SUCCESS;
+ int rc = GMMR0AllocateHandyPages(pGVM, pVM, idCpu, cPages, 0, &pVM->pgm.s.aHandyPages[iFirst]);
+
+ LogFlow(("PGMR0PhysFlushHandyPages: cPages=%d rc=%Rrc\n", cPages, rc));
+ return rc;
+}
+
+
+/**
+ * Worker function for PGMR3PhysAllocateLargeHandyPage
+ *
+ * @returns The following VBox status codes.
+ * @retval VINF_SUCCESS on success.
+ * @retval VINF_EM_NO_MEMORY if we're out of memory.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The ID of the calling EMT.
+ *
+ * @thread EMT(idCpu)
+ *
+ * @remarks Must be called from within the PGM critical section. The caller
+ * must clear the new pages.
+ */
+VMMR0_INT_DECL(int) PGMR0PhysAllocateLargeHandyPage(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+ /*
+ * Validate inputs.
+ */
+ AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); /* caller already checked this, but just to be sure. */
+ AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_NOT_OWNER);
+ PGM_LOCK_ASSERT_OWNER_EX(pVM, &pVM->aCpus[idCpu]);
+ Assert(!pVM->pgm.s.cLargeHandyPages);
+
+ /*
+ * Do the job.
+ */
+ int rc = GMMR0AllocateLargePage(pGVM, pVM, idCpu, _2M,
+ &pVM->pgm.s.aLargeHandyPage[0].idPage,
+ &pVM->pgm.s.aLargeHandyPage[0].HCPhysGCPhys);
+ if (RT_SUCCESS(rc))
+ pVM->pgm.s.cLargeHandyPages = 1;
+
+ return rc;
+}
+
+
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+/* Interface sketch. The interface belongs to a global PCI pass-through
+ manager. It shall use the global VM handle, not the user VM handle to
+ store the per-VM info (domain) since that is all ring-0 stuff, thus
+   passing pGVM here. I've tentatively prefixed the functions 'GPciRawR0',
+   we can discuss the PciRaw code re-organization when I'm back from
+ vacation.
+
+ I've implemented the initial IOMMU set up below. For things to work
+   reliably, we will probably need to add a whole bunch of checks and
+ GPciRawR0GuestPageUpdate call to the PGM code. For the present,
+ assuming nested paging (enforced) and prealloc (enforced), no
+ ballooning (check missing), page sharing (check missing) or live
+ migration (check missing), it might work fine. At least if some
+ VM power-off hook is present and can tear down the IOMMU page tables. */
+
+/**
+ * Tells the global PCI pass-through manager that we are about to set up the
+ * guest page to host page mappings for the specified VM.
+ *
+ * @returns VBox status code.
+ *
+ * @param pGVM The ring-0 VM structure.
+ */
+VMMR0_INT_DECL(int) GPciRawR0GuestPageBeginAssignments(PGVM pGVM)
+{
+ NOREF(pGVM);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Assigns a host page mapping for a guest page.
+ *
+ * This is only used when setting up the mappings, i.e. between
+ * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM structure.
+ * @param GCPhys The address of the guest page (page aligned).
+ * @param HCPhys The address of the host page (page aligned).
+ */
+VMMR0_INT_DECL(int) GPciRawR0GuestPageAssign(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
+{
+ AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
+ AssertReturn(!(HCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
+
+ if (pGVM->rawpci.s.pfnContigMemInfo)
+ /** @todo what do we do on failure? */
+ pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, HCPhys, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_MAP);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Indicates that the specified guest page doesn't exist or doesn't have a host
+ * page mapping that we trust PCI pass-through with.
+ *
+ * This is only used when setting up the mappings, i.e. between
+ * GPciRawR0GuestPageBeginAssignments and GPciRawR0GuestPageEndAssignments.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM structure.
+ * @param GCPhys The address of the guest page (page aligned).
+ */
+VMMR0_INT_DECL(int) GPciRawR0GuestPageUnassign(PGVM pGVM, RTGCPHYS GCPhys)
+{
+ AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_3);
+
+ if (pGVM->rawpci.s.pfnContigMemInfo)
+ /** @todo what do we do on failure? */
+ pGVM->rawpci.s.pfnContigMemInfo(&pGVM->rawpci.s, 0, GCPhys, PAGE_SIZE, PCIRAW_MEMINFO_UNMAP);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Tells the global PCI pass-through manager that we have completed setting up
+ * the guest page to host page mappings for the specified VM.
+ *
+ * This complements GPciRawR0GuestPageBeginAssignments and will be called even
+ * if some page assignment failed.
+ *
+ * @returns VBox status code.
+ *
+ * @param pGVM The ring-0 VM structure.
+ */
+VMMR0_INT_DECL(int) GPciRawR0GuestPageEndAssignments(PGVM pGVM)
+{
+ NOREF(pGVM);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Tells the global PCI pass-through manager that a guest page mapping has
+ * changed after the initial setup.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM structure.
+ * @param GCPhys The address of the guest page (page aligned).
+ * @param HCPhys The new host page address or NIL_RTHCPHYS if
+ * now unassigned.
+ */
+VMMR0_INT_DECL(int) GPciRawR0GuestPageUpdate(PGVM pGVM, RTGCPHYS GCPhys, RTHCPHYS HCPhys)
+{
+ AssertReturn(!(GCPhys & PAGE_OFFSET_MASK), VERR_INTERNAL_ERROR_4);
+ AssertReturn(!(HCPhys & PAGE_OFFSET_MASK) || HCPhys == NIL_RTHCPHYS, VERR_INTERNAL_ERROR_4);
+ NOREF(pGVM);
+ return VINF_SUCCESS;
+}
+
+#endif /* VBOX_WITH_PCI_PASSTHROUGH */
+
+
+/**
+ * Sets up the IOMMU when raw PCI device is enabled.
+ *
+ * @note This is a hack that will probably be remodelled and refined later!
+ *
+ * @returns VBox status code.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ */
+VMMR0_INT_DECL(int) PGMR0PhysSetupIoMmu(PGVM pGVM, PVM pVM)
+{
+ int rc = GVMMR0ValidateGVMandVM(pGVM, pVM);
+ if (RT_FAILURE(rc))
+ return rc;
+
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ if (pVM->pgm.s.fPciPassthrough)
+ {
+ /*
+         * The Simplistic Approach - Enumerate all the pages and tell the
+         * IOMMU about each of them.
+ */
+ pgmLock(pVM);
+ rc = GPciRawR0GuestPageBeginAssignments(pGVM);
+ if (RT_SUCCESS(rc))
+ {
+ for (PPGMRAMRANGE pRam = pVM->pgm.s.pRamRangesXR0; RT_SUCCESS(rc) && pRam; pRam = pRam->pNextR0)
+ {
+ PPGMPAGE pPage = &pRam->aPages[0];
+ RTGCPHYS GCPhys = pRam->GCPhys;
+ uint32_t cLeft = pRam->cb >> PAGE_SHIFT;
+ while (cLeft-- > 0)
+ {
+ /* Only expose pages that are 100% safe for now. */
+ if ( PGM_PAGE_GET_TYPE(pPage) == PGMPAGETYPE_RAM
+ && PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED
+ && !PGM_PAGE_HAS_ANY_HANDLERS(pPage))
+ rc = GPciRawR0GuestPageAssign(pGVM, GCPhys, PGM_PAGE_GET_HCPHYS(pPage));
+ else
+ rc = GPciRawR0GuestPageUnassign(pGVM, GCPhys);
+
+ /* next */
+ pPage++;
+ GCPhys += PAGE_SIZE;
+ }
+ }
+
+ int rc2 = GPciRawR0GuestPageEndAssignments(pGVM);
+ if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
+ rc = rc2;
+ }
+ pgmUnlock(pVM);
+ }
+ else
+#endif
+ rc = VERR_NOT_SUPPORTED;
+ return rc;
+}
+
+
+/**
+ * \#PF Handler for nested paging.
+ *
+ * @returns VBox status code (appropriate for trap handling and GC return).
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param enmShwPagingMode Paging mode for the nested page tables.
+ * @param uErr The trap error code.
+ * @param pRegFrame Trap register frame.
+ * @param GCPhysFault The fault address.
+ */
+VMMR0DECL(int) PGMR0Trap0eHandlerNestedPaging(PVM pVM, PVMCPU pVCpu, PGMMODE enmShwPagingMode, RTGCUINT uErr,
+ PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault)
+{
+ int rc;
+
+ LogFlow(("PGMTrap0eHandler: uErr=%RGx GCPhysFault=%RGp eip=%RGv\n", uErr, GCPhysFault, (RTGCPTR)pRegFrame->rip));
+ STAM_PROFILE_START(&pVCpu->pgm.s.StatRZTrap0e, a);
+ STAM_STATS({ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = NULL; } );
+
+ /* AMD uses the host's paging mode; Intel has a single mode (EPT). */
+ AssertMsg( enmShwPagingMode == PGMMODE_32_BIT || enmShwPagingMode == PGMMODE_PAE || enmShwPagingMode == PGMMODE_PAE_NX
+ || enmShwPagingMode == PGMMODE_AMD64 || enmShwPagingMode == PGMMODE_AMD64_NX || enmShwPagingMode == PGMMODE_EPT,
+ ("enmShwPagingMode=%d\n", enmShwPagingMode));
+
+ /* Reserved shouldn't end up here. */
+ Assert(!(uErr & X86_TRAP_PF_RSVD));
+
+#ifdef VBOX_WITH_STATISTICS
+ /*
+ * Error code stats.
+ */
+ if (uErr & X86_TRAP_PF_US)
+ {
+ if (!(uErr & X86_TRAP_PF_P))
+ {
+ if (uErr & X86_TRAP_PF_RW)
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNotPresentWrite);
+ else
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNotPresentRead);
+ }
+ else if (uErr & X86_TRAP_PF_RW)
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSWrite);
+ else if (uErr & X86_TRAP_PF_RSVD)
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSReserved);
+ else if (uErr & X86_TRAP_PF_ID)
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSNXE);
+ else
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eUSRead);
+ }
+ else
+ { /* Supervisor */
+ if (!(uErr & X86_TRAP_PF_P))
+ {
+ if (uErr & X86_TRAP_PF_RW)
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVNotPresentWrite);
+ else
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVNotPresentRead);
+ }
+ else if (uErr & X86_TRAP_PF_RW)
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVWrite);
+ else if (uErr & X86_TRAP_PF_ID)
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSNXE);
+ else if (uErr & X86_TRAP_PF_RSVD)
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eSVReserved);
+ }
+#endif
+
+ /*
+ * Call the worker.
+ *
+ * Note! We pretend the guest is in protected mode without paging, so we
+ * can use existing code to build the nested page tables.
+ */
+/** @todo r=bird: Gotta love this nested paging hacking we're still carrying with us... (Split PGM_TYPE_NESTED.) */
+ bool fLockTaken = false;
+ switch (enmShwPagingMode)
+ {
+ case PGMMODE_32_BIT:
+ rc = PGM_BTH_NAME_32BIT_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
+ break;
+ case PGMMODE_PAE:
+ case PGMMODE_PAE_NX:
+ rc = PGM_BTH_NAME_PAE_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
+ break;
+ case PGMMODE_AMD64:
+ case PGMMODE_AMD64_NX:
+ rc = PGM_BTH_NAME_AMD64_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
+ break;
+ case PGMMODE_EPT:
+ rc = PGM_BTH_NAME_EPT_PROT(Trap0eHandler)(pVCpu, uErr, pRegFrame, GCPhysFault, &fLockTaken);
+ break;
+ default:
+ AssertFailed();
+ rc = VERR_INVALID_PARAMETER;
+ break;
+ }
+ if (fLockTaken)
+ {
+ PGM_LOCK_ASSERT_OWNER(pVM);
+ pgmUnlock(pVM);
+ }
+
+ if (rc == VINF_PGM_SYNCPAGE_MODIFIED_PDE)
+ rc = VINF_SUCCESS;
+ /*
+ * Handle the case where we cannot interpret the instruction because we cannot get the guest physical address
+ * via its page tables, see @bugref{6043}.
+ */
+ else if ( rc == VERR_PAGE_NOT_PRESENT /* SMP only ; disassembly might fail. */
+ || rc == VERR_PAGE_TABLE_NOT_PRESENT /* seen with UNI & SMP */
+ || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT /* seen with SMP */
+ || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT) /* precaution */
+ {
+ Log(("WARNING: Unexpected VERR_PAGE_TABLE_NOT_PRESENT (%d) for page fault at %RGp error code %x (rip=%RGv)\n", rc, GCPhysFault, uErr, pRegFrame->rip));
+ /* Some kind of inconsistency in the SMP case; it's safe to just execute the instruction again; not sure about
+ single VCPU VMs though. */
+ rc = VINF_SUCCESS;
+ }
+
+ STAM_STATS({ if (!pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution))
+ pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution) = &pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0eTime2Misc; });
+ STAM_PROFILE_STOP_EX(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatRZTrap0e, pVCpu->pgm.s.CTX_SUFF(pStatTrap0eAttribution), a);
+ return rc;
+}
+
+
+/**
+ * \#PF Handler for deliberate nested paging misconfiguration (/reserved bit)
+ * employed for MMIO pages.
+ *
+ * @returns VBox status code (appropriate for trap handling and GC return).
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param enmShwPagingMode Paging mode for the nested page tables.
+ * @param pRegFrame Trap register frame.
+ * @param GCPhysFault The fault address.
+ * @param uErr The error code, UINT32_MAX if not available
+ * (VT-x).
+ */
+VMMR0DECL(VBOXSTRICTRC) PGMR0Trap0eHandlerNPMisconfig(PVM pVM, PVMCPU pVCpu, PGMMODE enmShwPagingMode,
+ PCPUMCTXCORE pRegFrame, RTGCPHYS GCPhysFault, uint32_t uErr)
+{
+#ifdef PGM_WITH_MMIO_OPTIMIZATIONS
+    STAM_PROFILE_START(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfg, a);
+ VBOXSTRICTRC rc;
+
+ /*
+     * Try to look up the all-access physical handler for the address.
+ */
+ pgmLock(pVM);
+ PPGMPHYSHANDLER pHandler = pgmHandlerPhysicalLookup(pVM, GCPhysFault);
+ PPGMPHYSHANDLERTYPEINT pHandlerType = RT_LIKELY(pHandler) ? PGMPHYSHANDLER_GET_TYPE(pVM, pHandler) : NULL;
+ if (RT_LIKELY(pHandler && pHandlerType->enmKind != PGMPHYSHANDLERKIND_WRITE))
+ {
+ /*
+         * If the handler has aliased pages or pages that have been temporarily
+ * disabled, we'll have to take a detour to make sure we resync them
+ * to avoid lots of unnecessary exits.
+ */
+ PPGMPAGE pPage;
+ if ( ( pHandler->cAliasedPages
+ || pHandler->cTmpOffPages)
+ && ( (pPage = pgmPhysGetPage(pVM, GCPhysFault)) == NULL
+ || PGM_PAGE_GET_HNDL_PHYS_STATE(pPage) == PGM_PAGE_HNDL_PHYS_STATE_DISABLED)
+ )
+ {
+ Log(("PGMR0Trap0eHandlerNPMisconfig: Resyncing aliases / tmp-off page at %RGp (uErr=%#x) %R[pgmpage]\n", GCPhysFault, uErr, pPage));
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfgSyncPage);
+ rc = pgmShwSyncNestedPageLocked(pVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
+ pgmUnlock(pVM);
+ }
+ else
+ {
+ if (pHandlerType->CTX_SUFF(pfnPfHandler))
+ {
+ void *pvUser = pHandler->CTX_SUFF(pvUser);
+ STAM_PROFILE_START(&pHandler->Stat, h);
+ pgmUnlock(pVM);
+
+ Log6(("PGMR0Trap0eHandlerNPMisconfig: calling %p(,%#x,,%RGp,%p)\n", pHandlerType->CTX_SUFF(pfnPfHandler), uErr, GCPhysFault, pvUser));
+ rc = pHandlerType->CTX_SUFF(pfnPfHandler)(pVM, pVCpu, uErr == UINT32_MAX ? RTGCPTR_MAX : uErr, pRegFrame,
+ GCPhysFault, GCPhysFault, pvUser);
+
+#ifdef VBOX_WITH_STATISTICS
+ pgmLock(pVM);
+ pHandler = pgmHandlerPhysicalLookup(pVM, GCPhysFault);
+ if (pHandler)
+ STAM_PROFILE_STOP(&pHandler->Stat, h);
+ pgmUnlock(pVM);
+#endif
+ }
+ else
+ {
+ pgmUnlock(pVM);
+ Log(("PGMR0Trap0eHandlerNPMisconfig: %RGp (uErr=%#x) -> R3\n", GCPhysFault, uErr));
+ rc = VINF_EM_RAW_EMULATE_INSTR;
+ }
+ }
+ }
+ else
+ {
+ /*
+ * Must be out of sync, so do a SyncPage and restart the instruction.
+ *
+         * ASSUMES that ALL handlers are page aligned and cover whole pages
+ * (assumption asserted in PGMHandlerPhysicalRegisterEx).
+ */
+ Log(("PGMR0Trap0eHandlerNPMisconfig: Out of sync page at %RGp (uErr=%#x)\n", GCPhysFault, uErr));
+ STAM_COUNTER_INC(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfgSyncPage);
+ rc = pgmShwSyncNestedPageLocked(pVCpu, GCPhysFault, 1 /*cPages*/, enmShwPagingMode);
+ pgmUnlock(pVM);
+ }
+
+ STAM_PROFILE_STOP(&pVCpu->pgm.s.CTX_SUFF(pStats)->StatR0NpMiscfg, a);
+ return rc;
+
+#else
+ AssertLogRelFailed();
+ return VERR_PGM_NOT_USED_IN_MODE;
+#endif
+}
+
diff --git a/src/VBox/VMM/VMMR0/PGMR0Bth.h b/src/VBox/VMM/VMMR0/PGMR0Bth.h
new file mode 100644
index 00000000..e67cf6f4
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/PGMR0Bth.h
@@ -0,0 +1,25 @@
+/* $Id: PGMR0Bth.h $ */
+/** @file
+ * VBox - Page Manager / Monitor, Shadow+Guest Paging Template.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*******************************************************************************
+* Internal Functions *
+*******************************************************************************/
+RT_C_DECLS_BEGIN
+PGM_BTH_DECL(int, Trap0eHandler)(PVMCPU pVCpu, RTGCUINT uErr, PCPUMCTXCORE pRegFrame, RTGCPTR pvFault, bool *pfLockTaken);
+RT_C_DECLS_END
+
diff --git a/src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp b/src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp
new file mode 100644
index 00000000..de94eec3
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/PGMR0SharedPage.cpp
@@ -0,0 +1,170 @@
+/* $Id: PGMR0SharedPage.cpp $ */
+/** @file
+ * PGM - Page Manager and Monitor, Page Sharing, Ring-0.
+ */
+
+/*
+ * Copyright (C) 2010-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_PGM_SHARED
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/gmm.h>
+#include "PGMInternal.h"
+#include <VBox/vmm/vm.h>
+#include "PGMInline.h"
+#include <VBox/log.h>
+#include <VBox/err.h>
+#include <iprt/assert.h>
+#include <iprt/mem.h>
+
+
+#ifdef VBOX_WITH_PAGE_SHARING
+/**
+ * Check a registered module for shared page changes.
+ *
+ * The PGM lock shall be taken prior to calling this method.
+ *
+ * @returns VBox status code.
+ *
+ * @param pVM The cross context VM structure.
+ * @param pGVM Pointer to the GVM instance data.
+ * @param idCpu The ID of the calling virtual CPU.
+ * @param pModule Global module description.
+ * @param paRegionsGCPtrs Array parallel to pModules->aRegions with the
+ * addresses of the regions in the calling
+ * process.
+ */
+VMMR0DECL(int) PGMR0SharedModuleCheck(PVM pVM, PGVM pGVM, VMCPUID idCpu, PGMMSHAREDMODULE pModule, PCRTGCPTR64 paRegionsGCPtrs)
+{
+ PVMCPU pVCpu = &pVM->aCpus[idCpu];
+ int rc = VINF_SUCCESS;
+ bool fFlushTLBs = false;
+ bool fFlushRemTLBs = false;
+ GMMSHAREDPAGEDESC PageDesc;
+
+ Log(("PGMR0SharedModuleCheck: check %s %s base=%RGv size=%x\n", pModule->szName, pModule->szVersion, pModule->Core.Key, pModule->cbModule));
+
+ PGM_LOCK_ASSERT_OWNER(pVM); /* This cannot fail as we grab the lock in pgmR3SharedModuleRegRendezvous before calling into ring-0. */
+
+ /*
+ * Check every region of the shared module.
+ */
+ for (uint32_t idxRegion = 0; idxRegion < pModule->cRegions; idxRegion++)
+ {
+ RTGCPTR GCPtrPage = paRegionsGCPtrs[idxRegion] & ~(RTGCPTR)PAGE_OFFSET_MASK;
+ uint32_t cbLeft = pModule->aRegions[idxRegion].cb; Assert(!(cbLeft & PAGE_OFFSET_MASK));
+ uint32_t idxPage = 0;
+
+ while (cbLeft)
+ {
+ /** @todo inefficient to fetch each guest page like this... */
+ RTGCPHYS GCPhys;
+ uint64_t fFlags;
+ rc = PGMGstGetPage(pVCpu, GCPtrPage, &fFlags, &GCPhys);
+ if ( rc == VINF_SUCCESS
+ && !(fFlags & X86_PTE_RW)) /* important as we make assumptions about this below! */
+ {
+ PPGMPAGE pPage = pgmPhysGetPage(pVM, GCPhys);
+ Assert(!pPage || !PGM_PAGE_IS_BALLOONED(pPage));
+ if ( pPage
+ && PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED
+ && PGM_PAGE_GET_READ_LOCKS(pPage) == 0
+ && PGM_PAGE_GET_WRITE_LOCKS(pPage) == 0 )
+ {
+ PageDesc.idPage = PGM_PAGE_GET_PAGEID(pPage);
+ PageDesc.HCPhys = PGM_PAGE_GET_HCPHYS(pPage);
+ PageDesc.GCPhys = GCPhys;
+
+ rc = GMMR0SharedModuleCheckPage(pGVM, pModule, idxRegion, idxPage, &PageDesc);
+ if (RT_FAILURE(rc))
+ break;
+
+ /*
+ * Any change for this page?
+ */
+ if (PageDesc.idPage != NIL_GMM_PAGEID)
+ {
+ Assert(PGM_PAGE_GET_STATE(pPage) == PGM_PAGE_STATE_ALLOCATED);
+
+ Log(("PGMR0SharedModuleCheck: shared page gst virt=%RGv phys=%RGp host %RHp->%RHp\n",
+ GCPtrPage, PageDesc.GCPhys, PGM_PAGE_GET_HCPHYS(pPage), PageDesc.HCPhys));
+
+ /* Page was either replaced by an existing shared
+ version of it or converted into a read-only shared
+ page, so, clear all references. */
+ bool fFlush = false;
+ rc = pgmPoolTrackUpdateGCPhys(pVM, PageDesc.GCPhys, pPage, true /* clear the entries */, &fFlush);
+ Assert( rc == VINF_SUCCESS
+ || ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)
+ && (pVCpu->pgm.s.fSyncFlags & PGM_SYNC_CLEAR_PGM_POOL)));
+ if (rc == VINF_SUCCESS)
+ fFlushTLBs |= fFlush;
+ fFlushRemTLBs = true;
+
+ if (PageDesc.HCPhys != PGM_PAGE_GET_HCPHYS(pPage))
+ {
+ /* Update the physical address and page id now. */
+ PGM_PAGE_SET_HCPHYS(pVM, pPage, PageDesc.HCPhys);
+ PGM_PAGE_SET_PAGEID(pVM, pPage, PageDesc.idPage);
+
+ /* Invalidate page map TLB entry for this page too. */
+ pgmPhysInvalidatePageMapTLBEntry(pVM, PageDesc.GCPhys);
+ pVM->pgm.s.cReusedSharedPages++;
+ }
+ /* else: nothing changed (== this page is now a shared
+ page), so no need to flush anything. */
+
+ pVM->pgm.s.cSharedPages++;
+ pVM->pgm.s.cPrivatePages--;
+ PGM_PAGE_SET_STATE(pVM, pPage, PGM_PAGE_STATE_SHARED);
+
+# ifdef VBOX_STRICT /* check sum hack */
+ pPage->s.u2Unused0 = PageDesc.u32StrictChecksum & 3;
+ //pPage->s.u2Unused1 = (PageDesc.u32StrictChecksum >> 8) & 3;
+# endif
+ }
+ }
+ }
+ else
+ {
+ Assert( rc == VINF_SUCCESS
+ || rc == VERR_PAGE_NOT_PRESENT
+ || rc == VERR_PAGE_MAP_LEVEL4_NOT_PRESENT
+ || rc == VERR_PAGE_DIRECTORY_PTR_NOT_PRESENT
+ || rc == VERR_PAGE_TABLE_NOT_PRESENT);
+ rc = VINF_SUCCESS; /* ignore error */
+ }
+
+ idxPage++;
+ GCPtrPage += PAGE_SIZE;
+ cbLeft -= PAGE_SIZE;
+ }
+ }
+
+ /*
+ * Do TLB flushing if necessary.
+ */
+ if (fFlushTLBs)
+ PGM_INVL_ALL_VCPU_TLBS(pVM);
+
+ if (fFlushRemTLBs)
+ for (VMCPUID idCurCpu = 0; idCurCpu < pVM->cCpus; idCurCpu++)
+ CPUMSetChangedFlags(&pVM->aCpus[idCurCpu], CPUM_CHANGED_GLOBAL_TLB_FLUSH);
+
+ return rc;
+}
+#endif /* VBOX_WITH_PAGE_SHARING */
+
diff --git a/src/VBox/VMM/VMMR0/TRPMR0.cpp b/src/VBox/VMM/VMMR0/TRPMR0.cpp
new file mode 100644
index 00000000..f9ca7939
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/TRPMR0.cpp
@@ -0,0 +1,107 @@
+/* $Id: TRPMR0.cpp $ */
+/** @file
+ * TRPM - The Trap Monitor - HC Ring 0
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_TRPM
+#include <VBox/vmm/trpm.h>
+#include "TRPMInternal.h"
+#include <VBox/vmm/vm.h>
+#include <VBox/vmm/vmm.h>
+#include <iprt/errcore.h>
+#include <VBox/log.h>
+#include <iprt/assert.h>
+#include <iprt/asm-amd64-x86.h>
+
+
+#if defined(RT_OS_DARWIN) && ARCH_BITS == 32
+# error "32-bit darwin is no longer supported. Go back to 4.3 or earlier!"
+#endif
+
+
+/**
+ * Dispatches an interrupt that arrived while we were in the guest context.
+ *
+ * @param pVM The cross context VM structure.
+ * @remark Must be called with interrupts disabled.
+ */
+VMMR0DECL(void) TRPMR0DispatchHostInterrupt(PVM pVM)
+{
+ /*
+ * Get the active interrupt vector number.
+ */
+ PVMCPU pVCpu = VMMGetCpu0(pVM);
+ RTUINT uActiveVector = pVCpu->trpm.s.uActiveVector;
+ pVCpu->trpm.s.uActiveVector = UINT32_MAX;
+ AssertMsgReturnVoid(uActiveVector < 256, ("uActiveVector=%#x is invalid! (More assertions to come, please enjoy!)\n", uActiveVector));
+
+#if HC_ARCH_BITS == 64 && defined(RT_OS_DARWIN)
+ /*
+ * Do it the simple and safe way.
+ *
+ * This is a workaround for an optimization bug in the code below
+     * or in gcc 4.2 on Mac (Snow Leopard seed 314).
+ */
+ trpmR0DispatchHostInterruptSimple(uActiveVector);
+
+#else /* The complicated way: */
+
+ /*
+ * Get the handler pointer (16:32 ptr) / (16:48 ptr).
+ */
+ RTIDTR Idtr;
+ ASMGetIDTR(&Idtr);
+# if HC_ARCH_BITS == 32
+ PVBOXIDTE pIdte = &((PVBOXIDTE)Idtr.pIdt)[uActiveVector];
+# else
+ PVBOXIDTE64 pIdte = &((PVBOXIDTE64)Idtr.pIdt)[uActiveVector];
+# endif
+ AssertMsgReturnVoid(pIdte->Gen.u1Present, ("The IDT entry (%d) is not present!\n", uActiveVector));
+ AssertMsgReturnVoid( pIdte->Gen.u3Type1 == VBOX_IDTE_TYPE1
+ || pIdte->Gen.u5Type2 == VBOX_IDTE_TYPE2_INT_32,
+ ("The IDT entry (%d) is not 32-bit int gate! type1=%#x type2=%#x\n",
+ uActiveVector, pIdte->Gen.u3Type1, pIdte->Gen.u5Type2));
+# if HC_ARCH_BITS == 32
+ RTFAR32 pfnHandler;
+ pfnHandler.off = VBOXIDTE_OFFSET(*pIdte);
+ pfnHandler.sel = pIdte->Gen.u16SegSel;
+
+ const RTR0UINTREG uRSP = ~(RTR0UINTREG)0;
+
+# else /* 64-bit: */
+ RTFAR64 pfnHandler;
+ pfnHandler.off = VBOXIDTE64_OFFSET(*pIdte);
+ pfnHandler.sel = pIdte->Gen.u16SegSel;
+
+ const RTR0UINTREG uRSP = ~(RTR0UINTREG)0;
+ if (pIdte->Gen.u3Ist)
+ {
+ trpmR0DispatchHostInterruptSimple(uActiveVector);
+ return;
+ }
+
+# endif
+
+ /*
+ * Dispatch it.
+ */
+ trpmR0DispatchHostInterrupt(pfnHandler.off, pfnHandler.sel, uRSP);
+#endif
+}
+
diff --git a/src/VBox/VMM/VMMR0/TRPMR0A.asm b/src/VBox/VMM/VMMR0/TRPMR0A.asm
new file mode 100644
index 00000000..8eee50f3
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/TRPMR0A.asm
@@ -0,0 +1,155 @@
+; $Id: TRPMR0A.asm $
+;; @file
+; TRPM - Host Context Ring-0
+;
+
+;
+; Copyright (C) 2006-2019 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+;*******************************************************************************
+;* Header Files *
+;*******************************************************************************
+%include "VBox/asmdefs.mac"
+%include "iprt/x86.mac"
+
+
+BEGINCODE
+
+;;
+; Calls the interrupt gate as if we received an interrupt while in Ring-0.
+;
+; @param uIP x86:[ebp+8] msc:rcx gcc:rdi The interrupt gate IP.
+; @param SelCS x86:[ebp+12] msc:dx gcc:si The interrupt gate CS.
+; @param RSP msc:r8 gcc:rdx The interrupt gate RSP. ~0 if no stack switch should take place. (only AMD64)
+;DECLASM(void) trpmR0DispatchHostInterrupt(RTR0UINTPTR uIP, RTSEL SelCS, RTR0UINTPTR RSP);
+ALIGNCODE(16)
+BEGINPROC trpmR0DispatchHostInterrupt
+ push xBP
+ mov xBP, xSP
+
+%ifdef RT_ARCH_AMD64
+ mov r11, rsp ; save the RSP for the iret frame.
+ and rsp, 0fffffffffffffff0h ; align the stack. (do it unconditionally saves some jump mess)
+
+ ; switch stack?
+ %ifdef ASM_CALL64_MSC
+ cmp r8, 0ffffffffffffffffh
+ je .no_stack_switch
+ mov rsp, r8
+ %else
+ cmp rdx, 0ffffffffffffffffh
+ je .no_stack_switch
+ mov rsp, rdx
+ %endif
+.no_stack_switch:
+
+ ; create the iret frame
+ push 0 ; SS
+ push r11 ; RSP
+ pushfq ; RFLAGS
+ and dword [rsp], ~X86_EFL_IF
+ mov ax, cs
+ push rax ; CS
+ lea r10, [.return wrt rip] ; RIP
+ push r10
+
+ ; create the retf frame
+ %ifdef ASM_CALL64_MSC
+ movzx rdx, dx
+ cmp rdx, r11
+ je .dir_jump
+ push rdx
+ push rcx
+ %else
+ movzx rsi, si
+ cmp rsi, r11
+ je .dir_jump
+ push rsi
+ push rdi
+ %endif
+
+ ; dispatch it
+ db 048h
+ retf
+
+ ; dispatch it by a jmp (don't mess up the IST stack)
+.dir_jump:
+ %ifdef ASM_CALL64_MSC
+ jmp rcx
+ %else
+ jmp rdi
+ %endif
+
+%else ; 32-bit:
+ mov ecx, [ebp + 8] ; uIP
+ movzx edx, word [ebp + 12] ; SelCS
+
+ ; create the iret frame
+ pushfd ; EFLAGS
+ and dword [esp], ~X86_EFL_IF
+ push cs ; CS
+ push .return ; EIP
+
+ ; create the retf frame
+ push edx
+ push ecx
+
+ ; dispatch it!
+ retf
+%endif
+.return:
+ cli
+
+ leave
+ ret
+ENDPROC trpmR0DispatchHostInterrupt
+
+
+;;
+; Issues a software interrupt to the specified interrupt vector.
+;
+; @param uActiveVector x86:[esp+4] msc:rcx gcc:rdi The vector number.
+;
+;DECLASM(void) trpmR0DispatchHostInterruptSimple(RTUINT uActiveVector);
+ALIGNCODE(16)
+BEGINPROC trpmR0DispatchHostInterruptSimple
+%ifdef RT_ARCH_X86
+ mov eax, [esp + 4]
+ jmp dword [.jmp_table + eax * 4]
+%else
+ lea r9, [.jmp_table wrt rip]
+ %ifdef ASM_CALL64_MSC
+ jmp qword [r9 + rcx * 8]
+ %else
+ jmp qword [r9 + rdi * 8]
+ %endif
+%endif
+
+ALIGNCODE(4)
+.jmp_table:
+%assign i 0
+%rep 256
+RTCCPTR_DEF .int_ %+ i
+%assign i i+1
+%endrep
+
+%assign i 0
+%rep 256
+ ALIGNCODE(4)
+.int_ %+ i:
+ int i
+ ret
+%assign i i+1
+%endrep
+
+ENDPROC trpmR0DispatchHostInterruptSimple
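+
+; For illustration: once the %rep blocks above have been expanded, the
+; procedure amounts to a table of 256 code pointers followed by 256 tiny
+; stubs, roughly like this (vector 3 shown as an example):
+;
+;   .jmp_table: RTCCPTR_DEF .int_0
+;               RTCCPTR_DEF .int_1
+;               ...
+;   .int_3:     int 3
+;               ret
+;
+; The dispatcher indexes .jmp_table with the vector number and jumps, so each
+; host vector is raised again with a plain software interrupt instruction.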
+
diff --git a/src/VBox/VMM/VMMR0/VMMR0.cpp b/src/VBox/VMM/VMMR0/VMMR0.cpp
new file mode 100644
index 00000000..4f5d1c2b
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/VMMR0.cpp
@@ -0,0 +1,2861 @@
+/* $Id: VMMR0.cpp $ */
+/** @file
+ * VMM - Host Context Ring 0.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_VMM
+#include <VBox/vmm/vmm.h>
+#include <VBox/sup.h>
+#include <VBox/vmm/trpm.h>
+#include <VBox/vmm/cpum.h>
+#include <VBox/vmm/pdmapi.h>
+#include <VBox/vmm/pgm.h>
+#ifdef VBOX_WITH_NEM_R0
+# include <VBox/vmm/nem.h>
+#endif
+#include <VBox/vmm/em.h>
+#include <VBox/vmm/stam.h>
+#include <VBox/vmm/tm.h>
+#include "VMMInternal.h"
+#include <VBox/vmm/vm.h>
+#include <VBox/vmm/gvm.h>
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+# include <VBox/vmm/pdmpci.h>
+#endif
+#include <VBox/vmm/apic.h>
+
+#include <VBox/vmm/gvmm.h>
+#include <VBox/vmm/gmm.h>
+#include <VBox/vmm/gim.h>
+#include <VBox/intnet.h>
+#include <VBox/vmm/hm.h>
+#include <VBox/param.h>
+#include <VBox/err.h>
+#include <VBox/version.h>
+#include <VBox/log.h>
+
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/assert.h>
+#include <iprt/crc.h>
+#include <iprt/mp.h>
+#include <iprt/once.h>
+#include <iprt/stdarg.h>
+#include <iprt/string.h>
+#include <iprt/thread.h>
+#include <iprt/timer.h>
+#include <iprt/time.h>
+
+#include "dtrace/VBoxVMM.h"
+
+
+#if defined(_MSC_VER) && defined(RT_ARCH_AMD64) /** @todo check this with with VC7! */
+# pragma intrinsic(_AddressOfReturnAddress)
+#endif
+
+#if defined(RT_OS_DARWIN) && ARCH_BITS == 32
+# error "32-bit darwin is no longer supported. Go back to 4.3 or earlier!"
+#endif
+
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** @def VMM_CHECK_SMAP_SETUP
+ * SMAP check setup. */
+/** @def VMM_CHECK_SMAP_CHECK
+ * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
+ * it will be logged and @a a_BadExpr is executed. */
+/** @def VMM_CHECK_SMAP_CHECK2
+ * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
+ * be logged, written to the VMs assertion text buffer, and @a a_BadExpr is
+ * executed. */
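+/* Note: the "|| 1" below forces the SMAP/AC checks on in all build types, not
+   just VBOX_STRICT ones. */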
+#if defined(VBOX_STRICT) || 1
+# define VMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
+# define VMM_CHECK_SMAP_CHECK(a_BadExpr) \
+ do { \
+ if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
+ { \
+ RTCCUINTREG fEflCheck = ASMGetFlags(); \
+ if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
+ { /* likely */ } \
+ else \
+ { \
+ SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
+ a_BadExpr; \
+ } \
+ } \
+ } while (0)
+# define VMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) \
+ do { \
+ if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
+ { \
+ RTCCUINTREG fEflCheck = ASMGetFlags(); \
+ if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
+ { /* likely */ } \
+ else \
+ { \
+ SUPR0BadContext((a_pVM) ? (a_pVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
+ RTStrPrintf(pVM->vmm.s.szRing0AssertMsg1, sizeof(pVM->vmm.s.szRing0AssertMsg1), \
+ "%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
+ a_BadExpr; \
+ } \
+ } \
+ } while (0)
+#else
+# define VMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
+# define VMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
+# define VMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) NOREF(fKernelFeatures)
+#endif
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+RT_C_DECLS_BEGIN
+#if defined(RT_ARCH_X86) && (defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD))
+extern uint64_t __udivdi3(uint64_t, uint64_t);
+extern uint64_t __umoddi3(uint64_t, uint64_t);
+#endif
+RT_C_DECLS_END
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** Drag in necessary library bits.
+ * The runtime lives here (in VMMR0.r0) and VBoxDD*R0.r0 links against us. */
+PFNRT g_VMMR0Deps[] =
+{
+ (PFNRT)RTCrc32,
+ (PFNRT)RTOnce,
+#if defined(RT_ARCH_X86) && (defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD))
+ (PFNRT)__udivdi3,
+ (PFNRT)__umoddi3,
+#endif
+ NULL
+};
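+/* Note: simply referencing these functions is what forces them to be linked
+   into VMMR0.r0 so that the dependent ring-0 modules can resolve them. */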
+
+#ifdef RT_OS_SOLARIS
+/* Dependency information for the native solaris loader. */
+extern "C" { char _depends_on[] = "vboxdrv"; }
+#endif
+
+/** The result of SUPR0GetRawModeUsability(), set by ModuleInit(). */
+int g_rcRawModeUsability = VINF_SUCCESS;
+
+
+/**
+ * Initialize the module.
+ * This is called when we're first loaded.
+ *
+ * @returns 0 on success.
+ * @returns VBox status on failure.
+ * @param hMod Image handle for use in APIs.
+ */
+DECLEXPORT(int) ModuleInit(void *hMod)
+{
+ VMM_CHECK_SMAP_SETUP();
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+
+#ifdef VBOX_WITH_DTRACE_R0
+ /*
+ * The first thing to do is register the static tracepoints.
+ * (Deregistration is automatic.)
+ */
+ int rc2 = SUPR0TracerRegisterModule(hMod, &g_VTGObjHeader);
+ if (RT_FAILURE(rc2))
+ return rc2;
+#endif
+ LogFlow(("ModuleInit:\n"));
+
+#ifdef VBOX_WITH_64ON32_CMOS_DEBUG
+ /*
+ * Display the CMOS debug code.
+ */
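+    /* (Ports 0x72/0x73 are typically the extended CMOS index/data pair; CMOS
+       register 0x03 is assumed to hold the 64-on-32 debug code here.) */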
+ ASMOutU8(0x72, 0x03);
+ uint8_t bDebugCode = ASMInU8(0x73);
+ LogRel(("CMOS Debug Code: %#x (%d)\n", bDebugCode, bDebugCode));
+ RTLogComPrintf("CMOS Debug Code: %#x (%d)\n", bDebugCode, bDebugCode);
+#endif
+
+ /*
+ * Initialize the VMM, GVMM, GMM, HM, PGM (Darwin) and INTNET.
+ */
+ int rc = vmmInitFormatTypes();
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = GVMMR0Init();
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = GMMR0Init();
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = HMR0Init();
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = PGMRegisterStringFormatTypes();
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ rc = PGMR0DynMapInit();
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = IntNetR0Init();
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = PciRawR0Init();
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = CPUMR0ModuleInit();
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_WITH_TRIPLE_FAULT_HACK
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ rc = vmmR0TripleFaultHackInit();
+ if (RT_SUCCESS(rc))
+#endif
+ {
+ VMM_CHECK_SMAP_CHECK(rc = VERR_VMM_SMAP_BUT_AC_CLEAR);
+ if (RT_SUCCESS(rc))
+ {
+ g_rcRawModeUsability = SUPR0GetRawModeUsability();
+ if (g_rcRawModeUsability != VINF_SUCCESS)
+ SUPR0Printf("VMMR0!ModuleInit: SUPR0GetRawModeUsability -> %Rrc\n",
+ g_rcRawModeUsability);
+ LogFlow(("ModuleInit: returns success\n"));
+ return VINF_SUCCESS;
+ }
+ }
+
+ /*
+ * Bail out.
+ */
+#ifdef VBOX_WITH_TRIPLE_FAULT_HACK
+ vmmR0TripleFaultHackTerm();
+#endif
+ }
+ else
+ LogRel(("ModuleInit: CPUMR0ModuleInit -> %Rrc\n", rc));
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ PciRawR0Term();
+#endif
+ }
+ else
+ LogRel(("ModuleInit: PciRawR0Init -> %Rrc\n", rc));
+ IntNetR0Term();
+ }
+ else
+ LogRel(("ModuleInit: IntNetR0Init -> %Rrc\n", rc));
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ PGMR0DynMapTerm();
+#endif
+ }
+ else
+ LogRel(("ModuleInit: PGMR0DynMapInit -> %Rrc\n", rc));
+ PGMDeregisterStringFormatTypes();
+ }
+ else
+ LogRel(("ModuleInit: PGMRegisterStringFormatTypes -> %Rrc\n", rc));
+ HMR0Term();
+ }
+ else
+ LogRel(("ModuleInit: HMR0Init -> %Rrc\n", rc));
+ GMMR0Term();
+ }
+ else
+ LogRel(("ModuleInit: GMMR0Init -> %Rrc\n", rc));
+ GVMMR0Term();
+ }
+ else
+ LogRel(("ModuleInit: GVMMR0Init -> %Rrc\n", rc));
+ vmmTermFormatTypes();
+ }
+ else
+ LogRel(("ModuleInit: vmmInitFormatTypes -> %Rrc\n", rc));
+
+ LogFlow(("ModuleInit: failed %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Terminate the module.
+ * This is called when we're finally unloaded.
+ *
+ * @param hMod Image handle for use in APIs.
+ */
+DECLEXPORT(void) ModuleTerm(void *hMod)
+{
+ NOREF(hMod);
+ LogFlow(("ModuleTerm:\n"));
+
+ /*
+ * Terminate the CPUM module (Local APIC cleanup).
+ */
+ CPUMR0ModuleTerm();
+
+ /*
+ * Terminate the internal network service.
+ */
+ IntNetR0Term();
+
+ /*
+ * PGM (Darwin), HM and PciRaw global cleanup.
+ */
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ PGMR0DynMapTerm();
+#endif
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ PciRawR0Term();
+#endif
+ PGMDeregisterStringFormatTypes();
+ HMR0Term();
+#ifdef VBOX_WITH_TRIPLE_FAULT_HACK
+ vmmR0TripleFaultHackTerm();
+#endif
+
+ /*
+ * Destroy the GMM and GVMM instances.
+ */
+ GMMR0Term();
+ GVMMR0Term();
+
+ vmmTermFormatTypes();
+
+ LogFlow(("ModuleTerm: returns\n"));
+}
+
+
+/**
+ * Initializes the R0 driver for a particular VM instance.
+ *
+ * @returns VBox status code.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param uSvnRev The SVN revision of the ring-3 part.
+ * @param uBuildType Build type indicator.
+ * @thread EMT(0)
+ */
+static int vmmR0InitVM(PGVM pGVM, PVM pVM, uint32_t uSvnRev, uint32_t uBuildType)
+{
+ VMM_CHECK_SMAP_SETUP();
+ VMM_CHECK_SMAP_CHECK(return VERR_VMM_SMAP_BUT_AC_CLEAR);
+
+ /*
+ * Match the SVN revisions and build type.
+ */
+ if (uSvnRev != VMMGetSvnRev())
+ {
+ LogRel(("VMMR0InitVM: Revision mismatch, r3=%d r0=%d\n", uSvnRev, VMMGetSvnRev()));
+ SUPR0Printf("VMMR0InitVM: Revision mismatch, r3=%d r0=%d\n", uSvnRev, VMMGetSvnRev());
+ return VERR_VMM_R0_VERSION_MISMATCH;
+ }
+ if (uBuildType != vmmGetBuildType())
+ {
+ LogRel(("VMMR0InitVM: Build type mismatch, r3=%#x r0=%#x\n", uBuildType, vmmGetBuildType()));
+ SUPR0Printf("VMMR0InitVM: Build type mismatch, r3=%#x r0=%#x\n", uBuildType, vmmGetBuildType());
+ return VERR_VMM_R0_VERSION_MISMATCH;
+ }
+
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, 0 /*idCpu*/);
+ if (RT_FAILURE(rc))
+ return rc;
+
+#ifdef LOG_ENABLED
+ /*
+ * Register the EMT R0 logger instance for VCPU 0.
+ */
+ PVMCPU pVCpu = &pVM->aCpus[0];
+
+ PVMMR0LOGGER pR0Logger = pVCpu->vmm.s.pR0LoggerR0;
+ if (pR0Logger)
+ {
+# if 0 /* testing of the logger. */
+ LogCom(("vmmR0InitVM: before %p\n", RTLogDefaultInstance()));
+ LogCom(("vmmR0InitVM: pfnFlush=%p actual=%p\n", pR0Logger->Logger.pfnFlush, vmmR0LoggerFlush));
+ LogCom(("vmmR0InitVM: pfnLogger=%p actual=%p\n", pR0Logger->Logger.pfnLogger, vmmR0LoggerWrapper));
+ LogCom(("vmmR0InitVM: offScratch=%d fFlags=%#x fDestFlags=%#x\n", pR0Logger->Logger.offScratch, pR0Logger->Logger.fFlags, pR0Logger->Logger.fDestFlags));
+
+ RTLogSetDefaultInstanceThread(&pR0Logger->Logger, (uintptr_t)pVM->pSession);
+ LogCom(("vmmR0InitVM: after %p reg\n", RTLogDefaultInstance()));
+ RTLogSetDefaultInstanceThread(NULL, pVM->pSession);
+ LogCom(("vmmR0InitVM: after %p dereg\n", RTLogDefaultInstance()));
+
+ pR0Logger->Logger.pfnLogger("hello ring-0 logger\n");
+ LogCom(("vmmR0InitVM: returned successfully from direct logger call.\n"));
+ pR0Logger->Logger.pfnFlush(&pR0Logger->Logger);
+ LogCom(("vmmR0InitVM: returned successfully from direct flush call.\n"));
+
+ RTLogSetDefaultInstanceThread(&pR0Logger->Logger, (uintptr_t)pVM->pSession);
+ LogCom(("vmmR0InitVM: after %p reg2\n", RTLogDefaultInstance()));
+ pR0Logger->Logger.pfnLogger("hello ring-0 logger\n");
+ LogCom(("vmmR0InitVM: returned successfully from direct logger call (2). offScratch=%d\n", pR0Logger->Logger.offScratch));
+ RTLogSetDefaultInstanceThread(NULL, pVM->pSession);
+ LogCom(("vmmR0InitVM: after %p dereg2\n", RTLogDefaultInstance()));
+
+ RTLogLoggerEx(&pR0Logger->Logger, 0, ~0U, "hello ring-0 logger (RTLogLoggerEx)\n");
+ LogCom(("vmmR0InitVM: RTLogLoggerEx returned fine offScratch=%d\n", pR0Logger->Logger.offScratch));
+
+ RTLogSetDefaultInstanceThread(&pR0Logger->Logger, (uintptr_t)pVM->pSession);
+ RTLogPrintf("hello ring-0 logger (RTLogPrintf)\n");
+ LogCom(("vmmR0InitVM: RTLogPrintf returned fine offScratch=%d\n", pR0Logger->Logger.offScratch));
+# endif
+ Log(("Switching to per-thread logging instance %p (key=%p)\n", &pR0Logger->Logger, pVM->pSession));
+ RTLogSetDefaultInstanceThread(&pR0Logger->Logger, (uintptr_t)pVM->pSession);
+ pR0Logger->fRegistered = true;
+ }
+#endif /* LOG_ENABLED */
+
+ /*
+ * Check if the host supports high resolution timers or not.
+ */
+ if ( pVM->vmm.s.fUsePeriodicPreemptionTimers
+ && !RTTimerCanDoHighResolution())
+ pVM->vmm.s.fUsePeriodicPreemptionTimers = false;
+
+ /*
+ * Initialize the per VM data for GVMM and GMM.
+ */
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ rc = GVMMR0InitVM(pGVM);
+// if (RT_SUCCESS(rc))
+// rc = GMMR0InitPerVMData(pVM);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Init HM, CPUM and PGM (Darwin only).
+ */
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ rc = HMR0InitVM(pVM);
+ if (RT_SUCCESS(rc))
+            VMM_CHECK_SMAP_CHECK2(pVM, rc = VERR_VMM_RING0_ASSERTION); /* CPUMR0InitVM will otherwise panic the host */
+ if (RT_SUCCESS(rc))
+ {
+ rc = CPUMR0InitVM(pVM);
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ rc = PGMR0DynMapInitVM(pVM);
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ rc = EMR0InitVM(pGVM, pVM);
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ rc = PciRawR0InitVM(pGVM, pVM);
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ rc = GIMR0InitVM(pVM);
+ if (RT_SUCCESS(rc))
+ {
+ VMM_CHECK_SMAP_CHECK2(pVM, rc = VERR_VMM_RING0_ASSERTION);
+ if (RT_SUCCESS(rc))
+ {
+ GVMMR0DoneInitVM(pGVM);
+
+ /*
+ * Collect a bit of info for the VM release log.
+ */
+ pVM->vmm.s.fIsPreemptPendingApiTrusty = RTThreadPreemptIsPendingTrusty();
+                                    pVM->vmm.s.fIsPreemptPossible         = RTThreadPreemptIsPossible();
+
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ return rc;
+ }
+
+ /* bail out*/
+ GIMR0TermVM(pVM);
+ }
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ PciRawR0TermVM(pGVM, pVM);
+#endif
+ }
+ }
+ }
+ }
+ HMR0TermVM(pVM);
+ }
+ }
+
+ RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pVM->pSession);
+ return rc;
+}
+
+
+/**
+ * Does EMT specific VM initialization.
+ *
+ * @returns VBox status code.
+ * @param pGVM The ring-0 VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu The EMT that's calling.
+ */
+static int vmmR0InitVMEmt(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+ /* Paranoia (caller checked these already). */
+ AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
+ AssertReturn(pGVM->aCpus[idCpu].hEMT == RTThreadNativeSelf(), VERR_INVALID_CPU_ID);
+
+#ifdef LOG_ENABLED
+ /*
+ * Registration of ring 0 loggers.
+ */
+ PVMCPU pVCpu = &pVM->aCpus[idCpu];
+ PVMMR0LOGGER pR0Logger = pVCpu->vmm.s.pR0LoggerR0;
+ if ( pR0Logger
+ && !pR0Logger->fRegistered)
+ {
+ RTLogSetDefaultInstanceThread(&pR0Logger->Logger, (uintptr_t)pVM->pSession);
+ pR0Logger->fRegistered = true;
+ }
+#endif
+ RT_NOREF(pVM);
+
+ return VINF_SUCCESS;
+}
+
+
+
+/**
+ * Terminates the R0 bits for a particular VM instance.
+ *
+ * This is normally called by ring-3 as part of the VM termination process, but
+ * may alternatively be called during the support driver session cleanup when
+ * the VM object is destroyed (see GVMM).
+ *
+ * @returns VBox status code.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu Set to 0 if EMT(0) or NIL_VMCPUID if session cleanup
+ * thread.
+ * @thread EMT(0) or session clean up thread.
+ */
+VMMR0_INT_DECL(int) VMMR0TermVM(PGVM pGVM, PVM pVM, VMCPUID idCpu)
+{
+ /*
+ * Check EMT(0) claim if we're called from userland.
+ */
+ if (idCpu != NIL_VMCPUID)
+ {
+ AssertReturn(idCpu == 0, VERR_INVALID_CPU_ID);
+ int rc = GVMMR0ValidateGVMandVMandEMT(pGVM, pVM, idCpu);
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ PciRawR0TermVM(pGVM, pVM);
+#endif
+
+ /*
+ * Tell GVMM what we're up to and check that we only do this once.
+ */
+ if (GVMMR0DoingTermVM(pGVM))
+ {
+ GIMR0TermVM(pVM);
+
+ /** @todo I wish to call PGMR0PhysFlushHandyPages(pVM, &pVM->aCpus[idCpu])
+ * here to make sure we don't leak any shared pages if we crash... */
+#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE
+ PGMR0DynMapTermVM(pVM);
+#endif
+ HMR0TermVM(pVM);
+ }
+
+ /*
+ * Deregister the logger.
+ */
+ RTLogSetDefaultInstanceThread(NULL, (uintptr_t)pVM->pSession);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * An interrupt or unhalt force flag is set, deal with it.
+ *
+ * @returns VINF_SUCCESS (or VINF_EM_HALT).
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param uMWait Result from EMMonitorWaitIsActive().
+ * @param   enmInterruptibility Guest CPU interruptibility level.
+ */
+static int vmmR0DoHaltInterrupt(PVMCPU pVCpu, unsigned uMWait, CPUMINTERRUPTIBILITY enmInterruptibility)
+{
+ Assert(!TRPMHasTrap(pVCpu));
+ Assert( enmInterruptibility > CPUMINTERRUPTIBILITY_INVALID
+ && enmInterruptibility < CPUMINTERRUPTIBILITY_END);
+
+ /*
+     * Pending interrupts w/o any SMIs or NMIs? That's the usual case.
+ */
+ if ( VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_SMI | VMCPU_FF_INTERRUPT_NMI))
+ {
+ if (enmInterruptibility <= CPUMINTERRUPTIBILITY_UNRESTRAINED)
+ {
+ uint8_t u8Interrupt = 0;
+ int rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
+ Log(("vmmR0DoHaltInterrupt: CPU%d u8Interrupt=%d (%#x) rc=%Rrc\n", pVCpu->idCpu, u8Interrupt, u8Interrupt, rc));
+ if (RT_SUCCESS(rc))
+ {
+ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_UNHALT);
+
+ rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
+ AssertRCSuccess(rc);
+ STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltExec);
+ return rc;
+ }
+ }
+ }
+ /*
+ * SMI is not implemented yet, at least not here.
+ */
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_SMI))
+ {
+ return VINF_EM_HALT;
+ }
+ /*
+ * NMI.
+ */
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI))
+ {
+ if (enmInterruptibility < CPUMINTERRUPTIBILITY_NMI_INHIBIT)
+ {
+ /** @todo later. */
+ return VINF_EM_HALT;
+ }
+ }
+ /*
+ * Nested-guest virtual interrupt.
+ */
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NESTED_GUEST))
+ {
+ if (enmInterruptibility < CPUMINTERRUPTIBILITY_VIRT_INT_DISABLED)
+ {
+ /** @todo NSTVMX: NSTSVM: Remember, we might have to check and perform VM-exits
+ * here before injecting the virtual interrupt. See emR3ForcedActions
+ * for details. */
+ return VINF_EM_HALT;
+ }
+ }
+
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UNHALT))
+ {
+ STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltExec);
+ return VINF_SUCCESS;
+ }
+ if (uMWait > 1)
+ {
+ STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltExec);
+ return VINF_SUCCESS;
+ }
+
+ return VINF_EM_HALT;
+}
+
+
+/**
+ * This does one round of vmR3HaltGlobal1Halt().
+ *
+ * The rationale here is that we'll reduce latency in interrupt situations if we
+ * don't go to ring-3 immediately on a VINF_EM_HALT (guest executed HLT or
+ * MWAIT), but do one round of blocking here instead and hope the interrupt is
+ * raised in the meanwhile.
+ *
+ * If we go to ring-3 we'll quit the inner HM/NEM loop in EM and end up in the
+ * outer loop, which will then call VMR3WaitHalted() and that in turn will do a
+ * ring-0 call (unless we're too close to a timer event). When the interrupt
+ * wakes us up, we'll return from ring-0 and EM will by instinct do a
+ * rescheduling (because of raw-mode) before it resumes the HM/NEM loop and gets
+ * back to VMMR0EntryFast().
+ *
+ * @returns VINF_SUCCESS or VINF_EM_HALT.
+ * @param pGVM The ring-0 VM structure.
+ * @param pVM The cross context VM structure.
+ * @param pGVCpu The ring-0 virtual CPU structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ *
+ * @todo r=bird: All the blocking/waiting and EMT management should move out of
+ * the VM module, probably to VMM. Then this would be more weird wrt
+ * parameters and statistics.
+ */
+static int vmmR0DoHalt(PGVM pGVM, PVM pVM, PGVMCPU pGVCpu, PVMCPU pVCpu)
+{
+ Assert(pVCpu == pGVCpu->pVCpu);
+
+ /*
+ * Do spin stat historization.
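+    /* Every 256 halts the two counters are re-seeded with a small bias towards
+       whichever outcome (handling the halt in ring-0 vs. going to ring-3)
+       dominated, so the spin heuristics below track recent behaviour rather
+       than the whole VM lifetime. */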
+ */
+ if (++pVCpu->vmm.s.cR0Halts & 0xff)
+ { /* likely */ }
+ else if (pVCpu->vmm.s.cR0HaltsSucceeded > pVCpu->vmm.s.cR0HaltsToRing3)
+ {
+ pVCpu->vmm.s.cR0HaltsSucceeded = 2;
+ pVCpu->vmm.s.cR0HaltsToRing3 = 0;
+ }
+ else
+ {
+ pVCpu->vmm.s.cR0HaltsSucceeded = 0;
+ pVCpu->vmm.s.cR0HaltsToRing3 = 2;
+ }
+
+ /*
+     * Flags that make us go to ring-3.
+ */
+ uint32_t const fVmFFs = VM_FF_TM_VIRTUAL_SYNC | VM_FF_PDM_QUEUES | VM_FF_PDM_DMA
+ | VM_FF_DBGF | VM_FF_REQUEST | VM_FF_CHECK_VM_STATE
+ | VM_FF_RESET | VM_FF_EMT_RENDEZVOUS | VM_FF_PGM_NEED_HANDY_PAGES
+ | VM_FF_PGM_NO_MEMORY | VM_FF_REM_HANDLER_NOTIFY | VM_FF_DEBUG_SUSPEND;
+ uint64_t const fCpuFFs = VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT | VMCPU_FF_IEM
+ | VMCPU_FF_REQUEST | VMCPU_FF_DBGF | VMCPU_FF_HM_UPDATE_CR3
+ | VMCPU_FF_HM_UPDATE_PAE_PDPES | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
+ | VMCPU_FF_TO_R3 | VMCPU_FF_IOM
+#ifdef VBOX_WITH_RAW_MODE
+ | VMCPU_FF_TRPM_SYNC_IDT | VMCPU_FF_SELM_SYNC_TSS | VMCPU_FF_SELM_SYNC_GDT
+ | VMCPU_FF_SELM_SYNC_LDT | VMCPU_FF_CSAM_SCAN_PAGE | VMCPU_FF_CSAM_PENDING_ACTION
+ | VMCPU_FF_CPUM
+#endif
+ ;
+
+ /*
+ * Check preconditions.
+ */
+ unsigned const uMWait = EMMonitorWaitIsActive(pVCpu);
+ CPUMINTERRUPTIBILITY const enmInterruptibility = CPUMGetGuestInterruptibility(pVCpu);
+ if ( pVCpu->vmm.s.fMayHaltInRing0
+ && !TRPMHasTrap(pVCpu)
+ && ( enmInterruptibility == CPUMINTERRUPTIBILITY_UNRESTRAINED
+ || uMWait > 1))
+ {
+ if ( !VM_FF_IS_ANY_SET(pVM, fVmFFs)
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, fCpuFFs))
+ {
+ /*
+ * Interrupts pending already?
+ */
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(pVCpu);
+
+ /*
+ * Flags that wake up from the halted state.
+ */
+ uint64_t const fIntMask = VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NESTED_GUEST
+ | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI | VMCPU_FF_UNHALT;
+
+ if (VMCPU_FF_IS_ANY_SET(pVCpu, fIntMask))
+ return vmmR0DoHaltInterrupt(pVCpu, uMWait, enmInterruptibility);
+ ASMNopPause();
+
+ /*
+ * Check out how long till the next timer event.
+ */
+ uint64_t u64Delta;
+ uint64_t u64GipTime = TMTimerPollGIP(pVM, pVCpu, &u64Delta);
+
+ if ( !VM_FF_IS_ANY_SET(pVM, fVmFFs)
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, fCpuFFs))
+ {
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(pVCpu);
+
+ if (VMCPU_FF_IS_ANY_SET(pVCpu, fIntMask))
+ return vmmR0DoHaltInterrupt(pVCpu, uMWait, enmInterruptibility);
+
+ /*
+ * Wait if there is enough time to the next timer event.
+ */
+ if (u64Delta >= pVCpu->vmm.s.cNsSpinBlockThreshold)
+ {
+ /* If there are few other CPU cores around, we will procrastinate a
+ little before going to sleep, hoping for some device raising an
+ interrupt or similar. Though, the best thing here would be to
+                       dynamically adjust the spin count according to its usefulness or
+ something... */
+ if ( pVCpu->vmm.s.cR0HaltsSucceeded > pVCpu->vmm.s.cR0HaltsToRing3
+ && RTMpGetOnlineCount() >= 4)
+ {
+                        /** @todo Figure out how we can skip this if it hasn't helped recently...
+ * @bugref{9172#c12} */
+ uint32_t cSpinLoops = 42;
+ while (cSpinLoops-- > 0)
+ {
+ ASMNopPause();
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(pVCpu);
+ ASMNopPause();
+ if (VM_FF_IS_ANY_SET(pVM, fVmFFs))
+ {
+ STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltToR3FromSpin);
+ return VINF_EM_HALT;
+ }
+ ASMNopPause();
+ if (VMCPU_FF_IS_ANY_SET(pVCpu, fCpuFFs))
+ {
+ STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltToR3FromSpin);
+ return VINF_EM_HALT;
+ }
+ ASMNopPause();
+ if (VMCPU_FF_IS_ANY_SET(pVCpu, fIntMask))
+ {
+ STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltExecFromSpin);
+ return vmmR0DoHaltInterrupt(pVCpu, uMWait, enmInterruptibility);
+ }
+ ASMNopPause();
+ }
+ }
+
+ /* Block. We have to set the state to VMCPUSTATE_STARTED_HALTED here so ring-3
+ knows when to notify us (cannot access VMINTUSERPERVMCPU::fWait from here). */
+ VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HALTED, VMCPUSTATE_STARTED);
+ uint64_t const u64StartSchedHalt = RTTimeNanoTS();
+ int rc = GVMMR0SchedHalt(pGVM, pVM, pGVCpu, u64GipTime);
+ uint64_t const u64EndSchedHalt = RTTimeNanoTS();
+ uint64_t const cNsElapsedSchedHalt = u64EndSchedHalt - u64StartSchedHalt;
+ VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_HALTED);
+ STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->vmm.s.StatR0HaltBlock, cNsElapsedSchedHalt);
+ if ( rc == VINF_SUCCESS
+ || rc == VERR_INTERRUPTED)
+ {
+ /* Keep some stats like ring-3 does. */
+ int64_t const cNsOverslept = u64EndSchedHalt - u64GipTime;
+ if (cNsOverslept > 50000)
+ STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->vmm.s.StatR0HaltBlockOverslept, cNsOverslept);
+ else if (cNsOverslept < -50000)
+ STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->vmm.s.StatR0HaltBlockInsomnia, cNsElapsedSchedHalt);
+ else
+ STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->vmm.s.StatR0HaltBlockOnTime, cNsElapsedSchedHalt);
+
+ /*
+ * Recheck whether we can resume execution or have to go to ring-3.
+ */
+ if ( !VM_FF_IS_ANY_SET(pVM, fVmFFs)
+ && !VMCPU_FF_IS_ANY_SET(pVCpu, fCpuFFs))
+ {
+ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
+ APICUpdatePendingInterrupts(pVCpu);
+ if (VMCPU_FF_IS_ANY_SET(pVCpu, fIntMask))
+ {
+ STAM_REL_COUNTER_INC(&pVCpu->vmm.s.StatR0HaltExecFromBlock);
+ return vmmR0DoHaltInterrupt(pVCpu, uMWait, enmInterruptibility);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+ return VINF_EM_HALT;
+}
+
+
+/**
+ * VMM ring-0 thread-context callback.
+ *
+ * This does common HM state updating and calls the HM-specific thread-context
+ * callback.
+ *
+ * @param enmEvent The thread-context event.
+ * @param pvUser Opaque pointer to the VMCPU.
+ *
+ * @thread EMT(pvUser)
+ */
+static DECLCALLBACK(void) vmmR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, void *pvUser)
+{
+ PVMCPU pVCpu = (PVMCPU)pvUser;
+
+ switch (enmEvent)
+ {
+ case RTTHREADCTXEVENT_IN:
+ {
+ /*
+ * Linux may call us with preemption enabled (really!) but technically we
+ * cannot get preempted here, otherwise we end up in an infinite recursion
+ * scenario (i.e. preempted in resume hook -> preempt hook -> resume hook...
+ * ad infinitum). Let's just disable preemption for now...
+ */
+ /** @todo r=bird: I don't believe the above. The linux code is clearly enabling
+ * preemption after doing the callout (one or two functions up the
+ * call chain). */
+ /** @todo r=ramshankar: See @bugref{5313#c30}. */
+ RTTHREADPREEMPTSTATE ParanoidPreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+ RTThreadPreemptDisable(&ParanoidPreemptState);
+
+ /* We need to update the VCPU <-> host CPU mapping. */
+ RTCPUID idHostCpu;
+ uint32_t iHostCpuSet = RTMpCurSetIndexAndId(&idHostCpu);
+ pVCpu->iHostCpuSet = iHostCpuSet;
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, idHostCpu);
+
+ /* In the very unlikely event that the GIP delta for the CPU we're
+ rescheduled needs calculating, try force a return to ring-3.
+ We unfortunately cannot do the measurements right here. */
+ if (RT_UNLIKELY(SUPIsTscDeltaAvailableForCpuSetIndex(iHostCpuSet)))
+ VMCPU_FF_SET(pVCpu, VMCPU_FF_TO_R3);
+
+ /* Invoke the HM-specific thread-context callback. */
+ HMR0ThreadCtxCallback(enmEvent, pvUser);
+
+ /* Restore preemption. */
+ RTThreadPreemptRestore(&ParanoidPreemptState);
+ break;
+ }
+
+ case RTTHREADCTXEVENT_OUT:
+ {
+ /* Invoke the HM-specific thread-context callback. */
+ HMR0ThreadCtxCallback(enmEvent, pvUser);
+
+ /*
+ * Sigh. See VMMGetCpu() used by VMCPU_ASSERT_EMT(). We cannot let several VCPUs
+             * have the same host CPU associated with them.
+ */
+ pVCpu->iHostCpuSet = UINT32_MAX;
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID);
+ break;
+ }
+
+ default:
+ /* Invoke the HM-specific thread-context callback. */
+ HMR0ThreadCtxCallback(enmEvent, pvUser);
+ break;
+ }
+}
+
+
+/**
+ * Creates thread switching hook for the current EMT thread.
+ *
+ * This is called by GVMMR0CreateVM and GVMMR0RegisterVCpu. If the host
+ * platform does not implement switcher hooks, no hooks will be created and the
+ * member will be set to NIL_RTTHREADCTXHOOK.
+ *
+ * @returns VBox status code.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @thread EMT(pVCpu)
+ */
+VMMR0_INT_DECL(int) VMMR0ThreadCtxHookCreateForEmt(PVMCPU pVCpu)
+{
+ VMCPU_ASSERT_EMT(pVCpu);
+ Assert(pVCpu->vmm.s.hCtxHook == NIL_RTTHREADCTXHOOK);
+
+#if 1 /* To disable this stuff change to zero. */
+ int rc = RTThreadCtxHookCreate(&pVCpu->vmm.s.hCtxHook, 0, vmmR0ThreadCtxCallback, pVCpu);
+ if (RT_SUCCESS(rc))
+ return rc;
+#else
+ RT_NOREF(vmmR0ThreadCtxCallback);
+ int rc = VERR_NOT_SUPPORTED;
+#endif
+
+ pVCpu->vmm.s.hCtxHook = NIL_RTTHREADCTXHOOK;
+ if (rc == VERR_NOT_SUPPORTED)
+ return VINF_SUCCESS;
+
+ LogRelMax(32, ("RTThreadCtxHookCreate failed! rc=%Rrc pVCpu=%p idCpu=%RU32\n", rc, pVCpu, pVCpu->idCpu));
+ return VINF_SUCCESS; /* Just ignore it, we can live without context hooks. */
+}
+
+
+/**
+ * Destroys the thread switching hook for the specified VCPU.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @remarks Can be called from any thread.
+ */
+VMMR0_INT_DECL(void) VMMR0ThreadCtxHookDestroyForEmt(PVMCPU pVCpu)
+{
+ int rc = RTThreadCtxHookDestroy(pVCpu->vmm.s.hCtxHook);
+ AssertRC(rc);
+ pVCpu->vmm.s.hCtxHook = NIL_RTTHREADCTXHOOK;
+}
+
+
+/**
+ * Disables the thread switching hook for this VCPU (if we got one).
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ * @thread EMT(pVCpu)
+ *
+ * @remarks This also clears VMCPU::idHostCpu, so the mapping is invalid after
+ * this call. This means you have to be careful with what you do!
+ */
+VMMR0_INT_DECL(void) VMMR0ThreadCtxHookDisable(PVMCPU pVCpu)
+{
+ /*
+ * Clear the VCPU <-> host CPU mapping as we've left HM context.
+ * @bugref{7726#c19} explains the need for this trick:
+ *
+ * hmR0VmxCallRing3Callback/hmR0SvmCallRing3Callback &
+     *    hmR0VmxLeaveSession/hmR0SvmLeaveSession disable context hooks during
+     *    longjmp & normal return to ring-3, which opens a window where we may be
+     *    rescheduled without changing VMCPU::idHostCpu, causing confusion if
+     *    the CPU starts executing a different EMT.  Both functions first disable
+     *    preemption and then call HMR0LeaveCpu, which invalidates idHostCpu, leaving
+ * an opening for getting preempted.
+ */
+ /** @todo Make HM not need this API! Then we could leave the hooks enabled
+ * all the time. */
+ /** @todo move this into the context hook disabling if(). */
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID);
+
+ /*
+ * Disable the context hook, if we got one.
+ */
+ if (pVCpu->vmm.s.hCtxHook != NIL_RTTHREADCTXHOOK)
+ {
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ int rc = RTThreadCtxHookDisable(pVCpu->vmm.s.hCtxHook);
+ AssertRC(rc);
+ }
+}
+
+
+/**
+ * Internal version of VMMR0ThreadCtxHookIsEnabled.
+ *
+ * @returns true if registered, false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(bool) vmmR0ThreadCtxHookIsEnabled(PVMCPU pVCpu)
+{
+ return RTThreadCtxHookIsEnabled(pVCpu->vmm.s.hCtxHook);
+}
+
+
+/**
+ * Whether thread-context hooks are registered for this VCPU.
+ *
+ * @returns true if registered, false otherwise.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(bool) VMMR0ThreadCtxHookIsEnabled(PVMCPU pVCpu)
+{
+ return vmmR0ThreadCtxHookIsEnabled(pVCpu);
+}
+
+
+#ifdef VBOX_WITH_STATISTICS
+/**
+ * Records return code statistics.
+ * @param pVM The cross context VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ * @param rc The status code.
+ */
+static void vmmR0RecordRC(PVM pVM, PVMCPU pVCpu, int rc)
+{
+ /*
+ * Collect statistics.
+ */
+ switch (rc)
+ {
+ case VINF_SUCCESS:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetNormal);
+ break;
+ case VINF_EM_RAW_INTERRUPT:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetInterrupt);
+ break;
+ case VINF_EM_RAW_INTERRUPT_HYPER:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetInterruptHyper);
+ break;
+ case VINF_EM_RAW_GUEST_TRAP:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetGuestTrap);
+ break;
+ case VINF_EM_RAW_RING_SWITCH:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetRingSwitch);
+ break;
+ case VINF_EM_RAW_RING_SWITCH_INT:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetRingSwitchInt);
+ break;
+ case VINF_EM_RAW_STALE_SELECTOR:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetStaleSelector);
+ break;
+ case VINF_EM_RAW_IRET_TRAP:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetIRETTrap);
+ break;
+ case VINF_IOM_R3_IOPORT_READ:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetIORead);
+ break;
+ case VINF_IOM_R3_IOPORT_WRITE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetIOWrite);
+ break;
+ case VINF_IOM_R3_IOPORT_COMMIT_WRITE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetIOCommitWrite);
+ break;
+ case VINF_IOM_R3_MMIO_READ:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMMIORead);
+ break;
+ case VINF_IOM_R3_MMIO_WRITE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMMIOWrite);
+ break;
+ case VINF_IOM_R3_MMIO_COMMIT_WRITE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMMIOCommitWrite);
+ break;
+ case VINF_IOM_R3_MMIO_READ_WRITE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMMIOReadWrite);
+ break;
+ case VINF_PATM_HC_MMIO_PATCH_READ:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMMIOPatchRead);
+ break;
+ case VINF_PATM_HC_MMIO_PATCH_WRITE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMMIOPatchWrite);
+ break;
+ case VINF_CPUM_R3_MSR_READ:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMSRRead);
+ break;
+ case VINF_CPUM_R3_MSR_WRITE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMSRWrite);
+ break;
+ case VINF_EM_RAW_EMULATE_INSTR:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetEmulate);
+ break;
+ case VINF_PATCH_EMULATE_INSTR:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPatchEmulate);
+ break;
+ case VINF_EM_RAW_EMULATE_INSTR_LDT_FAULT:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetLDTFault);
+ break;
+ case VINF_EM_RAW_EMULATE_INSTR_GDT_FAULT:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetGDTFault);
+ break;
+ case VINF_EM_RAW_EMULATE_INSTR_IDT_FAULT:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetIDTFault);
+ break;
+ case VINF_EM_RAW_EMULATE_INSTR_TSS_FAULT:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetTSSFault);
+ break;
+ case VINF_CSAM_PENDING_ACTION:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetCSAMTask);
+ break;
+ case VINF_PGM_SYNC_CR3:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetSyncCR3);
+ break;
+ case VINF_PATM_PATCH_INT3:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPatchInt3);
+ break;
+ case VINF_PATM_PATCH_TRAP_PF:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPatchPF);
+ break;
+ case VINF_PATM_PATCH_TRAP_GP:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPatchGP);
+ break;
+ case VINF_PATM_PENDING_IRQ_AFTER_IRET:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPatchIretIRQ);
+ break;
+ case VINF_EM_RESCHEDULE_REM:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetRescheduleREM);
+ break;
+ case VINF_EM_RAW_TO_R3:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Total);
+ if (VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3TMVirt);
+ else if (VM_FF_IS_SET(pVM, VM_FF_PGM_NEED_HANDY_PAGES))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3HandyPages);
+ else if (VM_FF_IS_SET(pVM, VM_FF_PDM_QUEUES))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3PDMQueues);
+ else if (VM_FF_IS_SET(pVM, VM_FF_EMT_RENDEZVOUS))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Rendezvous);
+ else if (VM_FF_IS_SET(pVM, VM_FF_PDM_DMA))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3DMA);
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TIMER))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Timer);
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PDM_CRITSECT))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3CritSect);
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TO_R3))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3FF);
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_IEM))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Iem);
+ else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_IOM))
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Iom);
+ else
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetToR3Unknown);
+ break;
+
+ case VINF_EM_RAW_TIMER_PENDING:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetTimerPending);
+ break;
+ case VINF_EM_RAW_INTERRUPT_PENDING:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetInterruptPending);
+ break;
+ case VINF_VMM_CALL_HOST:
+ switch (pVCpu->vmm.s.enmCallRing3Operation)
+ {
+ case VMMCALLRING3_PDM_CRIT_SECT_ENTER:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallPDMCritSectEnter);
+ break;
+ case VMMCALLRING3_PDM_LOCK:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallPDMLock);
+ break;
+ case VMMCALLRING3_PGM_POOL_GROW:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallPGMPoolGrow);
+ break;
+ case VMMCALLRING3_PGM_LOCK:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallPGMLock);
+ break;
+ case VMMCALLRING3_PGM_MAP_CHUNK:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallPGMMapChunk);
+ break;
+ case VMMCALLRING3_PGM_ALLOCATE_HANDY_PAGES:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallPGMAllocHandy);
+ break;
+ case VMMCALLRING3_REM_REPLAY_HANDLER_NOTIFICATIONS:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallRemReplay);
+ break;
+ case VMMCALLRING3_VMM_LOGGER_FLUSH:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallLogFlush);
+ break;
+ case VMMCALLRING3_VM_SET_ERROR:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallVMSetError);
+ break;
+ case VMMCALLRING3_VM_SET_RUNTIME_ERROR:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZCallVMSetRuntimeError);
+ break;
+ case VMMCALLRING3_VM_R0_ASSERTION:
+ default:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetCallRing3);
+ break;
+ }
+ break;
+ case VINF_PATM_DUPLICATE_FUNCTION:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPATMDuplicateFn);
+ break;
+ case VINF_PGM_CHANGE_MODE:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPGMChangeMode);
+ break;
+ case VINF_PGM_POOL_FLUSH_PENDING:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPGMFlushPending);
+ break;
+ case VINF_EM_PENDING_REQUEST:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPendingRequest);
+ break;
+ case VINF_EM_HM_PATCH_TPR_INSTR:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetPatchTPR);
+ break;
+ default:
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRZRetMisc);
+ break;
+ }
+}
+#endif /* VBOX_WITH_STATISTICS */
+
+
+/**
+ * The Ring 0 entry point, called by the fast-ioctl path.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * The return code is stored in pVM->vmm.s.iLastGZRc.
+ * @param idCpu The Virtual CPU ID of the calling EMT.
+ * @param enmOperation Which operation to execute.
+ * @remarks Assume called with interrupts _enabled_.
+ */
+VMMR0DECL(void) VMMR0EntryFast(PGVM pGVM, PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperation)
+{
+ /*
+ * Validation.
+ */
+ if ( idCpu < pGVM->cCpus
+ && pGVM->cCpus == pVM->cCpus)
+ { /*likely*/ }
+ else
+ {
+ SUPR0Printf("VMMR0EntryFast: Bad idCpu=%#x cCpus=%#x/%#x\n", idCpu, pGVM->cCpus, pVM->cCpus);
+ return;
+ }
+
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ PVMCPU pVCpu = &pVM->aCpus[idCpu];
+ RTNATIVETHREAD const hNativeThread = RTThreadNativeSelf();
+ if (RT_LIKELY( pGVCpu->hEMT == hNativeThread
+ && pVCpu->hNativeThreadR0 == hNativeThread))
+ { /* likely */ }
+ else
+ {
+ SUPR0Printf("VMMR0EntryFast: Bad thread idCpu=%#x hNativeSelf=%p pGVCpu->hEmt=%p pVCpu->hNativeThreadR0=%p\n",
+ idCpu, hNativeThread, pGVCpu->hEMT, pVCpu->hNativeThreadR0);
+ return;
+ }
+
+ /*
+ * SMAP fun.
+ */
+ VMM_CHECK_SMAP_SETUP();
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+
+ /*
+ * Perform requested operation.
+ */
+ switch (enmOperation)
+ {
+ /*
+ * Switch to GC and run guest raw mode code.
+ * Disable interrupts before doing the world switch.
+ */
+ case VMMR0_DO_RAW_RUN:
+ {
+#ifdef VBOX_WITH_RAW_MODE
+# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
+ /* Some safety precautions first. */
+ if (RT_UNLIKELY(!PGMGetHyperCR3(pVCpu)))
+ {
+ pVCpu->vmm.s.iLastGZRc = VERR_PGM_NO_CR3_SHADOW_ROOT;
+ break;
+ }
+# endif
+ if (RT_SUCCESS(g_rcRawModeUsability))
+ { /* likely */ }
+ else
+ {
+ pVCpu->vmm.s.iLastGZRc = g_rcRawModeUsability;
+ break;
+ }
+
+ /*
+ * Disable preemption.
+ */
+ RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+ RTThreadPreemptDisable(&PreemptState);
+
+ /*
+ * Get the host CPU identifiers, make sure they are valid and that
+ * we've got a TSC delta for the CPU.
+ */
+ RTCPUID idHostCpu;
+ uint32_t iHostCpuSet = RTMpCurSetIndexAndId(&idHostCpu);
+ if (RT_LIKELY( iHostCpuSet < RTCPUSET_MAX_CPUS
+ && SUPIsTscDeltaAvailableForCpuSetIndex(iHostCpuSet)))
+ {
+ /*
+                 * Commit the CPU identifiers and update the periodic preemption timer if it's active.
+ */
+# ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+ CPUMR0SetLApic(pVCpu, iHostCpuSet);
+# endif
+ pVCpu->iHostCpuSet = iHostCpuSet;
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, idHostCpu);
+
+ if (pVM->vmm.s.fUsePeriodicPreemptionTimers)
+ GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, pVCpu->idHostCpu, TMCalcHostTimerFrequency(pVM, pVCpu));
+
+ /*
+ * We might need to disable VT-x if the active switcher turns off paging.
+ */
+ bool fVTxDisabled;
+ int rc = HMR0EnterSwitcher(pVM, pVM->vmm.s.enmSwitcher, &fVTxDisabled);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Disable interrupts and run raw-mode code. The loop is for efficiently
+ * dispatching tracepoints that fired in raw-mode context.
+ */
+ RTCCUINTREG uFlags = ASMIntDisableFlags();
+
+ for (;;)
+ {
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
+ TMNotifyStartOfExecution(pVCpu);
+
+ rc = pVM->vmm.s.pfnR0ToRawMode(pVM);
+ pVCpu->vmm.s.iLastGZRc = rc;
+
+ TMNotifyEndOfExecution(pVCpu);
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
+
+ if (rc != VINF_VMM_CALL_TRACER)
+ break;
+ SUPR0TracerUmodProbeFire(pVM->pSession, &pVCpu->vmm.s.TracerCtx);
+ }
+
+ /*
+ * Re-enable VT-x before we dispatch any pending host interrupts and
+                     * re-enable interrupts.
+ */
+ HMR0LeaveSwitcher(pVM, fVTxDisabled);
+
+ if ( rc == VINF_EM_RAW_INTERRUPT
+ || rc == VINF_EM_RAW_INTERRUPT_HYPER)
+ TRPMR0DispatchHostInterrupt(pVM);
+
+ ASMSetFlags(uFlags);
+
+ /* Fire dtrace probe and collect statistics. */
+ VBOXVMM_R0_VMM_RETURN_TO_RING3_RC(pVCpu, CPUMQueryGuestCtxPtr(pVCpu), rc);
+# ifdef VBOX_WITH_STATISTICS
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRunRC);
+ vmmR0RecordRC(pVM, pVCpu, rc);
+# endif
+ }
+ else
+ pVCpu->vmm.s.iLastGZRc = rc;
+
+ /*
+ * Invalidate the host CPU identifiers as we restore preemption.
+ */
+ pVCpu->iHostCpuSet = UINT32_MAX;
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID);
+
+ RTThreadPreemptRestore(&PreemptState);
+ }
+ /*
+ * Invalid CPU set index or TSC delta in need of measuring.
+ */
+ else
+ {
+ RTThreadPreemptRestore(&PreemptState);
+ if (iHostCpuSet < RTCPUSET_MAX_CPUS)
+ {
+ int rc = SUPR0TscDeltaMeasureBySetIndex(pVM->pSession, iHostCpuSet, 0 /*fFlags*/,
+ 2 /*cMsWaitRetry*/, 5*RT_MS_1SEC /*cMsWaitThread*/,
+ 0 /*default cTries*/);
+ if (RT_SUCCESS(rc) || rc == VERR_CPU_OFFLINE)
+ pVCpu->vmm.s.iLastGZRc = VINF_EM_RAW_TO_R3;
+ else
+ pVCpu->vmm.s.iLastGZRc = rc;
+ }
+ else
+ pVCpu->vmm.s.iLastGZRc = VERR_INVALID_CPU_INDEX;
+ }
+
+#else /* !VBOX_WITH_RAW_MODE */
+ pVCpu->vmm.s.iLastGZRc = VERR_RAW_MODE_NOT_SUPPORTED;
+#endif
+ break;
+ }
+
+ /*
+ * Run guest code using the available hardware acceleration technology.
+ */
+ case VMMR0_DO_HM_RUN:
+ {
+ for (;;) /* hlt loop */
+ {
+ /*
+ * Disable preemption.
+ */
+ Assert(!vmmR0ThreadCtxHookIsEnabled(pVCpu));
+ RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+ RTThreadPreemptDisable(&PreemptState);
+
+ /*
+ * Get the host CPU identifiers, make sure they are valid and that
+ * we've got a TSC delta for the CPU.
+ */
+ RTCPUID idHostCpu;
+ uint32_t iHostCpuSet = RTMpCurSetIndexAndId(&idHostCpu);
+ if (RT_LIKELY( iHostCpuSet < RTCPUSET_MAX_CPUS
+ && SUPIsTscDeltaAvailableForCpuSetIndex(iHostCpuSet)))
+ {
+ pVCpu->iHostCpuSet = iHostCpuSet;
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, idHostCpu);
+
+ /*
+ * Update the periodic preemption timer if it's active.
+ */
+ if (pVM->vmm.s.fUsePeriodicPreemptionTimers)
+ GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, pVCpu->idHostCpu, TMCalcHostTimerFrequency(pVM, pVCpu));
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+
+#ifdef VMM_R0_TOUCH_FPU
+ /*
+                     * Make sure we've got the FPU state loaded so we don't need to clear
+ * CR0.TS and get out of sync with the host kernel when loading the guest
+ * FPU state. @ref sec_cpum_fpu (CPUM.cpp) and @bugref{4053}.
+ */
+ CPUMR0TouchHostFpu();
+#endif
+ int rc;
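+                    /* Set if we re-enable preemption while inside HM context below, so that
+                       we don't restore it a second time on the way out. */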
+ bool fPreemptRestored = false;
+ if (!HMR0SuspendPending())
+ {
+ /*
+ * Enable the context switching hook.
+ */
+ if (pVCpu->vmm.s.hCtxHook != NIL_RTTHREADCTXHOOK)
+ {
+ Assert(!RTThreadCtxHookIsEnabled(pVCpu->vmm.s.hCtxHook));
+ int rc2 = RTThreadCtxHookEnable(pVCpu->vmm.s.hCtxHook); AssertRC(rc2);
+ }
+
+ /*
+ * Enter HM context.
+ */
+ rc = HMR0Enter(pVCpu);
+ if (RT_SUCCESS(rc))
+ {
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
+
+ /*
+ * When preemption hooks are in place, enable preemption now that
+ * we're in HM context.
+ */
+ if (vmmR0ThreadCtxHookIsEnabled(pVCpu))
+ {
+ fPreemptRestored = true;
+ RTThreadPreemptRestore(&PreemptState);
+ }
+
+ /*
+ * Setup the longjmp machinery and execute guest code (calls HMR0RunGuestCode).
+ */
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ rc = vmmR0CallRing3SetJmp(&pVCpu->vmm.s.CallRing3JmpBufR0, HMR0RunGuestCode, pVM, pVCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+
+ /*
+                             * Assert sanity on the way out.  Using manual assertion code here as normal
+ * assertions are going to panic the host since we're outside the setjmp/longjmp zone.
+ */
+ if (RT_UNLIKELY( VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_HM
+ && RT_SUCCESS_NP(rc) && rc != VINF_VMM_CALL_HOST ))
+ {
+ pVM->vmm.s.szRing0AssertMsg1[0] = '\0';
+ RTStrPrintf(pVM->vmm.s.szRing0AssertMsg2, sizeof(pVM->vmm.s.szRing0AssertMsg2),
+ "Got VMCPU state %d expected %d.\n", VMCPU_GET_STATE(pVCpu), VMCPUSTATE_STARTED_HM);
+ rc = VERR_VMM_WRONG_HM_VMCPU_STATE;
+ }
+ /** @todo Get rid of this. HM shouldn't disable the context hook. */
+ else if (RT_UNLIKELY(vmmR0ThreadCtxHookIsEnabled(pVCpu)))
+ {
+ pVM->vmm.s.szRing0AssertMsg1[0] = '\0';
+ RTStrPrintf(pVM->vmm.s.szRing0AssertMsg2, sizeof(pVM->vmm.s.szRing0AssertMsg2),
+ "Thread-context hooks still enabled! VCPU=%p Id=%u rc=%d.\n", pVCpu, pVCpu->idCpu, rc);
+ rc = VERR_INVALID_STATE;
+ }
+
+ VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
+ }
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRunRC);
+
+ /*
+ * Invalidate the host CPU identifiers before we disable the context
+ * hook / restore preemption.
+ */
+ pVCpu->iHostCpuSet = UINT32_MAX;
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID);
+
+ /*
+ * Disable context hooks. Due to unresolved cleanup issues, we
+ * cannot leave the hooks enabled when we return to ring-3.
+ *
+ * Note! At the moment HM may also have disabled the hook
+ * when we get here, but the IPRT API handles that.
+ */
+ if (pVCpu->vmm.s.hCtxHook != NIL_RTTHREADCTXHOOK)
+ {
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID);
+ RTThreadCtxHookDisable(pVCpu->vmm.s.hCtxHook);
+ }
+ }
+ /*
+ * The system is about to go into suspend mode; go back to ring 3.
+ */
+ else
+ {
+ rc = VINF_EM_RAW_INTERRUPT;
+ pVCpu->iHostCpuSet = UINT32_MAX;
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID);
+ }
+
+ /** @todo When HM stops messing with the context hook state, we'll disable
+ * preemption again before the RTThreadCtxHookDisable call. */
+ if (!fPreemptRestored)
+ RTThreadPreemptRestore(&PreemptState);
+
+ pVCpu->vmm.s.iLastGZRc = rc;
+
+ /* Fire dtrace probe and collect statistics. */
+ VBOXVMM_R0_VMM_RETURN_TO_RING3_HM(pVCpu, CPUMQueryGuestCtxPtr(pVCpu), rc);
+#ifdef VBOX_WITH_STATISTICS
+ vmmR0RecordRC(pVM, pVCpu, rc);
+#endif
+#if 1
+ /*
+                 * If this is a halt, try to handle it here in ring-0 before returning to ring-3.
+ */
+ if (rc != VINF_EM_HALT)
+ { /* we're not in a hurry for a HLT, so prefer this path */ }
+ else
+ {
+ pVCpu->vmm.s.iLastGZRc = rc = vmmR0DoHalt(pGVM, pVM, pGVCpu, pVCpu);
+ if (rc == VINF_SUCCESS)
+ {
+ pVCpu->vmm.s.cR0HaltsSucceeded++;
+ continue;
+ }
+ pVCpu->vmm.s.cR0HaltsToRing3++;
+ }
+#endif
+ }
+ /*
+ * Invalid CPU set index or TSC delta in need of measuring.
+ */
+ else
+ {
+ pVCpu->iHostCpuSet = UINT32_MAX;
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID);
+ RTThreadPreemptRestore(&PreemptState);
+ if (iHostCpuSet < RTCPUSET_MAX_CPUS)
+ {
+ int rc = SUPR0TscDeltaMeasureBySetIndex(pVM->pSession, iHostCpuSet, 0 /*fFlags*/,
+ 2 /*cMsWaitRetry*/, 5*RT_MS_1SEC /*cMsWaitThread*/,
+ 0 /*default cTries*/);
+ if (RT_SUCCESS(rc) || rc == VERR_CPU_OFFLINE)
+ pVCpu->vmm.s.iLastGZRc = VINF_EM_RAW_TO_R3;
+ else
+ pVCpu->vmm.s.iLastGZRc = rc;
+ }
+ else
+ pVCpu->vmm.s.iLastGZRc = VERR_INVALID_CPU_INDEX;
+ }
+ break;
+
+ } /* halt loop. */
+ break;
+ }
+
+#ifdef VBOX_WITH_NEM_R0
+# if defined(RT_ARCH_AMD64) && defined(RT_OS_WINDOWS)
+ case VMMR0_DO_NEM_RUN:
+ {
+ /*
+ * Setup the longjmp machinery and execute guest code (calls NEMR0RunGuestCode).
+ */
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ int rc = vmmR0CallRing3SetJmp2(&pVCpu->vmm.s.CallRing3JmpBufR0, NEMR0RunGuestCode, pGVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ STAM_COUNTER_INC(&pVM->vmm.s.StatRunRC);
+
+ pVCpu->vmm.s.iLastGZRc = rc;
+
+ /*
+ * Fire dtrace probe and collect statistics.
+ */
+ VBOXVMM_R0_VMM_RETURN_TO_RING3_NEM(pVCpu, CPUMQueryGuestCtxPtr(pVCpu), rc);
+# ifdef VBOX_WITH_STATISTICS
+ vmmR0RecordRC(pVM, pVCpu, rc);
+# endif
+ break;
+ }
+# endif
+#endif
+
+
+ /*
+ * For profiling.
+ */
+ case VMMR0_DO_NOP:
+ pVCpu->vmm.s.iLastGZRc = VINF_SUCCESS;
+ break;
+
+ /*
+ * Shouldn't happen.
+ */
+ default:
+ AssertMsgFailed(("%#x\n", enmOperation));
+ pVCpu->vmm.s.iLastGZRc = VERR_NOT_SUPPORTED;
+ break;
+ }
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+}
+
+
+/**
+ * Validates a session or VM session argument.
+ *
+ * @returns true / false accordingly.
+ * @param pVM The cross context VM structure.
+ * @param pClaimedSession The session claim to validate.
+ * @param pSession The session argument.
+ */
+DECLINLINE(bool) vmmR0IsValidSession(PVM pVM, PSUPDRVSESSION pClaimedSession, PSUPDRVSESSION pSession)
+{
+ /* This must be set! */
+ if (!pSession)
+ return false;
+
+    /* Only one of the two may be specified. */
+ if (pVM && pClaimedSession)
+ return false;
+ if (pVM)
+ pClaimedSession = pVM->pSession;
+ return pClaimedSession == pSession;
+}
+
+
+/**
+ * VMMR0EntryEx worker function, either called directly or, whenever possible,
+ * called through a longjmp so we can exit safely on failure.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu Virtual CPU ID argument. Must be NIL_VMCPUID if pVM
+ * is NIL_RTR0PTR, and may be NIL_VMCPUID if it isn't
+ * @param enmOperation Which operation to execute.
+ * @param pReqHdr This points to a SUPVMMR0REQHDR packet. Optional.
+ * The support driver validates this if it's present.
+ * @param u64Arg Some simple constant argument.
+ * @param pSession The session of the caller.
+ *
+ * @remarks Assume called with interrupts _enabled_.
+ */
+static int vmmR0EntryExWorker(PGVM pGVM, PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperation,
+ PSUPVMMR0REQHDR pReqHdr, uint64_t u64Arg, PSUPDRVSESSION pSession)
+{
+ /*
+ * Validate pGVM, pVM and idCpu for consistency and validity.
+ */
+ if ( pGVM != NULL
+ || pVM != NULL)
+ {
+ if (RT_LIKELY( RT_VALID_PTR(pGVM)
+ && RT_VALID_PTR(pVM)
+ && ((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0))
+ { /* likely */ }
+ else
+ {
+ SUPR0Printf("vmmR0EntryExWorker: Invalid pGVM=%p and/or pVM=%p! (op=%d)\n", pGVM, pVM, enmOperation);
+ return VERR_INVALID_POINTER;
+ }
+
+ if (RT_LIKELY(pGVM->pVM == pVM))
+ { /* likely */ }
+ else
+ {
+ SUPR0Printf("vmmR0EntryExWorker: pVM mismatch: got %p, pGVM->pVM=%p\n", pVM, pGVM->pVM);
+ return VERR_INVALID_PARAMETER;
+ }
+
+ if (RT_LIKELY(idCpu == NIL_VMCPUID || idCpu < pGVM->cCpus))
+ { /* likely */ }
+ else
+ {
+ SUPR0Printf("vmmR0EntryExWorker: Invalid idCpu %#x (cCpus=%#x)\n", idCpu, pGVM->cCpus);
+ return VERR_INVALID_PARAMETER;
+ }
+
+ if (RT_LIKELY( pVM->enmVMState >= VMSTATE_CREATING
+ && pVM->enmVMState <= VMSTATE_TERMINATED
+ && pVM->cCpus == pGVM->cCpus
+ && pVM->pSession == pSession
+ && pVM->pVMR0 == pVM))
+ { /* likely */ }
+ else
+ {
+ SUPR0Printf("vmmR0EntryExWorker: Invalid pVM=%p:{.enmVMState=%d, .cCpus=%#x(==%#x), .pSession=%p(==%p), .pVMR0=%p(==%p)}! (op=%d)\n",
+ pVM, pVM->enmVMState, pVM->cCpus, pGVM->cCpus, pVM->pSession, pSession, pVM->pVMR0, pVM, enmOperation);
+ return VERR_INVALID_POINTER;
+ }
+ }
+ else if (RT_LIKELY(idCpu == NIL_VMCPUID))
+ { /* likely */ }
+ else
+ {
+ SUPR0Printf("vmmR0EntryExWorker: Invalid idCpu=%u\n", idCpu);
+ return VERR_INVALID_PARAMETER;
+ }
+
+ /*
+ * SMAP fun.
+ */
+ VMM_CHECK_SMAP_SETUP();
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+
+ /*
+ * Process the request.
+ */
+ int rc;
+ switch (enmOperation)
+ {
+ /*
+ * GVM requests
+ */
+ case VMMR0_DO_GVMM_CREATE_VM:
+ if (pGVM == NULL && pVM == NULL && u64Arg == 0 && idCpu == NIL_VMCPUID)
+ rc = GVMMR0CreateVMReq((PGVMMCREATEVMREQ)pReqHdr, pSession);
+ else
+ rc = VERR_INVALID_PARAMETER;
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_DESTROY_VM:
+ if (pReqHdr == NULL && u64Arg == 0)
+ rc = GVMMR0DestroyVM(pGVM, pVM);
+ else
+ rc = VERR_INVALID_PARAMETER;
+ VMM_CHECK_SMAP_CHECK(RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_REGISTER_VMCPU:
+ if (pGVM != NULL && pVM != NULL)
+ rc = GVMMR0RegisterVCpu(pGVM, pVM, idCpu);
+ else
+ rc = VERR_INVALID_PARAMETER;
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_DEREGISTER_VMCPU:
+ if (pGVM != NULL && pVM != NULL)
+ rc = GVMMR0DeregisterVCpu(pGVM, pVM, idCpu);
+ else
+ rc = VERR_INVALID_PARAMETER;
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_SCHED_HALT:
+ if (pReqHdr)
+ return VERR_INVALID_PARAMETER;
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ rc = GVMMR0SchedHaltReq(pGVM, pVM, idCpu, u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_SCHED_WAKE_UP:
+ if (pReqHdr || u64Arg)
+ return VERR_INVALID_PARAMETER;
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ rc = GVMMR0SchedWakeUp(pGVM, pVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_SCHED_POKE:
+ if (pReqHdr || u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GVMMR0SchedPoke(pGVM, pVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_SCHED_WAKE_UP_AND_POKE_CPUS:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GVMMR0SchedWakeUpAndPokeCpusReq(pGVM, pVM, (PGVMMSCHEDWAKEUPANDPOKECPUSREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_SCHED_POLL:
+ if (pReqHdr || u64Arg > 1)
+ return VERR_INVALID_PARAMETER;
+ rc = GVMMR0SchedPoll(pGVM, pVM, idCpu, !!u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_QUERY_STATISTICS:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GVMMR0QueryStatisticsReq(pGVM, pVM, (PGVMMQUERYSTATISTICSSREQ)pReqHdr, pSession);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GVMM_RESET_STATISTICS:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GVMMR0ResetStatisticsReq(pGVM, pVM, (PGVMMRESETSTATISTICSSREQ)pReqHdr, pSession);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ /*
+ * Initialize the R0 part of a VM instance.
+ */
+ case VMMR0_DO_VMMR0_INIT:
+ rc = vmmR0InitVM(pGVM, pVM, RT_LODWORD(u64Arg), RT_HIDWORD(u64Arg));
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ /*
+ * Does EMT specific ring-0 init.
+ */
+ case VMMR0_DO_VMMR0_INIT_EMT:
+ rc = vmmR0InitVMEmt(pGVM, pVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ /*
+ * Terminate the R0 part of a VM instance.
+ */
+ case VMMR0_DO_VMMR0_TERM:
+ rc = VMMR0TermVM(pGVM, pVM, 0 /*idCpu*/);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ /*
+         * Attempt to enable HM mode and check the current setting.
+ */
+ case VMMR0_DO_HM_ENABLE:
+ rc = HMR0EnableAllCpus(pVM);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ /*
+ * Setup the hardware accelerated session.
+ */
+ case VMMR0_DO_HM_SETUP_VM:
+ rc = HMR0SetupVM(pVM);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ /*
+ * Switch to RC to execute Hypervisor function.
+ */
+ case VMMR0_DO_CALL_HYPERVISOR:
+ {
+#ifdef VBOX_WITH_RAW_MODE
+ /*
+ * Validate input / context.
+ */
+ if (RT_UNLIKELY(idCpu != 0))
+ return VERR_INVALID_CPU_ID;
+ if (RT_UNLIKELY(pVM->cCpus != 1))
+ return VERR_INVALID_PARAMETER;
+ PVMCPU pVCpu = &pVM->aCpus[idCpu];
+# ifndef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
+ if (RT_UNLIKELY(!PGMGetHyperCR3(pVCpu)))
+ return VERR_PGM_NO_CR3_SHADOW_ROOT;
+# endif
+ if (RT_FAILURE(g_rcRawModeUsability))
+ return g_rcRawModeUsability;
+
+ /*
+ * Disable interrupts.
+ */
+ RTCCUINTREG fFlags = ASMIntDisableFlags();
+
+ /*
+ * Get the host CPU identifiers, make sure they are valid and that
+ * we've got a TSC delta for the CPU.
+ */
+ RTCPUID idHostCpu;
+ uint32_t iHostCpuSet = RTMpCurSetIndexAndId(&idHostCpu);
+ if (RT_UNLIKELY(iHostCpuSet >= RTCPUSET_MAX_CPUS))
+ {
+ ASMSetFlags(fFlags);
+ return VERR_INVALID_CPU_INDEX;
+ }
+ if (RT_UNLIKELY(!SUPIsTscDeltaAvailableForCpuSetIndex(iHostCpuSet)))
+ {
+ ASMSetFlags(fFlags);
+ rc = SUPR0TscDeltaMeasureBySetIndex(pVM->pSession, iHostCpuSet, 0 /*fFlags*/,
+ 2 /*cMsWaitRetry*/, 5*RT_MS_1SEC /*cMsWaitThread*/,
+ 0 /*default cTries*/);
+ if (RT_FAILURE(rc) && rc != VERR_CPU_OFFLINE)
+ {
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ return rc;
+ }
+ }
+
+ /*
+ * Commit the CPU identifiers.
+ */
+# ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
+ CPUMR0SetLApic(pVCpu, iHostCpuSet);
+# endif
+ pVCpu->iHostCpuSet = iHostCpuSet;
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, idHostCpu);
+
+ /*
+ * We might need to disable VT-x if the active switcher turns off paging.
+ */
+ bool fVTxDisabled;
+ rc = HMR0EnterSwitcher(pVM, pVM->vmm.s.enmSwitcher, &fVTxDisabled);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Go through the wormhole...
+ */
+ rc = pVM->vmm.s.pfnR0ToRawMode(pVM);
+
+ /*
+ * Re-enable VT-x before we dispatch any pending host interrupts.
+ */
+ HMR0LeaveSwitcher(pVM, fVTxDisabled);
+
+ if ( rc == VINF_EM_RAW_INTERRUPT
+ || rc == VINF_EM_RAW_INTERRUPT_HYPER)
+ TRPMR0DispatchHostInterrupt(pVM);
+ }
+
+ /*
+ * Invalidate the host CPU identifiers as we restore interrupts.
+ */
+ pVCpu->iHostCpuSet = UINT32_MAX;
+ ASMAtomicWriteU32(&pVCpu->idHostCpu, NIL_RTCPUID);
+ ASMSetFlags(fFlags);
+
+#else /* !VBOX_WITH_RAW_MODE */
+ rc = VERR_RAW_MODE_NOT_SUPPORTED;
+#endif
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+ }
+
+ /*
+ * PGM wrappers.
+ */
+ case VMMR0_DO_PGM_ALLOCATE_HANDY_PAGES:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ rc = PGMR0PhysAllocateHandyPages(pGVM, pVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_PGM_FLUSH_HANDY_PAGES:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ rc = PGMR0PhysFlushHandyPages(pGVM, pVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_PGM_ALLOCATE_LARGE_HANDY_PAGE:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ rc = PGMR0PhysAllocateLargeHandyPage(pGVM, pVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_PGM_PHYS_SETUP_IOMMU:
+ if (idCpu != 0)
+ return VERR_INVALID_CPU_ID;
+ rc = PGMR0PhysSetupIoMmu(pGVM, pVM);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ /*
+ * GMM wrappers.
+ */
+ case VMMR0_DO_GMM_INITIAL_RESERVATION:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0InitialReservationReq(pGVM, pVM, idCpu, (PGMMINITIALRESERVATIONREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_UPDATE_RESERVATION:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0UpdateReservationReq(pGVM, pVM, idCpu, (PGMMUPDATERESERVATIONREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_ALLOCATE_PAGES:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0AllocatePagesReq(pGVM, pVM, idCpu, (PGMMALLOCATEPAGESREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_FREE_PAGES:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0FreePagesReq(pGVM, pVM, idCpu, (PGMMFREEPAGESREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_FREE_LARGE_PAGE:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0FreeLargePageReq(pGVM, pVM, idCpu, (PGMMFREELARGEPAGEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_QUERY_HYPERVISOR_MEM_STATS:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0QueryHypervisorMemoryStatsReq((PGMMMEMSTATSREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_QUERY_MEM_STATS:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0QueryMemoryStatsReq(pGVM, pVM, idCpu, (PGMMMEMSTATSREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_BALLOONED_PAGES:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0BalloonedPagesReq(pGVM, pVM, idCpu, (PGMMBALLOONEDPAGESREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_MAP_UNMAP_CHUNK:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0MapUnmapChunkReq(pGVM, pVM, (PGMMMAPUNMAPCHUNKREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_SEED_CHUNK:
+ if (pReqHdr)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0SeedChunk(pGVM, pVM, idCpu, (RTR3PTR)u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_REGISTER_SHARED_MODULE:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0RegisterSharedModuleReq(pGVM, pVM, idCpu, (PGMMREGISTERSHAREDMODULEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_UNREGISTER_SHARED_MODULE:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0UnregisterSharedModuleReq(pGVM, pVM, idCpu, (PGMMUNREGISTERSHAREDMODULEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_RESET_SHARED_MODULES:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ if ( u64Arg
+ || pReqHdr)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0ResetSharedModules(pGVM, pVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+#ifdef VBOX_WITH_PAGE_SHARING
+ case VMMR0_DO_GMM_CHECK_SHARED_MODULES:
+ {
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ if ( u64Arg
+ || pReqHdr)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0CheckSharedModules(pGVM, pVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+ }
+#endif
+
+#if defined(VBOX_STRICT) && HC_ARCH_BITS == 64
+ case VMMR0_DO_GMM_FIND_DUPLICATE_PAGE:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0FindDuplicatePageReq(pGVM, pVM, (PGMMFINDDUPLICATEPAGEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+#endif
+
+ case VMMR0_DO_GMM_QUERY_STATISTICS:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0QueryStatisticsReq(pGVM, pVM, (PGMMQUERYSTATISTICSSREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_GMM_RESET_STATISTICS:
+ if (u64Arg)
+ return VERR_INVALID_PARAMETER;
+ rc = GMMR0ResetStatisticsReq(pGVM, pVM, (PGMMRESETSTATISTICSSREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ /*
+ * A quick GCFGM mock-up.
+ */
+ /** @todo GCFGM with proper access control, ring-3 management interface and all that. */
+ case VMMR0_DO_GCFGM_SET_VALUE:
+ case VMMR0_DO_GCFGM_QUERY_VALUE:
+ {
+ if (pGVM || pVM || !pReqHdr || u64Arg || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ PGCFGMVALUEREQ pReq = (PGCFGMVALUEREQ)pReqHdr;
+ if (pReq->Hdr.cbReq != sizeof(*pReq))
+ return VERR_INVALID_PARAMETER;
+ if (enmOperation == VMMR0_DO_GCFGM_SET_VALUE)
+ {
+ rc = GVMMR0SetConfig(pReq->pSession, &pReq->szName[0], pReq->u64Value);
+ //if (rc == VERR_CFGM_VALUE_NOT_FOUND)
+ // rc = GMMR0SetConfig(pReq->pSession, &pReq->szName[0], pReq->u64Value);
+ }
+ else
+ {
+ rc = GVMMR0QueryConfig(pReq->pSession, &pReq->szName[0], &pReq->u64Value);
+ //if (rc == VERR_CFGM_VALUE_NOT_FOUND)
+ // rc = GMMR0QueryConfig(pReq->pSession, &pReq->szName[0], &pReq->u64Value);
+ }
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+ }
+
+ /*
+ * PDM Wrappers.
+ */
+ case VMMR0_DO_PDM_DRIVER_CALL_REQ_HANDLER:
+ {
+ if (!pReqHdr || u64Arg || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = PDMR0DriverCallReqHandler(pGVM, pVM, (PPDMDRIVERCALLREQHANDLERREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+ }
+
+ case VMMR0_DO_PDM_DEVICE_CALL_REQ_HANDLER:
+ {
+ if (!pReqHdr || u64Arg || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = PDMR0DeviceCallReqHandler(pGVM, pVM, (PPDMDEVICECALLREQHANDLERREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+ }
+
+ /*
+ * Requests to the internal networking service.
+ */
+ case VMMR0_DO_INTNET_OPEN:
+ {
+ PINTNETOPENREQ pReq = (PINTNETOPENREQ)pReqHdr;
+ if (u64Arg || !pReq || !vmmR0IsValidSession(pVM, pReq->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0OpenReq(pSession, pReq);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+ }
+
+ case VMMR0_DO_INTNET_IF_CLOSE:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pVM, ((PINTNETIFCLOSEREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfCloseReq(pSession, (PINTNETIFCLOSEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+
+ case VMMR0_DO_INTNET_IF_GET_BUFFER_PTRS:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pVM, ((PINTNETIFGETBUFFERPTRSREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfGetBufferPtrsReq(pSession, (PINTNETIFGETBUFFERPTRSREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_INTNET_IF_SET_PROMISCUOUS_MODE:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pVM, ((PINTNETIFSETPROMISCUOUSMODEREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfSetPromiscuousModeReq(pSession, (PINTNETIFSETPROMISCUOUSMODEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_INTNET_IF_SET_MAC_ADDRESS:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pVM, ((PINTNETIFSETMACADDRESSREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfSetMacAddressReq(pSession, (PINTNETIFSETMACADDRESSREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_INTNET_IF_SET_ACTIVE:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pVM, ((PINTNETIFSETACTIVEREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfSetActiveReq(pSession, (PINTNETIFSETACTIVEREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_INTNET_IF_SEND:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pVM, ((PINTNETIFSENDREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfSendReq(pSession, (PINTNETIFSENDREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_INTNET_IF_WAIT:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pVM, ((PINTNETIFWAITREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfWaitReq(pSession, (PINTNETIFWAITREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_INTNET_IF_ABORT_WAIT:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pVM, ((PINTNETIFWAITREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = IntNetR0IfAbortWaitReq(pSession, (PINTNETIFABORTWAITREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+#ifdef VBOX_WITH_PCI_PASSTHROUGH
+ /*
+ * Requests to host PCI driver service.
+ */
+ case VMMR0_DO_PCIRAW_REQ:
+ if (u64Arg || !pReqHdr || !vmmR0IsValidSession(pVM, ((PPCIRAWSENDREQ)pReqHdr)->pSession, pSession) || idCpu != NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = PciRawR0ProcessReq(pGVM, pVM, pSession, (PPCIRAWSENDREQ)pReqHdr);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+#endif
+
+ /*
+ * NEM requests.
+ */
+#ifdef VBOX_WITH_NEM_R0
+# if defined(RT_ARCH_AMD64) && defined(RT_OS_WINDOWS)
+ case VMMR0_DO_NEM_INIT_VM:
+ if (u64Arg || pReqHdr || idCpu != 0)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0InitVM(pGVM, pVM);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_INIT_VM_PART_2:
+ if (u64Arg || pReqHdr || idCpu != 0)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0InitVMPart2(pGVM, pVM);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_MAP_PAGES:
+ if (u64Arg || pReqHdr || idCpu == NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0MapPages(pGVM, pVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_UNMAP_PAGES:
+ if (u64Arg || pReqHdr || idCpu == NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0UnmapPages(pGVM, pVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_EXPORT_STATE:
+ if (u64Arg || pReqHdr || idCpu == NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0ExportState(pGVM, pVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_IMPORT_STATE:
+ if (pReqHdr || idCpu == NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0ImportState(pGVM, pVM, idCpu, u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_QUERY_CPU_TICK:
+ if (u64Arg || pReqHdr || idCpu == NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0QueryCpuTick(pGVM, pVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_RESUME_CPU_TICK_ON_ALL:
+ if (pReqHdr || idCpu == NIL_VMCPUID)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0ResumeCpuTickOnAll(pGVM, pVM, idCpu, u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+ case VMMR0_DO_NEM_UPDATE_STATISTICS:
+ if (u64Arg || pReqHdr)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0UpdateStatistics(pGVM, pVM, idCpu);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+
+# if 1 && defined(DEBUG_bird)
+ case VMMR0_DO_NEM_EXPERIMENT:
+ if (pReqHdr)
+ return VERR_INVALID_PARAMETER;
+ rc = NEMR0DoExperiment(pGVM, pVM, idCpu, u64Arg);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+# endif
+# endif
+#endif
+
+ /*
+ * For profiling.
+ */
+ case VMMR0_DO_NOP:
+ case VMMR0_DO_SLOW_NOP:
+ return VINF_SUCCESS;
+
+ /*
+ * For testing Ring-0 APIs invoked in this environment.
+ */
+ case VMMR0_DO_TESTS:
+ /** @todo make new test */
+ return VINF_SUCCESS;
+
+
+#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
+ case VMMR0_DO_TEST_SWITCHER3264:
+ if (idCpu == NIL_VMCPUID)
+ return VERR_INVALID_CPU_ID;
+ rc = HMR0TestSwitcher3264(pVM);
+ VMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
+ break;
+#endif
+ default:
+ /*
+ * We're returning VERR_NOT_SUPPORTED here so we've got something other
+ * than -1, which the interrupt gate glue code might return.
+ */
+ Log(("operation %#x is not supported\n", enmOperation));
+ return VERR_NOT_SUPPORTED;
+ }
+ return rc;
+}
+
+
+/**
+ * Argument for vmmR0EntryExWrapper containing the arguments for VMMR0EntryEx.
+ */
+typedef struct VMMR0ENTRYEXARGS
+{
+ PGVM pGVM;
+ PVM pVM;
+ VMCPUID idCpu;
+ VMMR0OPERATION enmOperation;
+ PSUPVMMR0REQHDR pReq;
+ uint64_t u64Arg;
+ PSUPDRVSESSION pSession;
+} VMMR0ENTRYEXARGS;
+/** Pointer to a vmmR0EntryExWrapper argument package. */
+typedef VMMR0ENTRYEXARGS *PVMMR0ENTRYEXARGS;
+
+/**
+ * This is just a longjmp wrapper function for VMMR0EntryEx calls.
+ *
+ * @returns VBox status code.
+ * @param pvArgs The argument package
+ */
+static DECLCALLBACK(int) vmmR0EntryExWrapper(void *pvArgs)
+{
+ return vmmR0EntryExWorker(((PVMMR0ENTRYEXARGS)pvArgs)->pGVM,
+ ((PVMMR0ENTRYEXARGS)pvArgs)->pVM,
+ ((PVMMR0ENTRYEXARGS)pvArgs)->idCpu,
+ ((PVMMR0ENTRYEXARGS)pvArgs)->enmOperation,
+ ((PVMMR0ENTRYEXARGS)pvArgs)->pReq,
+ ((PVMMR0ENTRYEXARGS)pvArgs)->u64Arg,
+ ((PVMMR0ENTRYEXARGS)pvArgs)->pSession);
+}
+
+
+/**
+ * The Ring 0 entry point, called by the support library (SUP).
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVM The cross context VM structure.
+ * @param idCpu Virtual CPU ID argument. Must be NIL_VMCPUID if pVM
+ * is NIL_RTR0PTR, and may be NIL_VMCPUID if it isn't.
+ * @param enmOperation Which operation to execute.
+ * @param pReq Pointer to the SUPVMMR0REQHDR packet. Optional.
+ * @param u64Arg Some simple constant argument.
+ * @param pSession The session of the caller.
+ * @remarks Assume called with interrupts _enabled_.
+ */
+VMMR0DECL(int) VMMR0EntryEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, VMMR0OPERATION enmOperation,
+ PSUPVMMR0REQHDR pReq, uint64_t u64Arg, PSUPDRVSESSION pSession)
+{
+ /*
+ * Requests that should only happen on the EMT thread will be
+ * wrapped in a setjmp so we can assert without causing trouble.
+ */
+ if ( pVM != NULL
+ && pGVM != NULL
+ && idCpu < pGVM->cCpus
+ && pVM->pVMR0 != NULL)
+ {
+ switch (enmOperation)
+ {
+ /* These might/will be called before VMMR3Init. */
+ case VMMR0_DO_GMM_INITIAL_RESERVATION:
+ case VMMR0_DO_GMM_UPDATE_RESERVATION:
+ case VMMR0_DO_GMM_ALLOCATE_PAGES:
+ case VMMR0_DO_GMM_FREE_PAGES:
+ case VMMR0_DO_GMM_BALLOONED_PAGES:
+ /* On the mac we might not have a valid jmp buf, so check these as well. */
+ case VMMR0_DO_VMMR0_INIT:
+ case VMMR0_DO_VMMR0_TERM:
+ {
+ PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
+ PVMCPU pVCpu = &pVM->aCpus[idCpu];
+ RTNATIVETHREAD hNativeThread = RTThreadNativeSelf();
+ if (RT_LIKELY( pGVCpu->hEMT == hNativeThread
+ && pVCpu->hNativeThreadR0 == hNativeThread))
+ {
+ if (!pVCpu->vmm.s.CallRing3JmpBufR0.pvSavedStack)
+ break;
+
+ /** @todo validate this EMT claim... GVM knows. */
+ VMMR0ENTRYEXARGS Args;
+ Args.pGVM = pGVM;
+ Args.pVM = pVM;
+ Args.idCpu = idCpu;
+ Args.enmOperation = enmOperation;
+ Args.pReq = pReq;
+ Args.u64Arg = u64Arg;
+ Args.pSession = pSession;
+ return vmmR0CallRing3SetJmpEx(&pVCpu->vmm.s.CallRing3JmpBufR0, vmmR0EntryExWrapper, &Args);
+ }
+ return VERR_VM_THREAD_NOT_EMT;
+ }
+
+ default:
+ break;
+ }
+ }
+ return vmmR0EntryExWorker(pGVM, pVM, idCpu, enmOperation, pReq, u64Arg, pSession);
+}
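The EMT-only operations above are routed through an argument package (VMMR0ENTRYEXARGS) and a trampoline so the worker can run under the ring-0 setjmp machinery. As a rough illustration of that pattern only — generic names and plain C setjmp instead of the VMMR0JMPBUF assembly helpers — it boils down to something like this:

    #include <setjmp.h>
    #include <stdint.h>

    /* Argument package: capture everything the worker needs by value. */
    typedef struct WORKERARGS
    {
        int      iOperation;
        uint64_t u64Arg;
        void    *pvReq;
    } WORKERARGS;

    static int workerFunction(void *pvArgs)
    {
        WORKERARGS *pArgs = (WORKERARGS *)pvArgs;
        (void)pArgs; /* ... dispatch on pArgs->iOperation ... */
        return 0;
    }

    /* Wrapper with the single-pointer callback signature the setjmp helper expects. */
    static int callViaSetJmp(jmp_buf *pJmpBuf, int (*pfn)(void *), void *pvUser)
    {
        if (setjmp(*pJmpBuf) == 0)
            return pfn(pvUser);     /* first pass: run the worker          */
        return -1;                  /* longjmp taken: report the bail-out  */
    }

    static int entryPoint(jmp_buf *pJmpBuf, int iOperation, uint64_t u64Arg, void *pvReq)
    {
        WORKERARGS Args = { iOperation, u64Arg, pvReq };
        return callViaSetJmp(pJmpBuf, workerFunction, &Args);
    }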
+
+
+/**
+ * Checks whether we've armed the ring-0 long jump machinery.
+ *
+ * @returns @c true / @c false
+ * @param pVCpu The cross context virtual CPU structure.
+ * @thread EMT
+ * @sa VMMIsLongJumpArmed
+ */
+VMMR0_INT_DECL(bool) VMMR0IsLongJumpArmed(PVMCPU pVCpu)
+{
+#ifdef RT_ARCH_X86
+ return pVCpu->vmm.s.CallRing3JmpBufR0.eip
+ && !pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call;
+#else
+ return pVCpu->vmm.s.CallRing3JmpBufR0.rip
+ && !pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call;
+#endif
+}
+
+
+/**
+ * Checks whether we've done a ring-3 long jump.
+ *
+ * @returns @c true / @c false
+ * @param pVCpu The cross context virtual CPU structure.
+ * @thread EMT
+ */
+VMMR0_INT_DECL(bool) VMMR0IsInRing3LongJump(PVMCPU pVCpu)
+{
+ return pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call;
+}
+
+
+/**
+ * Internal R0 logger worker: Flush logger.
+ *
+ * @param pLogger The logger instance to flush.
+ * @remark This function must be exported!
+ */
+VMMR0DECL(void) vmmR0LoggerFlush(PRTLOGGER pLogger)
+{
+#ifdef LOG_ENABLED
+ /*
+ * Convert the pLogger into a VM handle and 'call' back to Ring-3.
+ * (This is a bit paranoid code.)
+ */
+ PVMMR0LOGGER pR0Logger = (PVMMR0LOGGER)((uintptr_t)pLogger - RT_UOFFSETOF(VMMR0LOGGER, Logger));
+ if ( !VALID_PTR(pR0Logger)
+ || !VALID_PTR(pR0Logger + 1)
+ || pLogger->u32Magic != RTLOGGER_MAGIC)
+ {
+# ifdef DEBUG
+ SUPR0Printf("vmmR0LoggerFlush: pLogger=%p!\n", pLogger);
+# endif
+ return;
+ }
+ if (pR0Logger->fFlushingDisabled)
+ return; /* quietly */
+
+ PVM pVM = pR0Logger->pVM;
+ if ( !VALID_PTR(pVM)
+ || pVM->pVMR0 != pVM)
+ {
+# ifdef DEBUG
+ SUPR0Printf("vmmR0LoggerFlush: pVM=%p! pVMR0=%p! pLogger=%p\n", pVM, pVM->pVMR0, pLogger);
+# endif
+ return;
+ }
+
+ PVMCPU pVCpu = VMMGetCpu(pVM);
+ if (pVCpu)
+ {
+ /*
+ * Check that the jump buffer is armed.
+ */
+# ifdef RT_ARCH_X86
+ if ( !pVCpu->vmm.s.CallRing3JmpBufR0.eip
+ || pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call)
+# else
+ if ( !pVCpu->vmm.s.CallRing3JmpBufR0.rip
+ || pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call)
+# endif
+ {
+# ifdef DEBUG
+ SUPR0Printf("vmmR0LoggerFlush: Jump buffer isn't armed!\n");
+# endif
+ return;
+ }
+ VMMRZCallRing3(pVM, pVCpu, VMMCALLRING3_VMM_LOGGER_FLUSH, 0);
+ }
+# ifdef DEBUG
+ else
+ SUPR0Printf("vmmR0LoggerFlush: invalid VCPU context!\n");
+# endif
+#else
+ NOREF(pLogger);
+#endif /* LOG_ENABLED */
+}
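The flush worker above recovers its owning VMMR0LOGGER from the embedded RTLOGGER by subtracting the member offset (the RT_UOFFSETOF expression). A minimal, self-contained sketch of that container-of idiom with placeholder types:

    #include <stddef.h>
    #include <stdint.h>

    typedef struct INNER { unsigned uMagic; } INNER;
    typedef struct OUTER
    {
        int   fFlushingDisabled;
        INNER Inner;                     /* embedded member handed to callbacks */
    } OUTER;

    /* Given a pointer to the embedded member, recover the containing structure. */
    static OUTER *outerFromInner(INNER *pInner)
    {
        return (OUTER *)((uintptr_t)pInner - offsetof(OUTER, Inner));
    }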
+
+#ifdef LOG_ENABLED
+
+/**
+ * Disables flushing of the ring-0 debug log.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(void) VMMR0LogFlushDisable(PVMCPU pVCpu)
+{
+ if (pVCpu->vmm.s.pR0LoggerR0)
+ pVCpu->vmm.s.pR0LoggerR0->fFlushingDisabled = true;
+ if (pVCpu->vmm.s.pR0RelLoggerR0)
+ pVCpu->vmm.s.pR0RelLoggerR0->fFlushingDisabled = true;
+}
+
+
+/**
+ * Enables flushing of the ring-0 debug log.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(void) VMMR0LogFlushEnable(PVMCPU pVCpu)
+{
+ if (pVCpu->vmm.s.pR0LoggerR0)
+ pVCpu->vmm.s.pR0LoggerR0->fFlushingDisabled = false;
+ if (pVCpu->vmm.s.pR0RelLoggerR0)
+ pVCpu->vmm.s.pR0RelLoggerR0->fFlushingDisabled = false;
+}
+
+
+/**
+ * Checks if log flushing is disabled or not.
+ *
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+VMMR0_INT_DECL(bool) VMMR0IsLogFlushDisabled(PVMCPU pVCpu)
+{
+ if (pVCpu->vmm.s.pR0LoggerR0)
+ return pVCpu->vmm.s.pR0LoggerR0->fFlushingDisabled;
+ if (pVCpu->vmm.s.pR0RelLoggerR0)
+ return pVCpu->vmm.s.pR0RelLoggerR0->fFlushingDisabled;
+ return true;
+}
+
+#endif /* LOG_ENABLED */
+
+/**
+ * Override RTLogRelGetDefaultInstanceEx so we can do LogRel to VBox.log from EMTs in ring-0.
+ */
+DECLEXPORT(PRTLOGGER) RTLogRelGetDefaultInstanceEx(uint32_t fFlagsAndGroup)
+{
+ PGVMCPU pGVCpu = GVMMR0GetGVCpuByEMT(NIL_RTNATIVETHREAD);
+ if (pGVCpu)
+ {
+ PVMCPU pVCpu = pGVCpu->pVCpu;
+ if (RT_VALID_PTR(pVCpu))
+ {
+ PVMMR0LOGGER pVmmLogger = pVCpu->vmm.s.pR0RelLoggerR0;
+ if (RT_VALID_PTR(pVmmLogger))
+ {
+ if ( pVmmLogger->fCreated
+ && pVmmLogger->pVM == pGVCpu->pVM)
+ {
+ if (pVmmLogger->Logger.fFlags & RTLOGFLAGS_DISABLED)
+ return NULL;
+ uint16_t const fFlags = RT_LO_U16(fFlagsAndGroup);
+ uint16_t const iGroup = RT_HI_U16(fFlagsAndGroup);
+ if ( iGroup != UINT16_MAX
+ && ( ( pVmmLogger->Logger.afGroups[iGroup < pVmmLogger->Logger.cGroups ? iGroup : 0]
+ & (fFlags | (uint32_t)RTLOGGRPFLAGS_ENABLED))
+ != (fFlags | (uint32_t)RTLOGGRPFLAGS_ENABLED)))
+ return NULL;
+ return &pVmmLogger->Logger;
+ }
+ }
+ }
+ }
+ return SUPR0GetDefaultLogRelInstanceEx(fFlagsAndGroup);
+}
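The override above packs the logging group index into the high 16 bits of fFlagsAndGroup and the group flags into the low 16 bits, and only hands out the logger when the group's flag word contains both the requested flags and the enabled bit. The check can be pictured like this (placeholder names, not the IPRT API):

    #include <stdint.h>
    #include <stdbool.h>

    #define MY_GRPFLAGS_ENABLED 0x1u    /* stand-in for RTLOGGRPFLAGS_ENABLED */

    static bool isGroupLoggingEnabled(uint32_t fFlagsAndGroup,
                                      const uint32_t *pafGroups, uint16_t cGroups)
    {
        uint16_t const fFlags = (uint16_t)(fFlagsAndGroup & 0xffffu);  /* low half  */
        uint16_t const iGroup = (uint16_t)(fFlagsAndGroup >> 16);      /* high half */
        if (iGroup == UINT16_MAX)          /* "no specific group" always passes */
            return true;
        uint32_t const fWanted = fFlags | MY_GRPFLAGS_ENABLED;
        uint32_t const fGroup  = pafGroups[iGroup < cGroups ? iGroup : 0];
        return (fGroup & fWanted) == fWanted;
    }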
+
+
+/**
+ * Jump back to ring-3 if we're the EMT and the longjmp is armed.
+ *
+ * @returns true if the breakpoint should be hit, false if it should be ignored.
+ */
+DECLEXPORT(bool) RTCALL RTAssertShouldPanic(void)
+{
+#if 0
+ return true;
+#else
+ PVM pVM = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD);
+ if (pVM)
+ {
+ PVMCPU pVCpu = VMMGetCpu(pVM);
+
+ if (pVCpu)
+ {
+#ifdef RT_ARCH_X86
+ if ( pVCpu->vmm.s.CallRing3JmpBufR0.eip
+ && !pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call)
+#else
+ if ( pVCpu->vmm.s.CallRing3JmpBufR0.rip
+ && !pVCpu->vmm.s.CallRing3JmpBufR0.fInRing3Call)
+#endif
+ {
+ int rc = VMMRZCallRing3(pVM, pVCpu, VMMCALLRING3_VM_R0_ASSERTION, 0);
+ return RT_FAILURE_NP(rc);
+ }
+ }
+ }
+#ifdef RT_OS_LINUX
+ return true;
+#else
+ return false;
+#endif
+#endif
+}
+
+
+/**
+ * Override this so we can push it up to ring-3.
+ *
+ * @param pszExpr Expression. Can be NULL.
+ * @param uLine Location line number.
+ * @param pszFile Location file name.
+ * @param pszFunction Location function name.
+ */
+DECLEXPORT(void) RTCALL RTAssertMsg1Weak(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
+{
+ /*
+ * To the log.
+ */
+ LogAlways(("\n!!R0-Assertion Failed!!\n"
+ "Expression: %s\n"
+ "Location : %s(%d) %s\n",
+ pszExpr, pszFile, uLine, pszFunction));
+
+ /*
+ * To the global VMM buffer.
+ */
+ PVM pVM = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD);
+ if (pVM)
+ RTStrPrintf(pVM->vmm.s.szRing0AssertMsg1, sizeof(pVM->vmm.s.szRing0AssertMsg1),
+ "\n!!R0-Assertion Failed!!\n"
+ "Expression: %.*s\n"
+ "Location : %s(%d) %s\n",
+ sizeof(pVM->vmm.s.szRing0AssertMsg1) / 4 * 3, pszExpr,
+ pszFile, uLine, pszFunction);
+
+ /*
+ * Continue the normal way.
+ */
+ RTAssertMsg1(pszExpr, uLine, pszFile, pszFunction);
+}
+
+
+/**
+ * Callback for RTLogFormatV which writes to the ring-3 log port.
+ * See PFNLOGOUTPUT() for details.
+ */
+static DECLCALLBACK(size_t) rtLogOutput(void *pv, const char *pachChars, size_t cbChars)
+{
+ for (size_t i = 0; i < cbChars; i++)
+ {
+ LogAlways(("%c", pachChars[i])); NOREF(pachChars);
+ }
+
+ NOREF(pv);
+ return cbChars;
+}
+
+
+/**
+ * Override this so we can push it up to ring-3.
+ *
+ * @param pszFormat The format string.
+ * @param va Arguments.
+ */
+DECLEXPORT(void) RTCALL RTAssertMsg2WeakV(const char *pszFormat, va_list va)
+{
+ va_list vaCopy;
+
+ /*
+ * Push the message to the loggers.
+ */
+ PRTLOGGER pLog = RTLogGetDefaultInstance(); /* Don't initialize it here... */
+ if (pLog)
+ {
+ va_copy(vaCopy, va);
+ RTLogFormatV(rtLogOutput, pLog, pszFormat, vaCopy);
+ va_end(vaCopy);
+ }
+ pLog = RTLogRelGetDefaultInstance();
+ if (pLog)
+ {
+ va_copy(vaCopy, va);
+ RTLogFormatV(rtLogOutput, pLog, pszFormat, vaCopy);
+ va_end(vaCopy);
+ }
+
+ /*
+ * Push it to the global VMM buffer.
+ */
+ PVM pVM = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD);
+ if (pVM)
+ {
+ va_copy(vaCopy, va);
+ RTStrPrintfV(pVM->vmm.s.szRing0AssertMsg2, sizeof(pVM->vmm.s.szRing0AssertMsg2), pszFormat, vaCopy);
+ va_end(vaCopy);
+ }
+
+ /*
+ * Continue the normal way.
+ */
+ RTAssertMsg2V(pszFormat, va);
+}
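Note how the same va_list is fanned out to several consumers above: each one gets its own va_copy, since a va_list must not be traversed twice. A small standalone example of the pattern:

    #include <stdarg.h>
    #include <stdio.h>

    /* Forward the same arguments to two sinks; each consumer gets a fresh copy. */
    static void logToBoth(const char *pszFormat, ...)
    {
        va_list va;
        va_start(va, pszFormat);

        va_list vaCopy;
        va_copy(vaCopy, va);
        vfprintf(stdout, pszFormat, vaCopy);   /* first consumer  */
        va_end(vaCopy);

        vfprintf(stderr, pszFormat, va);       /* second consumer */
        va_end(va);
    }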
+
diff --git a/src/VBox/VMM/VMMR0/VMMR0.def b/src/VBox/VMM/VMMR0/VMMR0.def
new file mode 100644
index 00000000..0735d86d
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/VMMR0.def
@@ -0,0 +1,117 @@
+; $Id: VMMR0.def $
+;; @file
+; VMM Ring 0 DLL - Definition file.
+
+;
+; Copyright (C) 2006-2019 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+LIBRARY VMMR0.r0
+EXPORTS
+ ; data
+
+ ; code
+ PDMCritSectEnter
+ PDMCritSectEnterDebug
+ PDMCritSectIsOwner
+ PDMCritSectLeave
+ PDMHCCritSectScheduleExitEvent
+ PDMCritSectTryEnter
+ PDMCritSectTryEnterDebug
+ PDMQueueAlloc
+ PDMQueueInsert
+ PGMHandlerPhysicalPageTempOff
+ PGMShwMakePageWritable
+ PGMPhysSimpleWriteGCPhys
+ PGMPhysSimpleReadGCPtr
+ PGMPhysSimpleWriteGCPtr
+ PGMPhysReadGCPtr
+ PGMPhysWriteGCPtr
+ PGMPhysSimpleDirtyWriteGCPtr
+ IOMMMIOResetRegion
+ IOMMMIOMapMMIO2Page
+ RTLogDefaultInstance
+ RTLogDefaultInstanceEx
+ RTLogRelGetDefaultInstance
+ RTLogRelGetDefaultInstanceEx
+ RTLogLogger
+ RTLogLoggerEx
+ RTLogLoggerExV
+ RTTimeMilliTS
+ RTTraceBufAddMsgF
+ RTTraceBufAddPos
+ RTTraceBufAddPosMsgF
+ TMTimerFromMilli
+ TMTimerFromMicro
+ TMTimerFromNano
+ TMTimerGet
+ TMTimerGetFreq
+ TMTimerIsActive
+ TMTimerIsLockOwner
+ TMTimerLock
+ TMTimerSet
+ TMTimerSetRelative
+ TMTimerSetMillies
+ TMTimerSetMicro
+ TMTimerSetNano
+ TMTimerSetFrequencyHint
+ TMTimerStop
+ TMTimerUnlock
+ VMMGetSvnRev
+ vmmR0LoggerFlush
+ vmmR0LoggerWrapper
+ VMSetError
+ VMSetErrorV
+
+ ; Internal Networking
+ IntNetR0Open
+ IntNetR0IfClose
+ IntNetR0IfGetBufferPtrs
+ IntNetR0IfSetPromiscuousMode
+ IntNetR0IfSetMacAddress
+ IntNetR0IfSetActive
+ IntNetR0IfSend
+ IntNetR0IfWait
+
+ ; Network Shaper
+ PDMNsAllocateBandwidth
+
+ ; runtime
+ RTAssertMsg1Weak
+ RTAssertMsg2Weak
+ RTAssertShouldPanic
+ RTCrc32
+ RTOnceSlow
+ RTTimeNanoTSLegacySyncInvarNoDelta
+ RTTimeNanoTSLegacySyncInvarWithDelta
+ RTTimeNanoTSLegacyAsync
+ RTTimeNanoTSLFenceSyncInvarNoDelta
+ RTTimeNanoTSLFenceSyncInvarWithDelta
+ RTTimeNanoTSLFenceAsync
+ RTTimeSystemNanoTS
+ RTTimeNanoTS
+ ASMMultU64ByU32DivByU32 ; not-os2
+ ASMAtomicXchgU8 ; not-x86
+ ASMAtomicXchgU16 ; not-x86
+ ASMBitFirstSet ; not-x86
+ ASMNopPause ; not-x86
+ nocrt_memchr
+ nocrt_memcmp
+ nocrt_memcpy
+ memcpy=nocrt_memcpy ; not-os2
+ nocrt_memmove
+ nocrt_memset
+ memset=nocrt_memset ; not-os2
+ nocrt_strcpy
+ nocrt_strcmp
+ nocrt_strchr
+ nocrt_strlen
+
diff --git a/src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm b/src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm
new file mode 100644
index 00000000..8735dfa6
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/VMMR0JmpA-amd64.asm
@@ -0,0 +1,491 @@
+; $Id: VMMR0JmpA-amd64.asm $
+;; @file
+; VMM - R0 SetJmp / LongJmp routines for AMD64.
+;
+
+;
+; Copyright (C) 2006-2019 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+;*******************************************************************************
+;* Header Files *
+;*******************************************************************************
+%define RT_ASM_WITH_SEH64
+%include "VBox/asmdefs.mac"
+%include "VMMInternal.mac"
+%include "VBox/err.mac"
+%include "VBox/param.mac"
+
+
+;*******************************************************************************
+;* Defined Constants And Macros *
+;*******************************************************************************
+%define RESUME_MAGIC 07eadf00dh
+%define STACK_PADDING 0eeeeeeeeeeeeeeeeh
+
+;; Workaround for linux 4.6 fast/slow syscall stack depth difference.
+%ifdef VMM_R0_SWITCH_STACK
+ %define STACK_FUZZ_SIZE 0
+%else
+ %define STACK_FUZZ_SIZE 128
+%endif
+
+
+BEGINCODE
+
+
+;;
+; The setjmp variant used for calling Ring-3.
+;
+; This differs from the normal setjmp in that it will resume VMMRZCallRing3 if we're
+; in the middle of a ring-3 call. Another difference is the function pointer and
+; arguments. This has to do with the resuming code and the stack frame of the caller.
+;
+; @returns VINF_SUCCESS on success or whatever is passed to vmmR0CallRing3LongJmp.
+; @param pJmpBuf msc:rcx gcc:rdi x86:[esp+0x04] Our jmp_buf.
+; @param pfn msc:rdx gcc:rsi x86:[esp+0x08] The function to be called when not resuming.
+; @param pvUser1 msc:r8 gcc:rdx x86:[esp+0x0c] The argument of that function.
+; @param pvUser2 msc:r9 gcc:rcx x86:[esp+0x10] The argument of that function.
+;
+BEGINPROC vmmR0CallRing3SetJmp
+GLOBALNAME vmmR0CallRing3SetJmp2
+GLOBALNAME vmmR0CallRing3SetJmpEx
+ ;
+ ; Save the registers.
+ ;
+ push rbp
+ SEH64_PUSH_xBP
+ mov rbp, rsp
+ SEH64_SET_FRAME_xBP 0
+ %ifdef ASM_CALL64_MSC
+ sub rsp, 30h + STACK_FUZZ_SIZE ; (10h is used by resume (??), 20h for callee spill area)
+ SEH64_ALLOCATE_STACK 30h + STACK_FUZZ_SIZE
+SEH64_END_PROLOGUE
+ mov r11, rdx ; pfn
+ mov rdx, rcx ; pJmpBuf;
+ %else
+ sub rsp, 10h + STACK_FUZZ_SIZE ; (10h is used by resume (??))
+ SEH64_ALLOCATE_STACK 10h + STACK_FUZZ_SIZE
+SEH64_END_PROLOGUE
+ mov r8, rdx ; pvUser1 (save it like MSC)
+ mov r9, rcx ; pvUser2 (save it like MSC)
+ mov r11, rsi ; pfn
+ mov rdx, rdi ; pJmpBuf
+ %endif
+ mov [xDX + VMMR0JMPBUF.rbx], rbx
+ %ifdef ASM_CALL64_MSC
+ mov [xDX + VMMR0JMPBUF.rsi], rsi
+ mov [xDX + VMMR0JMPBUF.rdi], rdi
+ %endif
+ mov [xDX + VMMR0JMPBUF.rbp], rbp
+ mov [xDX + VMMR0JMPBUF.r12], r12
+ mov [xDX + VMMR0JMPBUF.r13], r13
+ mov [xDX + VMMR0JMPBUF.r14], r14
+ mov [xDX + VMMR0JMPBUF.r15], r15
+ mov xAX, [rbp + 8] ; (not really necessary, except for validity check)
+ mov [xDX + VMMR0JMPBUF.rip], xAX
+ %ifdef ASM_CALL64_MSC
+ lea r10, [rsp + 20h] ; must save the spill area
+ %else
+ lea r10, [rsp]
+ %endif
+ mov [xDX + VMMR0JMPBUF.rsp], r10
+ %ifdef RT_OS_WINDOWS
+ movdqa [xDX + VMMR0JMPBUF.xmm6], xmm6
+ movdqa [xDX + VMMR0JMPBUF.xmm7], xmm7
+ movdqa [xDX + VMMR0JMPBUF.xmm8], xmm8
+ movdqa [xDX + VMMR0JMPBUF.xmm9], xmm9
+ movdqa [xDX + VMMR0JMPBUF.xmm10], xmm10
+ movdqa [xDX + VMMR0JMPBUF.xmm11], xmm11
+ movdqa [xDX + VMMR0JMPBUF.xmm12], xmm12
+ movdqa [xDX + VMMR0JMPBUF.xmm13], xmm13
+ movdqa [xDX + VMMR0JMPBUF.xmm14], xmm14
+ movdqa [xDX + VMMR0JMPBUF.xmm15], xmm15
+ %endif
+ pushf
+ pop xAX
+ mov [xDX + VMMR0JMPBUF.rflags], xAX
+
+ ;
+ ; If we're not in a ring-3 call, call pfn and return.
+ ;
+ test byte [xDX + VMMR0JMPBUF.fInRing3Call], 1
+ jnz .resume
+
+ %ifdef VMM_R0_SWITCH_STACK
+ mov r15, [xDX + VMMR0JMPBUF.pvSavedStack]
+ test r15, r15
+ jz .entry_error
+ %ifdef VBOX_STRICT
+ cmp dword [r15], 0h
+ jne .entry_error
+ mov rdi, r15
+ mov rcx, VMM_STACK_SIZE / 8
+ mov rax, qword 0eeeeeeeffeeeeeeeh
+ repne stosq
+ mov [rdi - 10h], rbx
+ %endif
+ lea r15, [r15 + VMM_STACK_SIZE - 40h]
+ mov rsp, r15 ; Switch stack!
+ %endif ; VMM_R0_SWITCH_STACK
+
+ mov r12, rdx ; Save pJmpBuf.
+ %ifdef ASM_CALL64_MSC
+ mov rcx, r8 ; pvUser -> arg0
+ mov rdx, r9
+ %else
+ mov rdi, r8 ; pvUser -> arg0
+ mov rsi, r9
+ %endif
+ call r11
+ mov rdx, r12 ; Restore pJmpBuf
+
+ %ifdef VMM_R0_SWITCH_STACK
+ %ifdef VBOX_STRICT
+ mov r15, [xDX + VMMR0JMPBUF.pvSavedStack]
+ mov dword [r15], 0h ; Reset the marker
+ %endif
+ %endif
+
+ ;
+ ; Return like in the long jump but clear rip, no shortcuts here.
+ ;
+.proper_return:
+%ifdef RT_OS_WINDOWS
+ movdqa xmm6, [xDX + VMMR0JMPBUF.xmm6 ]
+ movdqa xmm7, [xDX + VMMR0JMPBUF.xmm7 ]
+ movdqa xmm8, [xDX + VMMR0JMPBUF.xmm8 ]
+ movdqa xmm9, [xDX + VMMR0JMPBUF.xmm9 ]
+ movdqa xmm10, [xDX + VMMR0JMPBUF.xmm10]
+ movdqa xmm11, [xDX + VMMR0JMPBUF.xmm11]
+ movdqa xmm12, [xDX + VMMR0JMPBUF.xmm12]
+ movdqa xmm13, [xDX + VMMR0JMPBUF.xmm13]
+ movdqa xmm14, [xDX + VMMR0JMPBUF.xmm14]
+ movdqa xmm15, [xDX + VMMR0JMPBUF.xmm15]
+%endif
+ mov rbx, [xDX + VMMR0JMPBUF.rbx]
+%ifdef ASM_CALL64_MSC
+ mov rsi, [xDX + VMMR0JMPBUF.rsi]
+ mov rdi, [xDX + VMMR0JMPBUF.rdi]
+%endif
+ mov r12, [xDX + VMMR0JMPBUF.r12]
+ mov r13, [xDX + VMMR0JMPBUF.r13]
+ mov r14, [xDX + VMMR0JMPBUF.r14]
+ mov r15, [xDX + VMMR0JMPBUF.r15]
+ mov rbp, [xDX + VMMR0JMPBUF.rbp]
+ and qword [xDX + VMMR0JMPBUF.rip], byte 0 ; used for valid check.
+ mov rsp, [xDX + VMMR0JMPBUF.rsp]
+ push qword [xDX + VMMR0JMPBUF.rflags]
+ popf
+ leave
+ ret
+
+.entry_error:
+ mov eax, VERR_VMM_SET_JMP_ERROR
+ jmp .proper_return
+
+.stack_overflow:
+ mov eax, VERR_VMM_SET_JMP_STACK_OVERFLOW
+ jmp .proper_return
+
+ ;
+ ; Aborting resume.
+ ; Note! No need to restore XMM registers here since we haven't touched them yet.
+ ;
+.bad:
+ and qword [xDX + VMMR0JMPBUF.rip], byte 0 ; used for valid check.
+ mov rbx, [xDX + VMMR0JMPBUF.rbx]
+ %ifdef ASM_CALL64_MSC
+ mov rsi, [xDX + VMMR0JMPBUF.rsi]
+ mov rdi, [xDX + VMMR0JMPBUF.rdi]
+ %endif
+ mov r12, [xDX + VMMR0JMPBUF.r12]
+ mov r13, [xDX + VMMR0JMPBUF.r13]
+ mov r14, [xDX + VMMR0JMPBUF.r14]
+ mov r15, [xDX + VMMR0JMPBUF.r15]
+ mov eax, VERR_VMM_SET_JMP_ABORTED_RESUME
+ leave
+ ret
+
+ ;
+ ; Resume the VMMRZCallRing3 call.
+ ;
+.resume:
+ %ifndef VMM_R0_SWITCH_STACK
+ ; Sanity-check the incoming stack, applying fuzz if needed.
+ sub r10, [xDX + VMMR0JMPBUF.SpCheck]
+ jz .resume_stack_checked_out
+ add r10, STACK_FUZZ_SIZE ; plus/minus STACK_FUZZ_SIZE is fine.
+ cmp r10, STACK_FUZZ_SIZE * 2
+ ja .bad
+
+ mov r10, [xDX + VMMR0JMPBUF.SpCheck]
+ mov [xDX + VMMR0JMPBUF.rsp], r10 ; Must be updated in case of another long jump (used for save calc).
+
+.resume_stack_checked_out:
+ mov ecx, [xDX + VMMR0JMPBUF.cbSavedStack]
+ cmp rcx, VMM_STACK_SIZE
+ ja .bad
+ test rcx, 7
+ jnz .bad
+ mov rdi, [xDX + VMMR0JMPBUF.SpCheck]
+ sub rdi, [xDX + VMMR0JMPBUF.SpResume]
+ cmp rcx, rdi
+ jne .bad
+ %endif
+
+%ifdef VMM_R0_SWITCH_STACK
+ ; Switch stack.
+ mov rsp, [xDX + VMMR0JMPBUF.SpResume]
+%else
+ ; Restore the stack.
+ mov ecx, [xDX + VMMR0JMPBUF.cbSavedStack]
+ shr ecx, 3
+ mov rsi, [xDX + VMMR0JMPBUF.pvSavedStack]
+ mov rdi, [xDX + VMMR0JMPBUF.SpResume]
+ mov rsp, rdi
+ rep movsq
+%endif ; !VMM_R0_SWITCH_STACK
+ mov byte [xDX + VMMR0JMPBUF.fInRing3Call], 0
+
+ ;
+ ; Continue where we left off.
+ ;
+%ifdef VBOX_STRICT
+ pop rax ; magic
+ cmp rax, RESUME_MAGIC
+ je .magic_ok
+ mov ecx, 0123h
+ mov [ecx], edx
+.magic_ok:
+%endif
+%ifdef RT_OS_WINDOWS
+ movdqa xmm6, [rsp + 000h]
+ movdqa xmm7, [rsp + 010h]
+ movdqa xmm8, [rsp + 020h]
+ movdqa xmm9, [rsp + 030h]
+ movdqa xmm10, [rsp + 040h]
+ movdqa xmm11, [rsp + 050h]
+ movdqa xmm12, [rsp + 060h]
+ movdqa xmm13, [rsp + 070h]
+ movdqa xmm14, [rsp + 080h]
+ movdqa xmm15, [rsp + 090h]
+ add rsp, 0a0h
+%endif
+ popf
+ pop rbx
+%ifdef ASM_CALL64_MSC
+ pop rsi
+ pop rdi
+%endif
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ pop rbp
+ xor eax, eax ; VINF_SUCCESS
+ ret
+ENDPROC vmmR0CallRing3SetJmp
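In rough C terms the routine above does the following: save the callee-saved state into the jump buffer, call the supplied function when no ring-3 call is pending, and otherwise restore the previously copied stack and fall out of the original call as if it had returned. A simplified sketch (field names invented; the actual stack and register restore is only described, since it cannot be written portably):

    #include <stddef.h>
    #include <stdint.h>

    typedef struct JMPBUFSKETCH
    {
        int      fInRing3Call;      /* set by the long-jump side             */
        uint8_t *pbSavedStack;      /* stack copy taken when long-jumping    */
        size_t   cbSavedStack;
        void    *pvSpResume;        /* stack pointer to resume at            */
    } JMPBUFSKETCH;

    static int setJmpSketch(JMPBUFSKETCH *pBuf, int (*pfn)(void *, void *),
                            void *pvUser1, void *pvUser2)
    {
        if (!pBuf->fInRing3Call)
            return pfn(pvUser1, pvUser2);   /* first entry: just run the callee */

        /* Resume path: the assembly copies cbSavedStack bytes from pbSavedStack
           back to pvSpResume, restores the saved registers and flags, and then
           returns VINF_SUCCESS from the interrupted vmmR0CallRing3SetJmp frame. */
        pBuf->fInRing3Call = 0;
        return 0;                           /* VINF_SUCCESS */
    }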
+
+
+;;
+; Worker for VMMRZCallRing3.
+; This will save the stack and registers.
+;
+; @param pJmpBuf msc:rcx gcc:rdi x86:[ebp+8] Pointer to the jump buffer.
+; @param rc msc:rdx gcc:rsi x86:[ebp+c] The return code.
+;
+BEGINPROC vmmR0CallRing3LongJmp
+ ;
+ ; Save the registers on the stack.
+ ;
+ push rbp
+ SEH64_PUSH_xBP
+ mov rbp, rsp
+ SEH64_SET_FRAME_xBP 0
+ push r15
+ SEH64_PUSH_GREG r15
+ push r14
+ SEH64_PUSH_GREG r14
+ push r13
+ SEH64_PUSH_GREG r13
+ push r12
+ SEH64_PUSH_GREG r12
+%ifdef ASM_CALL64_MSC
+ push rdi
+ SEH64_PUSH_GREG rdi
+ push rsi
+ SEH64_PUSH_GREG rsi
+%endif
+ push rbx
+ SEH64_PUSH_GREG rbx
+ pushf
+ SEH64_ALLOCATE_STACK 8
+%ifdef RT_OS_WINDOWS
+ sub rsp, 0a0h
+ SEH64_ALLOCATE_STACK 0a0h
+ movdqa [rsp + 000h], xmm6
+ movdqa [rsp + 010h], xmm7
+ movdqa [rsp + 020h], xmm8
+ movdqa [rsp + 030h], xmm9
+ movdqa [rsp + 040h], xmm10
+ movdqa [rsp + 050h], xmm11
+ movdqa [rsp + 060h], xmm12
+ movdqa [rsp + 070h], xmm13
+ movdqa [rsp + 080h], xmm14
+ movdqa [rsp + 090h], xmm15
+%endif
+%ifdef VBOX_STRICT
+ push RESUME_MAGIC
+ SEH64_ALLOCATE_STACK 8
+%endif
+SEH64_END_PROLOGUE
+
+ ;
+ ; Normalize the parameters.
+ ;
+%ifdef ASM_CALL64_MSC
+ mov eax, edx ; rc
+ mov rdx, rcx ; pJmpBuf
+%else
+ mov rdx, rdi ; pJmpBuf
+ mov eax, esi ; rc
+%endif
+
+ ;
+ ; Is the jump buffer armed?
+ ;
+ cmp qword [xDX + VMMR0JMPBUF.rip], byte 0
+ je .nok
+
+ ;
+ ; Sanity checks.
+ ;
+ mov rdi, [xDX + VMMR0JMPBUF.pvSavedStack]
+ test rdi, rdi ; darwin may set this to 0.
+ jz .nok
+ mov [xDX + VMMR0JMPBUF.SpResume], rsp
+ %ifndef VMM_R0_SWITCH_STACK
+ mov rsi, rsp
+ mov rcx, [xDX + VMMR0JMPBUF.rsp]
+ sub rcx, rsi
+
+ ; two sanity checks on the size.
+ cmp rcx, VMM_STACK_SIZE ; check max size.
+ jnbe .nok
+
+ ;
+ ; Copy the stack
+ ;
+ test ecx, 7 ; check alignment
+ jnz .nok
+ mov [xDX + VMMR0JMPBUF.cbSavedStack], ecx
+ shr ecx, 3
+ rep movsq
+
+ %endif ; !VMM_R0_SWITCH_STACK
+
+ ; Save a PC and return PC here to assist unwinding.
+.unwind_point:
+ lea rcx, [.unwind_point wrt RIP]
+ mov [xDX + VMMR0JMPBUF.SavedEipForUnwind], rcx
+ mov rcx, [xDX + VMMR0JMPBUF.rbp]
+ lea rcx, [rcx + 8]
+ mov [xDX + VMMR0JMPBUF.UnwindRetPcLocation], rcx
+ mov rcx, [rcx]
+ mov [xDX + VMMR0JMPBUF.UnwindRetPcValue], rcx
+
+ ; Save RSP & RBP to enable stack dumps
+ mov rcx, rbp
+ mov [xDX + VMMR0JMPBUF.SavedEbp], rcx
+ sub rcx, 8
+ mov [xDX + VMMR0JMPBUF.SavedEsp], rcx
+
+ ; store the last pieces of info.
+ mov rcx, [xDX + VMMR0JMPBUF.rsp]
+ mov [xDX + VMMR0JMPBUF.SpCheck], rcx
+ mov byte [xDX + VMMR0JMPBUF.fInRing3Call], 1
+
+ ;
+ ; Do the long jump.
+ ;
+%ifdef RT_OS_WINDOWS
+ movdqa xmm6, [xDX + VMMR0JMPBUF.xmm6 ]
+ movdqa xmm7, [xDX + VMMR0JMPBUF.xmm7 ]
+ movdqa xmm8, [xDX + VMMR0JMPBUF.xmm8 ]
+ movdqa xmm9, [xDX + VMMR0JMPBUF.xmm9 ]
+ movdqa xmm10, [xDX + VMMR0JMPBUF.xmm10]
+ movdqa xmm11, [xDX + VMMR0JMPBUF.xmm11]
+ movdqa xmm12, [xDX + VMMR0JMPBUF.xmm12]
+ movdqa xmm13, [xDX + VMMR0JMPBUF.xmm13]
+ movdqa xmm14, [xDX + VMMR0JMPBUF.xmm14]
+ movdqa xmm15, [xDX + VMMR0JMPBUF.xmm15]
+%endif
+ mov rbx, [xDX + VMMR0JMPBUF.rbx]
+%ifdef ASM_CALL64_MSC
+ mov rsi, [xDX + VMMR0JMPBUF.rsi]
+ mov rdi, [xDX + VMMR0JMPBUF.rdi]
+%endif
+ mov r12, [xDX + VMMR0JMPBUF.r12]
+ mov r13, [xDX + VMMR0JMPBUF.r13]
+ mov r14, [xDX + VMMR0JMPBUF.r14]
+ mov r15, [xDX + VMMR0JMPBUF.r15]
+ mov rbp, [xDX + VMMR0JMPBUF.rbp]
+ mov rsp, [xDX + VMMR0JMPBUF.rsp]
+ push qword [xDX + VMMR0JMPBUF.rflags]
+ popf
+ leave
+ ret
+
+ ;
+ ; Failure
+ ;
+.nok:
+%ifdef VBOX_STRICT
+ pop rax ; magic
+ cmp rax, RESUME_MAGIC
+ je .magic_ok
+ mov ecx, 0123h
+ mov [rcx], edx
+.magic_ok:
+%endif
+ mov eax, VERR_VMM_LONG_JMP_ERROR
+%ifdef RT_OS_WINDOWS
+ add rsp, 0a0h ; skip XMM registers since they are unmodified.
+%endif
+ popf
+ pop rbx
+%ifdef ASM_CALL64_MSC
+ pop rsi
+ pop rdi
+%endif
+ pop r12
+ pop r13
+ pop r14
+ pop r15
+ leave
+ ret
+ENDPROC vmmR0CallRing3LongJmp
+
+
+;;
+; Internal R0 logger worker: Logger wrapper.
+;
+; @cproto VMMR0DECL(void) vmmR0LoggerWrapper(const char *pszFormat, ...)
+;
+BEGINPROC_EXPORTED vmmR0LoggerWrapper
+SEH64_END_PROLOGUE
+ int3
+ int3
+ int3
+ ret
+ENDPROC vmmR0LoggerWrapper
+
diff --git a/src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm b/src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm
new file mode 100644
index 00000000..3a89a244
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/VMMR0JmpA-x86.asm
@@ -0,0 +1,401 @@
+; $Id: VMMR0JmpA-x86.asm $
+;; @file
+; VMM - R0 SetJmp / LongJmp routines for X86.
+;
+
+;
+; Copyright (C) 2006-2019 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+;*******************************************************************************
+;* Header Files *
+;*******************************************************************************
+%include "VBox/asmdefs.mac"
+%include "VMMInternal.mac"
+%include "VBox/err.mac"
+%include "VBox/param.mac"
+
+
+;*******************************************************************************
+;* Defined Constants And Macros *
+;*******************************************************************************
+%define RESUME_MAGIC 07eadf00dh
+%define STACK_PADDING 0eeeeeeeeh
+
+
+; For vmmR0LoggerWrapper. (The other architecture(s) use(s) C99 variadic macros.)
+extern NAME(RTLogLogger)
+
+
+BEGINCODE
+
+
+;;
+; The setjmp variant used for calling Ring-3.
+;
+; This differs from the normal setjmp in that it will resume VMMRZCallRing3 if we're
+; in the middle of a ring-3 call. Another difference is the function pointer and
+; arguments. This has to do with the resuming code and the stack frame of the caller.
+;
+; @returns VINF_SUCCESS on success or whatever is passed to vmmR0CallRing3LongJmp.
+; @param pJmpBuf msc:rcx gcc:rdi x86:[esp+0x04] Our jmp_buf.
+; @param pfn msc:rdx gcc:rsi x86:[esp+0x08] The function to be called when not resuming.
+; @param pvUser1 msc:r8 gcc:rdx x86:[esp+0x0c] The argument of that function.
+; @param pvUser2 msc:r9 gcc:rcx x86:[esp+0x10] The argument of that function.
+;
+BEGINPROC vmmR0CallRing3SetJmp
+GLOBALNAME vmmR0CallRing3SetJmp2
+GLOBALNAME vmmR0CallRing3SetJmpEx
+ ;
+ ; Save the registers.
+ ;
+ mov edx, [esp + 4h] ; pJmpBuf
+ mov [xDX + VMMR0JMPBUF.ebx], ebx
+ mov [xDX + VMMR0JMPBUF.esi], esi
+ mov [xDX + VMMR0JMPBUF.edi], edi
+ mov [xDX + VMMR0JMPBUF.ebp], ebp
+ mov xAX, [esp]
+ mov [xDX + VMMR0JMPBUF.eip], xAX
+ lea ecx, [esp + 4] ; (used in resume)
+ mov [xDX + VMMR0JMPBUF.esp], ecx
+ pushf
+ pop xAX
+ mov [xDX + VMMR0JMPBUF.eflags], xAX
+
+ ;
+ ; If we're not in a ring-3 call, call pfn and return.
+ ;
+ test byte [xDX + VMMR0JMPBUF.fInRing3Call], 1
+ jnz .resume
+
+ mov ebx, edx ; pJmpBuf -> ebx (persistent reg)
+%ifdef VMM_R0_SWITCH_STACK
+ mov esi, [ebx + VMMR0JMPBUF.pvSavedStack]
+ test esi, esi
+ jz .entry_error
+ %ifdef VBOX_STRICT
+ cmp dword [esi], 0h
+ jne .entry_error
+ mov edx, esi
+ mov edi, esi
+ mov ecx, VMM_STACK_SIZE / 4
+ mov eax, STACK_PADDING
+ repne stosd
+ %endif
+ lea esi, [esi + VMM_STACK_SIZE - 32]
+ mov [esi + 1ch], dword 0deadbeefh ; Marker 1.
+ mov [esi + 18h], ebx ; Save pJmpBuf pointer.
+ mov [esi + 14h], dword 00c00ffeeh ; Marker 2.
+ mov [esi + 10h], dword 0f00dbeefh ; Marker 3.
+ mov edx, [esp + 10h] ; pvArg2
+ mov ecx, [esp + 0ch] ; pvArg1
+ mov eax, [esp + 08h] ; pfn
+ %if 1 ; Use this to eat up some extra stack - handy for finding paths using lots of stack.
+ %define FRAME_OFFSET 0
+ %else
+ %define FRAME_OFFSET 1024
+ %endif
+ mov [esi - FRAME_OFFSET + 04h], edx
+ mov [esi - FRAME_OFFSET ], ecx
+ lea esp, [esi - FRAME_OFFSET] ; Switch stack!
+ call eax
+ and dword [esi + 1ch], byte 0 ; reset marker.
+
+ %ifdef VBOX_STRICT
+ ; Calc stack usage and check for overflows.
+ mov edi, [ebx + VMMR0JMPBUF.pvSavedStack]
+ cmp dword [edi], STACK_PADDING ; Check for obvious stack overflow.
+ jne .stack_overflow
+ mov esi, eax ; save eax
+ mov eax, STACK_PADDING
+ mov ecx, VMM_STACK_SIZE / 4
+ cld
+ repe scasd
+ shl ecx, 2 ; *4
+ cmp ecx, VMM_STACK_SIZE - 64 ; Less than 64 bytes left -> overflow as well.
+ mov eax, esi ; restore eax in case of overflow (esi remains used)
+ jae .stack_overflow_almost
+
+ ; Update stack usage statistics.
+ cmp ecx, [ebx + VMMR0JMPBUF.cbUsedMax] ; New max usage?
+ jle .no_used_max
+ mov [ebx + VMMR0JMPBUF.cbUsedMax], ecx
+.no_used_max:
+ ; To simplify the average stuff, just historize before we hit div errors.
+ inc dword [ebx + VMMR0JMPBUF.cUsedTotal]
+ test [ebx + VMMR0JMPBUF.cUsedTotal], dword 0c0000000h
+ jz .no_historize
+ mov dword [ebx + VMMR0JMPBUF.cUsedTotal], 2
+ mov edi, [ebx + VMMR0JMPBUF.cbUsedAvg]
+ mov [ebx + VMMR0JMPBUF.cbUsedTotal], edi
+ mov dword [ebx + VMMR0JMPBUF.cbUsedTotal + 4], 0
+.no_historize:
+ add [ebx + VMMR0JMPBUF.cbUsedTotal], ecx
+ adc dword [ebx + VMMR0JMPBUF.cbUsedTotal + 4], 0
+ mov eax, [ebx + VMMR0JMPBUF.cbUsedTotal]
+ mov edx, [ebx + VMMR0JMPBUF.cbUsedTotal + 4]
+ mov edi, [ebx + VMMR0JMPBUF.cUsedTotal]
+ div edi
+ mov [ebx + VMMR0JMPBUF.cbUsedAvg], eax
+
+ mov eax, esi ; restore eax (final, esi released)
+
+ mov edi, [ebx + VMMR0JMPBUF.pvSavedStack]
+ mov dword [edi], 0h ; Reset the overflow marker.
+ %endif ; VBOX_STRICT
+
+%else ; !VMM_R0_SWITCH_STACK
+ mov ecx, [esp + 0ch] ; pvArg1
+ mov edx, [esp + 10h] ; pvArg2
+ mov eax, [esp + 08h] ; pfn
+ sub esp, 12 ; align the stack on a 16-byte boundary.
+ mov [esp ], ecx
+ mov [esp + 04h], edx
+ call eax
+%endif ; !VMM_R0_SWITCH_STACK
+ mov edx, ebx ; pJmpBuf -> edx (volatile reg)
+
+ ;
+ ; Return like in the long jump but clear eip, no shortcuts here.
+ ;
+.proper_return:
+ mov ebx, [xDX + VMMR0JMPBUF.ebx]
+ mov esi, [xDX + VMMR0JMPBUF.esi]
+ mov edi, [xDX + VMMR0JMPBUF.edi]
+ mov ebp, [xDX + VMMR0JMPBUF.ebp]
+ mov xCX, [xDX + VMMR0JMPBUF.eip]
+ and dword [xDX + VMMR0JMPBUF.eip], byte 0 ; used for valid check.
+ mov esp, [xDX + VMMR0JMPBUF.esp]
+ push dword [xDX + VMMR0JMPBUF.eflags]
+ popf
+ jmp xCX
+
+.entry_error:
+ mov eax, VERR_VMM_SET_JMP_ERROR
+ jmp .proper_return
+
+.stack_overflow:
+ mov eax, VERR_VMM_SET_JMP_STACK_OVERFLOW
+ mov edx, ebx
+ jmp .proper_return
+
+.stack_overflow_almost:
+ mov eax, VERR_VMM_SET_JMP_STACK_OVERFLOW
+ mov edx, ebx
+ jmp .proper_return
+
+ ;
+ ; Aborting resume.
+ ;
+.bad:
+ and dword [xDX + VMMR0JMPBUF.eip], byte 0 ; used for valid check.
+ mov edi, [xDX + VMMR0JMPBUF.edi]
+ mov esi, [xDX + VMMR0JMPBUF.esi]
+ mov ebx, [xDX + VMMR0JMPBUF.ebx]
+ mov eax, VERR_VMM_SET_JMP_ABORTED_RESUME
+ ret
+
+ ;
+ ; Resume the VMMRZCallRing3 call.
+ ;
+.resume:
+ ; Sanity checks.
+%ifdef VMM_R0_SWITCH_STACK
+ mov eax, [xDX + VMMR0JMPBUF.pvSavedStack]
+ %ifdef RT_STRICT
+ cmp dword [eax], STACK_PADDING
+ %endif
+ lea eax, [eax + VMM_STACK_SIZE - 32]
+ cmp dword [eax + 1ch], 0deadbeefh ; Marker 1.
+ jne .bad
+ %ifdef RT_STRICT
+ cmp [esi + 18h], edx ; The saved pJmpBuf pointer.
+ jne .bad
+ cmp dword [esi + 14h], 00c00ffeeh ; Marker 2.
+ jne .bad
+ cmp dword [esi + 10h], 0f00dbeefh ; Marker 3.
+ jne .bad
+ %endif
+%else ; !VMM_R0_SWITCH_STACK
+ cmp ecx, [xDX + VMMR0JMPBUF.SpCheck]
+ jne .bad
+.espCheck_ok:
+ mov ecx, [xDX + VMMR0JMPBUF.cbSavedStack]
+ cmp ecx, VMM_STACK_SIZE
+ ja .bad
+ test ecx, 3
+ jnz .bad
+ mov edi, [xDX + VMMR0JMPBUF.esp]
+ sub edi, [xDX + VMMR0JMPBUF.SpResume]
+ cmp ecx, edi
+ jne .bad
+%endif
+
+%ifdef VMM_R0_SWITCH_STACK
+ ; Switch stack.
+ mov esp, [xDX + VMMR0JMPBUF.SpResume]
+%else
+ ; Restore the stack.
+ mov ecx, [xDX + VMMR0JMPBUF.cbSavedStack]
+ shr ecx, 2
+ mov esi, [xDX + VMMR0JMPBUF.pvSavedStack]
+ mov edi, [xDX + VMMR0JMPBUF.SpResume]
+ mov esp, edi
+ rep movsd
+%endif ; !VMM_R0_SWITCH_STACK
+ mov byte [xDX + VMMR0JMPBUF.fInRing3Call], 0
+
+ ;
+ ; Continue where we left off.
+ ;
+%ifdef VBOX_STRICT
+ pop eax ; magic
+ cmp eax, RESUME_MAGIC
+ je .magic_ok
+ mov ecx, 0123h
+ mov [ecx], edx
+.magic_ok:
+%endif
+ popf
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+ xor eax, eax ; VINF_SUCCESS
+ ret
+ENDPROC vmmR0CallRing3SetJmp
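The VBOX_STRICT part of the 32-bit routine above measures stack consumption by pre-filling the alternate stack with a padding pattern, scanning for untouched padding after the call returns, and keeping both a maximum and a running average (folding the totals before the counters could overflow the division). A rough C equivalent with invented names:

    #include <stddef.h>
    #include <stdint.h>

    #define STACK_PAD   0xeeeeeeeeU
    #define STACK_BYTES 16384u              /* stand-in for VMM_STACK_SIZE */

    typedef struct STACKSTATS
    {
        uint32_t cbUsedMax;
        uint64_t cbUsedTotal;
        uint32_t cUsedTotal;
        uint32_t cbUsedAvg;
    } STACKSTATS;

    static void accountStackUsage(const uint32_t *pau32Stack, STACKSTATS *pStats)
    {
        /* Count padding dwords still intact at the deep end of the stack. */
        size_t cUntouched = 0;
        while (cUntouched < STACK_BYTES / 4 && pau32Stack[cUntouched] == STACK_PAD)
            cUntouched++;
        uint32_t const cbUsed = (uint32_t)(STACK_BYTES - cUntouched * 4);

        if (cbUsed > pStats->cbUsedMax)
            pStats->cbUsedMax = cbUsed;

        /* Historize: restart the totals from the current average before the
           sample counter gets large enough to cause division trouble. */
        if (++pStats->cUsedTotal & 0xc0000000U)
        {
            pStats->cUsedTotal  = 2;
            pStats->cbUsedTotal = pStats->cbUsedAvg;
        }
        pStats->cbUsedTotal += cbUsed;
        pStats->cbUsedAvg    = (uint32_t)(pStats->cbUsedTotal / pStats->cUsedTotal);
    }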
+
+
+;;
+; Worker for VMMRZCallRing3.
+; This will save the stack and registers.
+;
+; @param pJmpBuf msc:rcx gcc:rdi x86:[ebp+8] Pointer to the jump buffer.
+; @param rc msc:rdx gcc:rsi x86:[ebp+c] The return code.
+;
+BEGINPROC vmmR0CallRing3LongJmp
+ ;
+ ; Save the registers on the stack.
+ ;
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ push ebx
+ pushf
+%ifdef VBOX_STRICT
+ push RESUME_MAGIC
+%endif
+
+ ;
+ ; Load parameters.
+ ;
+ mov edx, [ebp + 08h] ; pJmpBuf
+ mov eax, [ebp + 0ch] ; rc
+
+ ;
+ ; Is the jump buffer armed?
+ ;
+ cmp dword [xDX + VMMR0JMPBUF.eip], byte 0
+ je .nok
+
+ ;
+ ; Sanity checks.
+ ;
+ mov edi, [xDX + VMMR0JMPBUF.pvSavedStack]
+ test edi, edi ; darwin may set this to 0.
+ jz .nok
+ mov [xDX + VMMR0JMPBUF.SpResume], esp
+%ifndef VMM_R0_SWITCH_STACK
+ mov esi, esp
+ mov ecx, [xDX + VMMR0JMPBUF.esp]
+ sub ecx, esi
+
+ ; two sanity checks on the size.
+ cmp ecx, VMM_STACK_SIZE ; check max size.
+ jnbe .nok
+
+ ;
+ ; Copy the stack.
+ ;
+ test ecx, 3 ; check alignment
+ jnz .nok
+ mov [xDX + VMMR0JMPBUF.cbSavedStack], ecx
+ shr ecx, 2
+ rep movsd
+%endif ; !VMM_R0_SWITCH_STACK
+
+ ; Save a PC here to assist unwinding.
+.unwind_point:
+ mov dword [xDX + VMMR0JMPBUF.SavedEipForUnwind], .unwind_point
+ mov ecx, [xDX + VMMR0JMPBUF.ebp]
+ lea ecx, [ecx + 4]
+ mov [xDX + VMMR0JMPBUF.UnwindRetPcLocation], ecx
+
+ ; Save ESP & EBP to enable stack dumps
+ mov ecx, ebp
+ mov [xDX + VMMR0JMPBUF.SavedEbp], ecx
+ sub ecx, 4
+ mov [xDX + VMMR0JMPBUF.SavedEsp], ecx
+
+ ; store the last pieces of info.
+ mov ecx, [xDX + VMMR0JMPBUF.esp]
+ mov [xDX + VMMR0JMPBUF.SpCheck], ecx
+ mov byte [xDX + VMMR0JMPBUF.fInRing3Call], 1
+
+ ;
+ ; Do the long jump.
+ ;
+ mov ebx, [xDX + VMMR0JMPBUF.ebx]
+ mov esi, [xDX + VMMR0JMPBUF.esi]
+ mov edi, [xDX + VMMR0JMPBUF.edi]
+ mov ebp, [xDX + VMMR0JMPBUF.ebp]
+ mov ecx, [xDX + VMMR0JMPBUF.eip]
+ mov [xDX + VMMR0JMPBUF.UnwindRetPcValue], ecx
+ mov esp, [xDX + VMMR0JMPBUF.esp]
+ push dword [xDX + VMMR0JMPBUF.eflags]
+ popf
+ jmp ecx
+
+ ;
+ ; Failure
+ ;
+.nok:
+%ifdef VBOX_STRICT
+ pop eax ; magic
+ cmp eax, RESUME_MAGIC
+ je .magic_ok
+ mov ecx, 0123h
+ mov [ecx], edx
+.magic_ok:
+%endif
+ popf
+ pop ebx
+ pop esi
+ pop edi
+ mov eax, VERR_VMM_LONG_JMP_ERROR
+ leave
+ ret
+ENDPROC vmmR0CallRing3LongJmp
+
+
+;;
+; Internal R0 logger worker: Logger wrapper.
+;
+; @cproto VMMR0DECL(void) vmmR0LoggerWrapper(const char *pszFormat, ...)
+;
+EXPORTEDNAME vmmR0LoggerWrapper
+ push 0 ; assumes we're the wrapper for a default instance.
+ call NAME(RTLogLogger)
+ add esp, byte 4
+ ret
+ENDPROC vmmR0LoggerWrapper
+
diff --git a/src/VBox/VMM/VMMR0/VMMR0TripleFaultHack.cpp b/src/VBox/VMM/VMMR0/VMMR0TripleFaultHack.cpp
new file mode 100644
index 00000000..bcafbd96
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/VMMR0TripleFaultHack.cpp
@@ -0,0 +1,209 @@
+/* $Id: VMMR0TripleFaultHack.cpp $ */
+/** @file
+ * VMM - Host Context Ring 0, Triple Fault Debugging Hack.
+ *
+ * Only use this when desperate. May not work on all systems, esp. newer ones,
+ * since it requires BIOS support for the warm reset vector at 0467h.
+ */
+
+/*
+ * Copyright (C) 2011-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_VMM
+#include <VBox/vmm/vmm.h>
+#include "VMMInternal.h"
+#include <VBox/param.h>
+
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/assert.h>
+#include <iprt/memobj.h>
+#include <iprt/mem.h>
+#include <iprt/string.h>
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+static RTR0MEMOBJ g_hMemPage0;
+static RTR0MEMOBJ g_hMapPage0;
+static uint8_t *g_pbPage0;
+
+static RTR0MEMOBJ g_hMemLowCore;
+static RTR0MEMOBJ g_hMapLowCore;
+static uint8_t *g_pbLowCore;
+static RTHCPHYS g_HCPhysLowCore;
+
+/** @name For restoring memory we've overwritten.
+ * @{ */
+static uint32_t g_u32SavedVector;
+static uint16_t g_u16SavedCadIndicator;
+static void *g_pvSavedLowCore;
+/** @} */
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+/* VMMR0TripleFaultHackA.asm */
+DECLASM(void) vmmR0TripleFaultHackStart(void);
+DECLASM(void) vmmR0TripleFaultHackEnd(void);
+DECLASM(void) vmmR0TripleFaultHackTripleFault(void);
+
+
+/**
+ * Initializes the triple fault / boot hack.
+ *
+ * Always call vmmR0TripleFaultHackTerm to clean up, even when this call fails.
+ *
+ * @returns VBox status code.
+ */
+int vmmR0TripleFaultHackInit(void)
+{
+ /*
+ * Map the first page.
+ */
+ int rc = RTR0MemObjEnterPhys(&g_hMemPage0, 0, PAGE_SIZE, RTMEM_CACHE_POLICY_DONT_CARE);
+ AssertRCReturn(rc, rc);
+ rc = RTR0MemObjMapKernel(&g_hMapPage0, g_hMemPage0, (void *)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE);
+ AssertRCReturn(rc, rc);
+ g_pbPage0 = (uint8_t *)RTR0MemObjAddress(g_hMapPage0);
+ LogRel(("0040:0067 = %04x:%04x\n", RT_MAKE_U16(g_pbPage0[0x467+2], g_pbPage0[0x467+3]), RT_MAKE_U16(g_pbPage0[0x467+0], g_pbPage0[0x467+1]) ));
+
+ /*
+ * Allocate some "low core" memory. If that fails, just grab some memory.
+ */
+ //rc = RTR0MemObjAllocPhys(&g_hMemLowCore, PAGE_SIZE, _1M - 1);
+ //__debugbreak();
+ rc = RTR0MemObjEnterPhys(&g_hMemLowCore, 0x7000, PAGE_SIZE, RTMEM_CACHE_POLICY_DONT_CARE);
+ AssertRCReturn(rc, rc);
+ rc = RTR0MemObjMapKernel(&g_hMapLowCore, g_hMemLowCore, (void *)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE);
+ AssertRCReturn(rc, rc);
+ g_pbLowCore = (uint8_t *)RTR0MemObjAddress(g_hMapLowCore);
+ g_HCPhysLowCore = RTR0MemObjGetPagePhysAddr(g_hMapLowCore, 0);
+ LogRel(("Low core at %RHp mapped at %p\n", g_HCPhysLowCore, g_pbLowCore));
+
+ /*
+ * Save memory we'll be overwriting.
+ */
+ g_pvSavedLowCore = RTMemAlloc(PAGE_SIZE);
+ AssertReturn(g_pvSavedLowCore, VERR_NO_MEMORY);
+ memcpy(g_pvSavedLowCore, g_pbLowCore, PAGE_SIZE);
+
+ g_u32SavedVector = RT_MAKE_U32_FROM_U8(g_pbPage0[0x467], g_pbPage0[0x467+1], g_pbPage0[0x467+2], g_pbPage0[0x467+3]);
+ g_u16SavedCadIndicator = RT_MAKE_U16(g_pbPage0[0x472], g_pbPage0[0x472+1]);
+
+ /*
+ * Install the code.
+ */
+ size_t cbCode = (uintptr_t)&vmmR0TripleFaultHackEnd - (uintptr_t)&vmmR0TripleFaultHackStart;
+ AssertLogRelReturn(cbCode <= PAGE_SIZE, VERR_OUT_OF_RANGE);
+ memcpy(g_pbLowCore, &vmmR0TripleFaultHackStart, cbCode);
+
+ g_pbPage0[0x467+0] = 0x00;
+ g_pbPage0[0x467+1] = 0x70;
+ g_pbPage0[0x467+2] = 0x00;
+ g_pbPage0[0x467+3] = 0x00;
+
+ g_pbPage0[0x472+0] = 0x34;
+ g_pbPage0[0x472+1] = 0x12;
+
+ /*
+ * Configure the status port and cmos shutdown command.
+ */
+ uint32_t fSaved = ASMIntDisableFlags();
+
+ ASMOutU8(0x70, 0x0f);
+ ASMOutU8(0x71, 0x0a);
+
+ ASMOutU8(0x70, 0x05);
+ ASMInU8(0x71);
+
+ ASMReloadCR3();
+ ASMWriteBackAndInvalidateCaches();
+
+ ASMSetFlags(fSaved);
+
+#if 1 /* For testing & debugging. */
+ vmmR0TripleFaultHackTripleFault();
+#endif
+
+ return VINF_SUCCESS;
+}
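The four bytes written at 0467h above form a real-mode far pointer, offset first and segment second, which a BIOS supporting the warm reset vector jumps to after a reset when the CMOS shutdown status byte (register 0Fh) has been set to 0Ah via ports 70h/71h, as done further up. Here that pointer is 0000:7000, i.e. the physical address 0x7000 where the 16-bit stub was just copied. A tiny sketch of the encoding and the resulting physical address (illustrative only):

    #include <stdint.h>

    /* Compose the 0040:0067 warm-reset vector bytes for a seg:off target and
       return the physical address real mode will continue at (seg * 16 + off). */
    static uint32_t warmResetVectorPhys(uint16_t uSeg, uint16_t uOff, uint8_t abVec[4])
    {
        abVec[0] = (uint8_t)(uOff & 0xff);     /* offset, low byte   */
        abVec[1] = (uint8_t)(uOff >> 8);       /* offset, high byte  */
        abVec[2] = (uint8_t)(uSeg & 0xff);     /* segment, low byte  */
        abVec[3] = (uint8_t)(uSeg >> 8);       /* segment, high byte */
        return (uint32_t)uSeg * 16 + uOff;     /* 0000:7000 -> 0x7000 */
    }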
+
+
+/**
+ * Try to undo the harm done by the init function.
+ *
+ * This may leave the system in an unstable state since we might have been
+ * hijacking memory below 1MB that is in use by the kernel.
+ */
+void vmmR0TripleFaultHackTerm(void)
+{
+ /*
+ * Restore overwritten memory.
+ */
+ if ( g_pvSavedLowCore
+ && g_pbLowCore)
+ memcpy(g_pbLowCore, g_pvSavedLowCore, PAGE_SIZE);
+
+ if (g_pbPage0)
+ {
+ g_pbPage0[0x467+0] = RT_BYTE1(g_u32SavedVector);
+ g_pbPage0[0x467+1] = RT_BYTE2(g_u32SavedVector);
+ g_pbPage0[0x467+2] = RT_BYTE3(g_u32SavedVector);
+ g_pbPage0[0x467+3] = RT_BYTE4(g_u32SavedVector);
+
+ g_pbPage0[0x472+0] = RT_BYTE1(g_u16SavedCadIndicator);
+ g_pbPage0[0x472+1] = RT_BYTE2(g_u16SavedCadIndicator);
+ }
+
+ /*
+ * Fix the CMOS.
+ */
+ if (g_pvSavedLowCore)
+ {
+ uint32_t fSaved = ASMIntDisableFlags();
+
+ ASMOutU8(0x70, 0x0f);
+ ASMOutU8(0x71, 0x0a);
+
+ ASMOutU8(0x70, 0x00);
+ ASMInU8(0x71);
+
+ ASMReloadCR3();
+ ASMWriteBackAndInvalidateCaches();
+
+ ASMSetFlags(fSaved);
+ }
+
+ /*
+ * Release resources.
+ */
+ RTMemFree(g_pvSavedLowCore);
+ g_pvSavedLowCore = NULL;
+
+ RTR0MemObjFree(g_hMemLowCore, true /*fFreeMappings*/);
+ g_hMemLowCore = NIL_RTR0MEMOBJ;
+ g_hMapLowCore = NIL_RTR0MEMOBJ;
+ g_pbLowCore = NULL;
+ g_HCPhysLowCore = NIL_RTHCPHYS;
+
+ RTR0MemObjFree(g_hMemPage0, true /*fFreeMappings*/);
+ g_hMemPage0 = NIL_RTR0MEMOBJ;
+ g_hMapPage0 = NIL_RTR0MEMOBJ;
+ g_pbPage0 = NULL;
+}
+
diff --git a/src/VBox/VMM/VMMR0/VMMR0TripleFaultHackA.asm b/src/VBox/VMM/VMMR0/VMMR0TripleFaultHackA.asm
new file mode 100644
index 00000000..64817920
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/VMMR0TripleFaultHackA.asm
@@ -0,0 +1,264 @@
+; $Id: VMMR0TripleFaultHackA.asm $
+;; @file
+; VMM - Host Context Ring 0, Assembly Code for The Triple Fault Debugging Hack.
+;
+
+;
+; Copyright (C) 2011-2019 Oracle Corporation
+;
+; This file is part of VirtualBox Open Source Edition (OSE), as
+; available from http://www.virtualbox.org. This file is free software;
+; you can redistribute it and/or modify it under the terms of the GNU
+; General Public License (GPL) as published by the Free Software
+; Foundation, in version 2 as it comes in the "COPYING" file of the
+; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+;
+
+;*******************************************************************************
+;* Header Files *
+;*******************************************************************************
+%include "VBox/asmdefs.mac"
+
+
+BEGINCODE
+GLOBALNAME vmmR0TripleFaultHackStart
+%define CALC_ADDR(a_Addr) ( (a_Addr) - NAME(vmmR0TripleFaultHackStart) + 07000h )
+
+
+BITS 16
+BEGINPROC vmmR0TripleFaultHack
+ ; Set up the stack and segment registers.
+ cli ; paranoia
+ mov sp, 0ffffh
+ mov ax, cs
+ mov ss, ax
+ mov ds, ax
+ mov es, ax
+ cld ; paranoia
+
+ COM_INIT
+
+ ; Beep and say hello to the post-reset world.
+ call NAME(vmmR0TripleFaultHackBeep)
+ mov si, CALC_ADDR(.s_szHello)
+ call NAME(vmmR0TripleFaultHackPrint)
+
+.forever:
+ hlt
+ jmp .forever
+
+.s_szHello:
+ db 'Hello post-reset world', 0ah, 0dh, 0
+ENDPROC vmmR0TripleFaultHack
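
CALC_ADDR above rebases a label from its link-time position onto physical 0x7000, where the blob actually runs after the warm reset hands control to 0000:7000 with CS = DS = 0. The same arithmetic expressed in C, matching how the ring-0 code measures the blob (the helper name is made up, and it assumes the vmmR0TripleFaultHackStart declaration used by the C file above):

    /* Illustration only: run-time address of a label inside the copied blob. */
    static uintptr_t calcLowCoreAddr(uintptr_t uLabel)
    {
        return uLabel - (uintptr_t)&vmmR0TripleFaultHackStart + 0x7000;
    }
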
+
+;; ds:si = zero-terminated string to print.
+BEGINPROC vmmR0TripleFaultHackPrint
+ push eax
+ push esi
+
+.outer_loop:
+ lodsb
+ cmp al, 0
+ je .done
+ call NAME(vmmR0TripleFaultHackPrintCh)
+ jmp .outer_loop
+
+.done:
+ pop esi
+ pop eax
+ ret
+ENDPROC vmmR0TripleFaultHackPrint
+
+
+;; al = char to print
+BEGINPROC vmmR0TripleFaultHackPrintCh
+ push eax
+ push edx
+ push ecx
+ mov ah, al ; save char.
+
+ ; Wait for status.
+ mov ecx, _1G
+ mov dx, VBOX_UART_BASE + 5
+.pre_status:
+ in al, dx
+ test al, 20h
+ jnz .put_char
+ dec ecx
+ jnz .pre_status
+
+ ; Write the character.
+.put_char:
+ mov al, ah
+ mov dx, VBOX_UART_BASE
+ out dx, al
+
+ ; Wait for status.
+ mov ecx, _1G
+ mov dx, VBOX_UART_BASE + 5
+.post_status:
+ in al, dx
+ test al, 20h
+ jnz .done
+ dec ecx
+ jnz .post_status
+
+.done:
+ pop ecx
+ pop edx
+ pop eax
+ ret
+ENDPROC vmmR0TripleFaultHackPrintCh
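
vmmR0TripleFaultHackPrintCh polls the UART line status register at VBOX_UART_BASE + 5 for the transmit-holding-register-empty bit (0x20), bounded by a ~1G spin count, both before and after writing the data register at VBOX_UART_BASE. The same polled-output idea in C, illustrative only (the port-base parameter and helper name are assumptions, and only the pre-write wait is shown):

    /* Illustration only: polled serial output. */
    static void uartPutCh(uint16_t uPortBase, uint8_t bCh)
    {
        uint32_t cTries = _1G;
        while (cTries-- > 0 && !(ASMInU8(uPortBase + 5) & 0x20)) /* LSR: THR empty? */
            { /* spin */ }
        ASMOutU8(uPortBase, bCh);                                /* THR: send the byte */
    }
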
+
+;;
+; Make a 440 Hz beep on the PC speaker.
+BEGINPROC vmmR0TripleFaultHackBeep
+ push eax
+ push edx
+ push ecx
+
+ ; Program PIT channel 2 (square wave) and gate the speaker on.
+ mov al, 10110110b
+ out 43h, al
+ mov ax, 0a79h ; A = 440
+ out 42h, al
+ shr ax, 8
+ out 42h, al
+
+ in al, 61h
+ or al, 3
+ out 61h, al
+
+ ; delay
+ mov ecx, _1G
+.delay:
+ inc ecx
+ dec ecx
+ dec ecx
+ jnz .delay
+
+ ; shut up speaker.
+ in al, 61h
+ and al, 11111100b
+ out 61h, al
+
+.done:
+ pop ecx
+ pop edx
+ pop eax
+ ret
+ENDPROC vmmR0TripleFaultHackBeep
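
The beep routine programs PIT channel 2 in mode 3 (command byte 0xB6 to port 0x43), writes the 16-bit divisor low byte first to port 0x42, and turns the speaker on by setting bits 0-1 of port 0x61. The divisor follows from the ~1.193182 MHz PIT input clock: 1193182 / 440 is about 2712 (0x0A98), so the 0x0A79 (2681) loaded above lands at roughly 445 Hz, close enough for a diagnostic beep. A hedged C sketch of the sequence (helper names are made up):

    /* Illustration only. */
    static void beepOn(uint32_t uHz)
    {
        uint16_t uDivisor = (uint16_t)(1193182 / uHz); /* PIT input clock / tone */
        ASMOutU8(0x43, 0xb6);                  /* channel 2, lo/hi byte, mode 3 */
        ASMOutU8(0x42, RT_BYTE1(uDivisor));    /* divisor low byte */
        ASMOutU8(0x42, RT_BYTE2(uDivisor));    /* divisor high byte */
        ASMOutU8(0x61, ASMInU8(0x61) | 3);     /* gate + speaker data on */
    }

    static void beepOff(void)
    {
        ASMOutU8(0x61, ASMInU8(0x61) & 0xfc);  /* matches the 11111100b mask above */
    }
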
+
+
+GLOBALNAME vmmR0TripleFaultHackEnd
+
+
+
+
+;;;
+;;;
+;;;
+;;;
+;;;
+
+
+
+BITS ARCH_BITS
+
+BEGINPROC vmmR0TripleFaultHackKbdWait
+ push xAX
+
+.check_status:
+ in al, 64h
+ test al, 1 ; KBD_STAT_OBF
+ jnz .read_data_and_status
+ test al, 2 ; KBD_STAT_IBF
+ jnz .check_status
+
+ pop xAX
+ ret
+
+.read_data_and_status:
+ in al, 60h
+ jmp .check_status
+ENDPROC vmmR0TripleFaultHackKbdWait
+
+
+BEGINPROC vmmR0TripleFaultHackKbdRead
+ out 64h, al ; Write the command.
+
+.check_status:
+ in al, 64h
+ test al, 1 ; KBD_STAT_OBF
+ jz .check_status
+
+ in al, 60h ; Read the data.
+ ret
+ENDPROC vmmR0TripleFaultHackKbdRead
+
+
+BEGINPROC vmmR0TripleFaultHackKbdWrite
+ out 64h, al ; Write the command.
+ call NAME(vmmR0TripleFaultHackKbdWait)
+
+ xchg al, ah
+ out 60h, al ; Write the data.
+ call NAME(vmmR0TripleFaultHackKbdWait)
+ xchg al, ah
+
+ ret
+ENDPROC vmmR0TripleFaultHackKbdWrite
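
The keyboard-controller helpers follow the standard i8042 handshake: status reads come from port 0x64 (bit 0 = output buffer full, bit 1 = input buffer full), data moves through port 0x60, and controller commands such as 0xD0/0xD1 (read/write output port) go to port 0x64. A C sketch of the same wait-then-transfer pattern, illustrative only (the names are assumptions):

    /* Illustration only: drain stale output and wait until the controller
       can accept a command. */
    static void i8042Wait(void)
    {
        for (;;)
        {
            uint8_t bStatus = ASMInU8(0x64);
            if (bStatus & 1)            /* KBD_STAT_OBF: stale data, drain it. */
                ASMInU8(0x60);
            else if (!(bStatus & 2))    /* KBD_STAT_IBF clear: ready for a command. */
                return;
        }
    }

    /* Illustration only: issue a command and fetch its one-byte reply. */
    static uint8_t i8042ReadCmd(uint8_t bCmd)
    {
        ASMOutU8(0x64, bCmd);
        while (!(ASMInU8(0x64) & 1))    /* wait for KBD_STAT_OBF */
            { /* spin */ }
        return ASMInU8(0x60);
    }
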
+
+
+
+BEGINPROC vmmR0TripleFaultHackTripleFault
+ push xAX
+ push xSI
+
+ xor eax, eax
+ push xAX
+ push xAX
+ push xAX
+ push xAX
+
+ COM_CHAR 'B'
+ COM_CHAR 'y'
+ COM_CHAR 'e'
+ COM_CHAR '!'
+ COM_CHAR 0ah
+ COM_CHAR 0dh
+
+
+ ;call NAME(vmmR0TripleFaultHackBeep32)
+%if 1
+ lidt [xSP]
+%elif 0
+ in al, 92h
+ or al, 1
+ out 92h, al
+ in al, 92h
+ cli
+ hlt
+%else
+ mov al, 0d0h ; KBD_CCMD_READ_OUTPORT
+ call NAME(vmmR0TripleFaultHackKbdRead)
+ mov ah, 0feh
+ and ah, al
+ mov al, 0d1h ; KBD_CCMD_WRITE_OUTPORT
+ call NAME(vmmR0TripleFaultHackKbdWrite)
+ cli
+ hlt
+%endif
+ int3
+
+ pop xAX
+ pop xAX
+ pop xAX
+ pop xAX
+
+ pop xSI
+ pop xAX
+ ret
+ENDPROC vmmR0TripleFaultHackTripleFault
+
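
In vmmR0TripleFaultHackTripleFault the default branch pushes four zeroed stack slots, executes lidt [xSP] so the IDTR gets a zero limit and base, and then runs int3: the breakpoint cannot be delivered through an empty IDT, the resulting #GP and #DF cannot be delivered either, and the CPU triple-faults, which the chipset turns into a reset that lands in the 16-bit code installed at 0x7000. The disabled branches reach the same end through the port 0x92 fast-reset bit or the i8042 output-port reset line. A purely illustrative C rendering of the zero-IDT trick, assuming the IPRT helpers RTIDTR, RT_ZERO, ASMSetIDTR and ASMBreakpoint are available:

    /* Illustration only, not the code above: load an all-zero IDT and raise
       an exception; nothing can be delivered, so the CPU triple-faults. */
    static void tripleFaultNow(void)
    {
        RTIDTR ZeroIdtr;
        RT_ZERO(ZeroIdtr);
        ASMSetIDTR(&ZeroIdtr);  /* lidt with limit 0, base 0 */
        ASMBreakpoint();        /* int3: #BP -> #GP -> #DF -> triple fault */
        /* Not reached. */
    }
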