Diffstat (limited to 'src/VBox/VMM/VMMR0/GVMMR0.cpp')
-rw-r--r-- src/VBox/VMM/VMMR0/GVMMR0.cpp | 3029
1 file changed, 3029 insertions(+), 0 deletions(-)
diff --git a/src/VBox/VMM/VMMR0/GVMMR0.cpp b/src/VBox/VMM/VMMR0/GVMMR0.cpp
new file mode 100644
index 00000000..2de316d5
--- /dev/null
+++ b/src/VBox/VMM/VMMR0/GVMMR0.cpp
@@ -0,0 +1,3029 @@
+/* $Id: GVMMR0.cpp $ */
+/** @file
+ * GVMM - Global VM Manager.
+ */
+
+/*
+ * Copyright (C) 2007-2020 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ */
+
+
+/** @page pg_gvmm GVMM - The Global VM Manager
+ *
+ * The Global VM Manager lives in ring-0. Its main function at the moment is
+ * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
+ * each of them, and assign them unique identifiers (so GMM can track page
+ * owners). The GVMM also manages some of the host CPU resources, like the
+ * periodic preemption timer.
+ *
+ * The GVMM will create a ring-0 object for each VM when it is registered.
+ * This serves both session cleanup purposes and provides a point where it
+ * is possible to implement usage policies later (in SUPR0ObjRegister).
+ *
+ *
+ * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
+ *
+ * On systems that sport a high resolution kernel timer API, we use per-cpu
+ * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
+ * execution. The timer frequency is calculated by taking the max
+ * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
+ * (RT_ELEMENTS(((PGVMMHOSTCPU)0)->Ppt.aHzHistory) *
+ * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
+ *
+ * The TMCalcHostTimerFrequency() part takes the max TMTimerSetFrequencyHint()
+ * value and adjusts it by the current catch-up percent, warp drive percent and
+ * some fudge factors. VMMR0.cpp reports the result via
+ * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
+ * AMD-V and raw-mode execution environments.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_GVMM
+#include <VBox/vmm/gvmm.h>
+#include <VBox/vmm/gmm.h>
+#include "GVMMR0Internal.h"
+#include <VBox/vmm/iom.h>
+#include <VBox/vmm/pdm.h>
+#include <VBox/vmm/pgm.h>
+#include <VBox/vmm/vmm.h>
+#ifdef VBOX_WITH_NEM_R0
+# include <VBox/vmm/nem.h>
+#endif
+#include <VBox/vmm/vmcpuset.h>
+#include <VBox/vmm/vmcc.h>
+#include <VBox/param.h>
+#include <VBox/err.h>
+
+#include <iprt/asm.h>
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/critsect.h>
+#include <iprt/mem.h>
+#include <iprt/semaphore.h>
+#include <iprt/time.h>
+#include <VBox/log.h>
+#include <iprt/thread.h>
+#include <iprt/process.h>
+#include <iprt/param.h>
+#include <iprt/string.h>
+#include <iprt/assert.h>
+#include <iprt/mem.h>
+#include <iprt/memobj.h>
+#include <iprt/mp.h>
+#include <iprt/cpuset.h>
+#include <iprt/spinlock.h>
+#include <iprt/timer.h>
+
+#include "dtrace/VBoxVMM.h"
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
+/** Define this to enable the periodic preemption timer. */
+# define GVMM_SCHED_WITH_PPT
+#endif
+
+
+/** @def GVMM_CHECK_SMAP_SETUP
+ * SMAP check setup. */
+/** @def GVMM_CHECK_SMAP_CHECK
+ * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
+ * it will be logged and @a a_BadExpr is executed. */
+/** @def GVMM_CHECK_SMAP_CHECK2
+ * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
+ * be logged, written to the VM's assertion text buffer, and @a a_BadExpr is
+ * executed. */
+#if (defined(VBOX_STRICT) || 1) && !defined(VBOX_WITH_RAM_IN_KERNEL)
+# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
+# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
+ do { \
+ if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
+ { \
+ RTCCUINTREG fEflCheck = ASMGetFlags(); \
+ if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
+ { /* likely */ } \
+ else \
+ { \
+ SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
+ a_BadExpr; \
+ } \
+ } \
+ } while (0)
+# define GVMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) \
+ do { \
+ if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
+ { \
+ RTCCUINTREG fEflCheck = ASMGetFlags(); \
+ if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
+ { /* likely */ } \
+ else \
+ { \
+ SUPR0BadContext((a_pGVM) ? (a_pGVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
+ a_BadExpr; \
+ } \
+ } \
+ } while (0)
+#else
+# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
+# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
+# define GVMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) NOREF(fKernelFeatures)
+#endif
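+
+/* A minimal usage sketch of the SMAP checks above (illustrative only; the
+ * function name is made up, but the pattern follows how this file uses the
+ * macros): set up once on entry, then re-check after anything that may have
+ * cleared EFLAGS.AC, passing the recovery expression as @a a_BadExpr. */
+#if 0
+static int gvmmR0SmapUsageSketch(PGVM pGVM)
+{
+    GVMM_CHECK_SMAP_SETUP();                     /* captures the kernel feature flags */
+    GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);    /* log + record if AC is clear */
+    int rc = VINF_SUCCESS;                       /* ... real work would go here ... */
+    GVMM_CHECK_SMAP_CHECK2(pGVM, rc = VERR_INTERNAL_ERROR); /* a_BadExpr runs on failure */
+    return rc;
+}
+#endif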
+
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+
+/**
+ * Global VM handle.
+ */
+typedef struct GVMHANDLE
+{
+ /** The index of the next handle in the list (free or used). (0 is nil.) */
+ uint16_t volatile iNext;
+ /** Our own index / handle value. */
+ uint16_t iSelf;
+ /** The process ID of the handle owner.
+ * This is used for access checks. */
+ RTPROCESS ProcId;
+ /** The pointer to the ring-0 only (aka global) VM structure. */
+ PGVM pGVM;
+ /** The virtual machine object. */
+ void *pvObj;
+ /** The session this VM is associated with. */
+ PSUPDRVSESSION pSession;
+ /** The ring-0 handle of the EMT0 thread.
+ * This is used for ownership checks as well as looking up a VM handle by thread
+ * at times like assertions. */
+ RTNATIVETHREAD hEMT0;
+} GVMHANDLE;
+/** Pointer to a global VM handle. */
+typedef GVMHANDLE *PGVMHANDLE;
+
+/** Number of GVM handles (including the NIL handle). */
+#if HC_ARCH_BITS == 64
+# define GVMM_MAX_HANDLES 8192
+#else
+# define GVMM_MAX_HANDLES 128
+#endif
+
+/**
+ * Per host CPU GVMM data.
+ */
+typedef struct GVMMHOSTCPU
+{
+ /** Magic number (GVMMHOSTCPU_MAGIC). */
+ uint32_t volatile u32Magic;
+ /** The CPU ID. */
+ RTCPUID idCpu;
+ /** The CPU set index. */
+ uint32_t idxCpuSet;
+
+#ifdef GVMM_SCHED_WITH_PPT
+ /** Periodic preemption timer data. */
+ struct
+ {
+ /** The handle to the periodic preemption timer. */
+ PRTTIMER pTimer;
+ /** Spinlock protecting the data below. */
+ RTSPINLOCK hSpinlock;
+ /** The smallest Hz that we need to care about. (static) */
+ uint32_t uMinHz;
+ /** The number of ticks between each historization. */
+ uint32_t cTicksHistoriziationInterval;
+ /** The current historization tick (counting up to
+ * cTicksHistoriziationInterval and then resetting). */
+ uint32_t iTickHistorization;
+ /** The current timer interval. This is set to 0 when inactive. */
+ uint32_t cNsInterval;
+ /** The current timer frequency. This is set to 0 when inactive. */
+ uint32_t uTimerHz;
+ /** The current max frequency reported by the EMTs.
+ * This gets historized and reset by the timer callback. This is
+ * read without holding the spinlock, so it needs atomic updating. */
+ uint32_t volatile uDesiredHz;
+ /** Whether the timer was started or not. */
+ bool volatile fStarted;
+ /** Set if we're starting the timer. */
+ bool volatile fStarting;
+ /** The index of the next history entry (mod it). */
+ uint32_t iHzHistory;
+ /** Historicized uDesiredHz values. The array wraps around, new entries
+ * are added at iHzHistory. This is updated approximately every
+ * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
+ uint32_t aHzHistory[8];
+ /** Statistics counter for recording the number of interval changes. */
+ uint32_t cChanges;
+ /** Statistics counter for recording the number of timer starts. */
+ uint32_t cStarts;
+ } Ppt;
+#endif /* GVMM_SCHED_WITH_PPT */
+
+} GVMMHOSTCPU;
+/** Pointer to the per host CPU GVMM data. */
+typedef GVMMHOSTCPU *PGVMMHOSTCPU;
+/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
+#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
+/** The interval each history entry should cover (approximately), given in
+ * nanoseconds. */
+#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
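+
+/* Illustrative sketch only (not part of the build): how an effective timer
+ * frequency can be derived from the fields above, i.e. the max of the current
+ * uDesiredHz and the historized values, floored at uMinHz. The real logic
+ * lives in gvmmR0SchedPeriodicPreemptionTimerCallback further down. */
+#if 0
+static uint32_t gvmmR0PptHzSketch(PGVMMHOSTCPU pCpu)
+{
+    uint32_t uHz = ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz);  /* updated lock-free by EMTs */
+    for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
+        uHz = RT_MAX(uHz, pCpu->Ppt.aHzHistory[i]);          /* covers the last ~160 ms */
+    return RT_MAX(uHz, pCpu->Ppt.uMinHz);
+}
+#endif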
+
+
+/**
+ * The GVMM instance data.
+ */
+typedef struct GVMM
+{
+ /** Eyecatcher / magic. */
+ uint32_t u32Magic;
+ /** The index of the head of the free handle chain. (0 is nil.) */
+ uint16_t volatile iFreeHead;
+ /** The index of the head of the active handle chain. (0 is nil.) */
+ uint16_t volatile iUsedHead;
+ /** The number of VMs. */
+ uint16_t volatile cVMs;
+ /** Alignment padding. */
+ uint16_t u16Reserved;
+ /** The number of EMTs. */
+ uint32_t volatile cEMTs;
+ /** The number of EMTs that have halted in GVMMR0SchedHalt. */
+ uint32_t volatile cHaltedEMTs;
+ /** Mini lock for restricting early wake-ups to one thread. */
+ bool volatile fDoingEarlyWakeUps;
+ bool afPadding[3]; /**< explicit alignment padding. */
+ /** When the next halted or sleeping EMT will wake up.
+ * This is set to 0 when it needs recalculating and to UINT64_MAX when
+ * there are no halted or sleeping EMTs in the GVMM. */
+ uint64_t uNsNextEmtWakeup;
+ /** The lock used to serialize VM creation, destruction and associated events that
+ * isn't performance critical. Owners may acquire the list lock. */
+ RTCRITSECT CreateDestroyLock;
+ /** The lock used to serialize used list updates and accesses.
+ * This indirectly includes scheduling since the scheduler will have to walk the
+ * used list to examine running VMs. Owners may not acquire any other locks. */
+ RTCRITSECTRW UsedLock;
+ /** The handle array.
+ * The size of this array defines the maximum number of currently running VMs.
+ * The first entry is unused as it represents the NIL handle. */
+ GVMHANDLE aHandles[GVMM_MAX_HANDLES];
+
+ /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
+ * The number of EMTs that means we no longer consider ourselves alone on a
+ * CPU/Core.
+ */
+ uint32_t cEMTsMeansCompany;
+ /** @gcfgm{/GVMM/MinSleepAlone, 32-bit, 0, 100000000, 750000, ns}
+ * The minimum sleep time for when we're alone, in nanoseconds.
+ */
+ uint32_t nsMinSleepAlone;
+ /** @gcfgm{/GVMM/MinSleepCompany, 32-bit, 0, 100000000, 15000, ns}
+ * The minimum sleep time for when we've got company, in nanoseconds.
+ */
+ uint32_t nsMinSleepCompany;
+ /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
+ * The limit for the first round of early wake-ups, given in nanoseconds.
+ */
+ uint32_t nsEarlyWakeUp1;
+ /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
+ * The limit for the second round of early wake-ups, given in nanoseconds.
+ */
+ uint32_t nsEarlyWakeUp2;
+
+ /** Set if we're doing early wake-ups.
+ * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
+ bool volatile fDoEarlyWakeUps;
+
+ /** The number of entries in the host CPU array (aHostCpus). */
+ uint32_t cHostCpus;
+ /** Per host CPU data (variable length). */
+ GVMMHOSTCPU aHostCpus[1];
+} GVMM;
+AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
+AssertCompileMemberAlignment(GVMM, UsedLock, 8);
+AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
+/** Pointer to the GVMM instance data. */
+typedef GVMM *PGVMM;
+
+/** The GVMM::u32Magic value (Charlie Haden). */
+#define GVMM_MAGIC UINT32_C(0x19370806)
+
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** Pointer to the GVMM instance data.
+ * (Just my general dislike for global variables.) */
+static PGVMM g_pGVMM = NULL;
+
+/** Macro for obtaining and validating the g_pGVMM pointer.
+ * On failure it will return from the invoking function with the specified return value.
+ *
+ * @param pGVMM The name of the pGVMM variable.
+ * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
+ * status codes.
+ */
+#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
+ do { \
+ (pGVMM) = g_pGVMM;\
+ AssertPtrReturn((pGVMM), (rc)); \
+ AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
+ } while (0)
+
+/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
+ * On failure it will return from the invoking function.
+ *
+ * @param pGVMM The name of the pGVMM variable.
+ */
+#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
+ do { \
+ (pGVMM) = g_pGVMM;\
+ AssertPtrReturnVoid((pGVMM)); \
+ AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
+ } while (0)
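+
+/* Usage sketch (illustrative only; the function is hypothetical): the macro
+ * fetches and validates the global instance pointer, bailing out of the
+ * calling function with the given status on failure. */
+#if 0
+static int gvmmR0InstanceUsageSketch(void)
+{
+    PGVMM pGVMM;
+    GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); /* returns on failure */
+    SUPR0Printf("GVMM: %u VMs, %u EMTs\n", pGVMM->cVMs, pGVMM->cEMTs);
+    return VINF_SUCCESS;
+}
+#endif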
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
+static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
+static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
+static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
+
+#ifdef GVMM_SCHED_WITH_PPT
+static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
+#endif
+
+
+/**
+ * Initializes the GVMM.
+ *
+ * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
+ *
+ * @returns VBox status code.
+ */
+GVMMR0DECL(int) GVMMR0Init(void)
+{
+ LogFlow(("GVMMR0Init:\n"));
+
+ /*
+ * Allocate and initialize the instance data.
+ */
+ uint32_t cHostCpus = RTMpGetArraySize();
+ AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
+
+ PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
+ if (!pGVMM)
+ return VERR_NO_MEMORY;
+ int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
+ "GVMM-CreateDestroyLock");
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
+ if (RT_SUCCESS(rc))
+ {
+ pGVMM->u32Magic = GVMM_MAGIC;
+ pGVMM->iUsedHead = 0;
+ pGVMM->iFreeHead = 1;
+
+ /* the nil handle */
+ pGVMM->aHandles[0].iSelf = 0;
+ pGVMM->aHandles[0].iNext = 0;
+
+ /* the tail */
+ unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
+ pGVMM->aHandles[i].iSelf = i;
+ pGVMM->aHandles[i].iNext = 0; /* nil */
+
+ /* the rest */
+ while (i-- > 1)
+ {
+ pGVMM->aHandles[i].iSelf = i;
+ pGVMM->aHandles[i].iNext = i + 1;
+ }
+
+ /* The default configuration values. */
+ uint32_t cNsResolution = RTSemEventMultiGetResolution();
+ pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
+ if (cNsResolution >= 5*RT_NS_100US)
+ {
+ pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
+ pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
+ pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
+ pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
+ }
+ else if (cNsResolution > RT_NS_100US)
+ {
+ pGVMM->nsMinSleepAlone = cNsResolution / 2;
+ pGVMM->nsMinSleepCompany = cNsResolution / 4;
+ pGVMM->nsEarlyWakeUp1 = 0;
+ pGVMM->nsEarlyWakeUp2 = 0;
+ }
+ else
+ {
+ pGVMM->nsMinSleepAlone = 2000;
+ pGVMM->nsMinSleepCompany = 2000;
+ pGVMM->nsEarlyWakeUp1 = 0;
+ pGVMM->nsEarlyWakeUp2 = 0;
+ }
+ pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
+
+ /* The host CPU data. */
+ pGVMM->cHostCpus = cHostCpus;
+ uint32_t iCpu = cHostCpus;
+ RTCPUSET PossibleSet;
+ RTMpGetSet(&PossibleSet);
+ while (iCpu-- > 0)
+ {
+ pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
+#ifdef GVMM_SCHED_WITH_PPT
+ pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
+ pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
+ pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
+ pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
+ //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
+ //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
+ //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
+ //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
+#endif
+
+ if (RTCpuSetIsMember(&PossibleSet, iCpu))
+ {
+ pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
+ pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
+
+#ifdef GVMM_SCHED_WITH_PPT
+ rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
+ 50*1000*1000 /* whatever */,
+ RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
+ gvmmR0SchedPeriodicPreemptionTimerCallback,
+ &pGVMM->aHostCpus[iCpu]);
+ if (RT_SUCCESS(rc))
+ rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
+ if (RT_FAILURE(rc))
+ {
+ while (iCpu < cHostCpus)
+ {
+ RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
+ RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
+ pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
+ iCpu++;
+ }
+ break;
+ }
+#endif
+ }
+ else
+ {
+ pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
+ pGVMM->aHostCpus[iCpu].u32Magic = 0;
+ }
+ }
+ if (RT_SUCCESS(rc))
+ {
+ g_pGVMM = pGVMM;
+ LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
+ return VINF_SUCCESS;
+ }
+
+ /* bail out. */
+ RTCritSectRwDelete(&pGVMM->UsedLock);
+ }
+ RTCritSectDelete(&pGVMM->CreateDestroyLock);
+ }
+
+ RTMemFree(pGVMM);
+ return rc;
+}
+
+
+/**
+ * Terminates the GVMM.
+ *
+ * This is called while owning the loader semaphore (see supdrvLdrFree()).
+ * And unless something is wrong, there should be absolutely no VMs
+ * registered at this point.
+ */
+GVMMR0DECL(void) GVMMR0Term(void)
+{
+ LogFlow(("GVMMR0Term:\n"));
+
+ PGVMM pGVMM = g_pGVMM;
+ g_pGVMM = NULL;
+ if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
+ {
+ SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
+ return;
+ }
+
+ /*
+ * First of all, stop all active timers.
+ */
+ uint32_t cActiveTimers = 0;
+ uint32_t iCpu = pGVMM->cHostCpus;
+ while (iCpu-- > 0)
+ {
+ ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
+#ifdef GVMM_SCHED_WITH_PPT
+ if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
+ && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
+ cActiveTimers++;
+#endif
+ }
+ if (cActiveTimers)
+ RTThreadSleep(1); /* fudge */
+
+ /*
+ * Invalidate the instance data and free the resources.
+ */
+ pGVMM->u32Magic = ~GVMM_MAGIC;
+ RTCritSectRwDelete(&pGVMM->UsedLock);
+ RTCritSectDelete(&pGVMM->CreateDestroyLock);
+
+ pGVMM->iFreeHead = 0;
+ if (pGVMM->iUsedHead)
+ {
+ SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
+ pGVMM->iUsedHead = 0;
+ }
+
+#ifdef GVMM_SCHED_WITH_PPT
+ iCpu = pGVMM->cHostCpus;
+ while (iCpu-- > 0)
+ {
+ RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
+ pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
+ RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
+ pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
+ }
+#endif
+
+ RTMemFree(pGVMM);
+}
+
+
+/**
+ * A quick hack for setting global config values.
+ *
+ * @returns VBox status code.
+ *
+ * @param pSession The session handle. Used for authentication.
+ * @param pszName The variable name.
+ * @param u64Value The new value.
+ */
+GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
+{
+ /*
+ * Validate input.
+ */
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+ AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
+ AssertPtrReturn(pszName, VERR_INVALID_POINTER);
+
+ /*
+ * String switch time!
+ */
+ if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
+ return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
+ int rc = VINF_SUCCESS;
+ pszName += sizeof("/GVMM/") - 1;
+ if (!strcmp(pszName, "cEMTsMeansCompany"))
+ {
+ if (u64Value <= UINT32_MAX)
+ pGVMM->cEMTsMeansCompany = u64Value;
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else if (!strcmp(pszName, "MinSleepAlone"))
+ {
+ if (u64Value <= RT_NS_100MS)
+ pGVMM->nsMinSleepAlone = u64Value;
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else if (!strcmp(pszName, "MinSleepCompany"))
+ {
+ if (u64Value <= RT_NS_100MS)
+ pGVMM->nsMinSleepCompany = u64Value;
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else if (!strcmp(pszName, "EarlyWakeUp1"))
+ {
+ if (u64Value <= RT_NS_100MS)
+ {
+ pGVMM->nsEarlyWakeUp1 = u64Value;
+ pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
+ }
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else if (!strcmp(pszName, "EarlyWakeUp2"))
+ {
+ if (u64Value <= RT_NS_100MS)
+ {
+ pGVMM->nsEarlyWakeUp2 = u64Value;
+ pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
+ }
+ else
+ rc = VERR_OUT_OF_RANGE;
+ }
+ else
+ rc = VERR_CFGM_VALUE_NOT_FOUND;
+ return rc;
+}
+
+
+/**
+ * A quick hack for getting global config values.
+ *
+ * @returns VBox status code.
+ *
+ * @param pSession The session handle. Used for authentication.
+ * @param pszName The variable name.
+ * @param pu64Value Where to return the value.
+ */
+GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
+{
+ /*
+ * Validate input.
+ */
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+ AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
+ AssertPtrReturn(pszName, VERR_INVALID_POINTER);
+ AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
+
+ /*
+ * String switch time!
+ */
+ if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
+ return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
+ int rc = VINF_SUCCESS;
+ pszName += sizeof("/GVMM/") - 1;
+ if (!strcmp(pszName, "cEMTsMeansCompany"))
+ *pu64Value = pGVMM->cEMTsMeansCompany;
+ else if (!strcmp(pszName, "MinSleepAlone"))
+ *pu64Value = pGVMM->nsMinSleepAlone;
+ else if (!strcmp(pszName, "MinSleepCompany"))
+ *pu64Value = pGVMM->nsMinSleepCompany;
+ else if (!strcmp(pszName, "EarlyWakeUp1"))
+ *pu64Value = pGVMM->nsEarlyWakeUp1;
+ else if (!strcmp(pszName, "EarlyWakeUp2"))
+ *pu64Value = pGVMM->nsEarlyWakeUp2;
+ else
+ rc = VERR_CFGM_VALUE_NOT_FOUND;
+ return rc;
+}
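+
+/* Illustrative sketch only: tuning the early wake-up limits through the API
+ * above. Values are in nanoseconds and rejected beyond RT_NS_100MS; setting
+ * either limit to zero disables the early wake-up machinery. */
+#if 0
+static int gvmmR0ConfigSketch(PSUPDRVSESSION pSession)
+{
+    int rc = GVMMR0SetConfig(pSession, "/GVMM/EarlyWakeUp1", 30000 /* 0.030 ms */);
+    if (RT_SUCCESS(rc))
+        rc = GVMMR0SetConfig(pSession, "/GVMM/EarlyWakeUp2", 60000 /* 0.060 ms */);
+    uint64_t u64Check = 0;
+    if (RT_SUCCESS(rc))
+        rc = GVMMR0QueryConfig(pSession, "/GVMM/EarlyWakeUp2", &u64Check);
+    return rc;
+}
+#endif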
+
+
+/**
+ * Acquire the 'used' lock in shared mode.
+ *
+ * This prevents destruction of the VM while we're in ring-0.
+ *
+ * @returns IPRT status code, see RTCritSectRwEnterShared.
+ * @param a_pGVMM The GVMM instance data.
+ * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
+ */
+#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
+
+/**
+ * Release the 'used' lock when owning it in shared mode.
+ *
+ * @returns IPRT status code, see RTCritSectRwLeaveShared.
+ * @param a_pGVMM The GVMM instance data.
+ * @sa GVMMR0_USED_SHARED_LOCK
+ */
+#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
+
+/**
+ * Acquire the 'used' lock in exclusive mode.
+ *
+ * Only use this function when making changes to the used list.
+ *
+ * @returns IPRT status code, see RTCritSectRwEnterExcl.
+ * @param a_pGVMM The GVMM instance data.
+ * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
+ */
+#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
+
+/**
+ * Release the 'used' lock when owning it in exclusive mode.
+ *
+ * @returns IPRT status code, see RTCritSectRwLeaveExcl.
+ * @param a_pGVMM The GVMM instance data.
+ * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
+ */
+#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
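+
+/* Usage sketch (illustrative only): reading used-list data under the shared
+ * lock. Remember the UsedLock rule documented in the GVMM structure above:
+ * owners may not acquire any other locks. */
+#if 0
+static uint16_t gvmmR0UsedLockSketch(PGVMM pGVMM)
+{
+    int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
+    AssertRCReturn(rc, 0);
+    uint16_t const cVMs = pGVMM->cVMs;   /* stable while the lock is held */
+    GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+    return cVMs;
+}
+#endif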
+
+
+/**
+ * Acquires the 'create & destroy' lock.
+ *
+ * @returns IPRT status code, see RTCritSectEnter.
+ * @param pGVMM The GVMM instance data.
+ */
+DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
+{
+ LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
+ int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
+ LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
+ return rc;
+}
+
+
+/**
+ * Release the 'create & destroy' lock.
+ *
+ * @returns IPRT status code, see RTCritSectLeave.
+ * @param pGVMM The GVMM instance data.
+ */
+DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
+{
+ LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
+ int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
+ AssertRC(rc);
+ return rc;
+}
+
+
+/**
+ * Request wrapper for the GVMMR0CreateVM API.
+ *
+ * @returns VBox status code.
+ * @param pReq The request buffer.
+ * @param pSession The session handle. The VM will be associated with this.
+ */
+GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
+{
+ /*
+ * Validate the request.
+ */
+ if (!RT_VALID_PTR(pReq))
+ return VERR_INVALID_POINTER;
+ if (pReq->Hdr.cbReq != sizeof(*pReq))
+ return VERR_INVALID_PARAMETER;
+ if (pReq->pSession != pSession)
+ return VERR_INVALID_POINTER;
+
+ /*
+ * Execute it.
+ */
+ PGVM pGVM;
+ pReq->pVMR0 = NULL;
+ pReq->pVMR3 = NIL_RTR3PTR;
+ int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pGVM);
+ if (RT_SUCCESS(rc))
+ {
+ pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
+ pReq->pVMR3 = pGVM->pVMR3;
+ }
+ return rc;
+}
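+
+/* Illustrative sketch only: how a caller might fill in the request structure
+ * (the exact GVMMCREATEVMREQ layout lives in VBox/vmm/gvmm.h; the field use
+ * here mirrors the validation done in the wrapper above). */
+#if 0
+static int gvmmR0CreateVMReqSketch(PSUPDRVSESSION pSession, uint32_t cCpus)
+{
+    GVMMCREATEVMREQ Req;
+    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
+    Req.Hdr.cbReq    = sizeof(Req);     /* checked against sizeof(*pReq) */
+    Req.pSession     = pSession;        /* must match the dispatching session */
+    Req.cCpus        = cCpus;           /* 1..VMM_MAX_CPU_COUNT */
+    Req.pVMR3        = NIL_RTR3PTR;     /* out */
+    Req.pVMR0        = NULL;            /* out */
+    return GVMMR0CreateVMReq(&Req, pSession);
+}
+#endif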
+
+
+/**
+ * Allocates the VM structure and registers it with GVMM.
+ *
+ * The caller will become the VM owner and thereby the EMT.
+ *
+ * @returns VBox status code.
+ * @param pSession The support driver session.
+ * @param cCpus Number of virtual CPUs for the new VM.
+ * @param ppGVM Where to store the pointer to the VM structure.
+ *
+ * @thread EMT.
+ */
+GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
+{
+ LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
+ *ppGVM = NULL;
+
+ if ( cCpus == 0
+ || cCpus > VMM_MAX_CPU_COUNT)
+ return VERR_INVALID_PARAMETER;
+
+ RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
+ AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
+ RTPROCESS ProcId = RTProcSelf();
+ AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
+
+ /*
+ * The whole allocation process is protected by the lock.
+ */
+ int rc = gvmmR0CreateDestroyLock(pGVMM);
+ AssertRCReturn(rc, rc);
+
+ /*
+ * Only one VM per session.
+ */
+ if (SUPR0GetSessionVM(pSession) != NULL)
+ {
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
+ return VERR_ALREADY_EXISTS;
+ }
+
+ /*
+ * Allocate a handle first so we don't waste resources unnecessarily.
+ */
+ uint16_t iHandle = pGVMM->iFreeHead;
+ if (iHandle)
+ {
+ PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
+
+ /* consistency checks, a bit paranoid as always. */
+ if ( !pHandle->pGVM
+ && !pHandle->pvObj
+ && pHandle->iSelf == iHandle)
+ {
+ pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
+ if (pHandle->pvObj)
+ {
+ /*
+ * Move the handle from the free to used list and perform permission checks.
+ */
+ rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
+ AssertRC(rc);
+
+ pGVMM->iFreeHead = pHandle->iNext;
+ pHandle->iNext = pGVMM->iUsedHead;
+ pGVMM->iUsedHead = iHandle;
+ pGVMM->cVMs++;
+
+ pHandle->pGVM = NULL;
+ pHandle->pSession = pSession;
+ pHandle->hEMT0 = NIL_RTNATIVETHREAD;
+ pHandle->ProcId = NIL_RTPROCESS;
+
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+
+ rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Allocate memory for the VM structure (combined VM + GVM).
+ */
+ const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
+ const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
+ RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
+ rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
+ if (RT_SUCCESS(rc))
+ {
+ PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
+ AssertPtr(pGVM);
+
+ /*
+ * Initialise the structure.
+ */
+ RT_BZERO(pGVM, cPages << PAGE_SHIFT);
+ gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
+ pGVM->gvmm.s.VMMemObj = hVMMemObj;
+ rc = GMMR0InitPerVMData(pGVM);
+ int rc2 = PGMR0InitPerVMData(pGVM);
+ PDMR0InitPerVMData(pGVM);
+ IOMR0InitPerVMData(pGVM);
+ if (RT_SUCCESS(rc) && RT_SUCCESS(rc2))
+ {
+ /*
+ * Allocate page array.
+ * This currently has to be made available to ring-3, but that should change eventually.
+ */
+ rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
+ if (RT_SUCCESS(rc))
+ {
+ PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
+ for (uint32_t iPage = 0; iPage < cPages; iPage++)
+ {
+ paPages[iPage].uReserved = 0;
+ paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
+ Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
+ }
+
+ /*
+ * Map the page array, VM and VMCPU structures into ring-3.
+ */
+ AssertCompileSizeAlignment(VM, PAGE_SIZE);
+ rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
+ RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
+ 0 /*offSub*/, sizeof(VM));
+ for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
+ {
+ AssertCompileSizeAlignment(VMCPU, PAGE_SIZE);
+ rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
+ (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
+ RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
+ }
+ if (RT_SUCCESS(rc))
+ rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
+ 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
+ NIL_RTR0PROCESS);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Initialize all the VM pointers.
+ */
+ PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
+ AssertPtr((void *)pVMR3);
+
+ for (VMCPUID i = 0; i < cCpus; i++)
+ {
+ pGVM->aCpus[i].pVMR0 = pGVM;
+ pGVM->aCpus[i].pVMR3 = pVMR3;
+ pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
+ pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
+ pGVM->apCpusR0[i] = &pGVM->aCpus[i];
+ AssertPtr((void *)pGVM->apCpusR3[i]);
+ }
+
+ pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
+ AssertPtr((void *)pGVM->paVMPagesR3);
+
+ /*
+ * Complete the handle - take the UsedLock sem just to be careful.
+ */
+ rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
+ AssertRC(rc);
+
+ pHandle->pGVM = pGVM;
+ pHandle->hEMT0 = hEMT0;
+ pHandle->ProcId = ProcId;
+ pGVM->pVMR3 = pVMR3;
+ pGVM->pVMR3Unsafe = pVMR3;
+ pGVM->aCpus[0].hEMT = hEMT0;
+ pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
+ pGVMM->cEMTs += cCpus;
+
+ /* Associate it with the session and create the context hook for EMT0. */
+ rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
+ if (RT_SUCCESS(rc))
+ {
+ rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Done!
+ */
+ VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
+
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+
+ CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
+
+ *ppGVM = pGVM;
+ Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
+ return VINF_SUCCESS;
+ }
+
+ SUPR0SetSessionVM(pSession, NULL, NULL);
+ }
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ }
+
+ /* Cleanup mappings. */
+ if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
+ pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
+ }
+ for (VMCPUID i = 0; i < cCpus; i++)
+ if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
+ pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
+ }
+ if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
+ {
+ RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
+ pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
+ }
+ }
+ }
+ else if (RT_SUCCESS(rc))
+ rc = rc2;
+ }
+ }
+ /* else: The user wasn't permitted to create this VM. */
+
+ /*
+ * The handle will be freed by gvmmR0HandleObjDestructor as we release the
+ * object reference here. A little extra mess because of the non-recursive lock.
+ */
+ void *pvObj = pHandle->pvObj;
+ pHandle->pvObj = NULL;
+ gvmmR0CreateDestroyUnlock(pGVMM);
+
+ SUPR0ObjRelease(pvObj, pSession);
+
+ SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
+ return rc;
+ }
+
+ rc = VERR_NO_MEMORY;
+ }
+ else
+ rc = VERR_GVMM_IPE_1;
+ }
+ else
+ rc = VERR_GVM_TOO_MANY_VMS;
+
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ return rc;
+}
+
+
+/**
+ * Initializes the per VM data belonging to GVMM.
+ *
+ * @param pGVM Pointer to the global VM structure.
+ * @param hSelf The handle.
+ * @param cCpus The CPU count.
+ * @param pSession The session this VM is associated with.
+ */
+static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
+{
+ AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
+ AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
+ AssertCompileMemberAlignment(VM, cpum, 64);
+ AssertCompileMemberAlignment(VM, tm, 64);
+
+ /* GVM: */
+ pGVM->u32Magic = GVM_MAGIC;
+ pGVM->hSelf = hSelf;
+ pGVM->cCpus = cCpus;
+ pGVM->pSession = pSession;
+ pGVM->pSelf = pGVM;
+
+ /* VM: */
+ pGVM->enmVMState = VMSTATE_CREATING;
+ pGVM->hSelfUnsafe = hSelf;
+ pGVM->pSessionUnsafe = pSession;
+ pGVM->pVMR0ForCall = pGVM;
+ pGVM->cCpusUnsafe = cCpus;
+ pGVM->uCpuExecutionCap = 100; /* default is no cap. */
+ pGVM->uStructVersion = 1;
+ pGVM->cbSelf = sizeof(VM);
+ pGVM->cbVCpu = sizeof(VMCPU);
+
+ /* GVMM: */
+ pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
+ pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
+ pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
+ pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
+ pGVM->gvmm.s.fDoneVMMR0Init = false;
+ pGVM->gvmm.s.fDoneVMMR0Term = false;
+
+ /*
+ * Per virtual CPU.
+ */
+ for (VMCPUID i = 0; i < pGVM->cCpus; i++)
+ {
+ pGVM->aCpus[i].idCpu = i;
+ pGVM->aCpus[i].idCpuUnsafe = i;
+ pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
+ pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
+ pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
+ pGVM->aCpus[i].pGVM = pGVM;
+ pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
+ pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
+ pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
+ pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
+ pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
+ pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
+ }
+}
+
+
+/**
+ * Does the VM initialization.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ */
+GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
+{
+ LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
+
+ int rc = VERR_INTERNAL_ERROR_3;
+ if ( !pGVM->gvmm.s.fDoneVMMR0Init
+ && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
+ {
+ for (VMCPUID i = 0; i < pGVM->cCpus; i++)
+ {
+ rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
+ if (RT_FAILURE(rc))
+ {
+ pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
+ break;
+ }
+ }
+ }
+ else
+ rc = VERR_WRONG_ORDER;
+
+ LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Indicates that we're done with the ring-0 initialization
+ * of the VM.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @thread EMT(0)
+ */
+GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
+{
+ /* Set the indicator. */
+ pGVM->gvmm.s.fDoneVMMR0Init = true;
+}
+
+
+/**
+ * Indicates that we're doing the ring-0 termination of the VM.
+ *
+ * @returns true if termination hasn't been done already, false if it has.
+ * @param pGVM Pointer to the global VM structure. Optional.
+ * @thread EMT(0) or session cleanup thread.
+ */
+GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
+{
+ /* Validate the VM structure, state and handle. */
+ AssertPtrReturn(pGVM, false);
+
+ /* Set the indicator. */
+ if (pGVM->gvmm.s.fDoneVMMR0Term)
+ return false;
+ pGVM->gvmm.s.fDoneVMMR0Term = true;
+ return true;
+}
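+
+/* Usage sketch (illustrative only): callers use the return value to make the
+ * ring-0 termination work run exactly once, whether triggered by EMT(0) or by
+ * the session cleanup thread. */
+#if 0
+static void gvmmR0TermOnceSketch(PGVM pGVM)
+{
+    if (GVMMR0DoingTermVM(pGVM))
+    {
+        /* First caller: one-time ring-0 termination work goes here. */
+    }
+    /* else: someone already did (or is doing) the termination work. */
+}
+#endif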
+
+
+/**
+ * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
+ *
+ * This is called from vmR3DestroyFinalBit and from an error path in
+ * VMR3Create, and the caller is not the EMT thread, unfortunately. For
+ * security reasons, it would've been nice if the caller was actually the
+ * EMT thread or if we somehow could've associated the calling thread with
+ * the VM up front.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ *
+ * @thread EMT(0) if it's associated with the VM, otherwise any thread.
+ */
+GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
+{
+ LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ /*
+ * Validate the VM structure, state and caller.
+ */
+ AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
+ AssertReturn(!((uintptr_t)pGVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
+ AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
+ VERR_WRONG_ORDER);
+
+ uint32_t hGVM = pGVM->hSelf;
+ ASMCompilerBarrier();
+ AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
+ AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
+
+ PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
+ AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
+
+ RTPROCESS ProcId = RTProcSelf();
+ RTNATIVETHREAD hSelf = RTThreadNativeSelf();
+ AssertReturn( ( pHandle->hEMT0 == hSelf
+ && pHandle->ProcId == ProcId)
+ || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
+
+ /*
+ * Lookup the handle and destroy the object.
+ * Since the lock isn't recursive and we'll have to leave it before dereferencing the
+ * object, we take some precautions against racing callers just in case...
+ */
+ int rc = gvmmR0CreateDestroyLock(pGVMM);
+ AssertRC(rc);
+
+ /* Be careful here because we might theoretically be racing someone else cleaning up. */
+ if ( pHandle->pGVM == pGVM
+ && ( ( pHandle->hEMT0 == hSelf
+ && pHandle->ProcId == ProcId)
+ || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
+ && RT_VALID_PTR(pHandle->pvObj)
+ && RT_VALID_PTR(pHandle->pSession)
+ && RT_VALID_PTR(pHandle->pGVM)
+ && pHandle->pGVM->u32Magic == GVM_MAGIC)
+ {
+ /* Check that other EMTs have deregistered. */
+ uint32_t cNotDeregistered = 0;
+ for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
+ cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
+ if (cNotDeregistered == 0)
+ {
+ /* Grab the object pointer. */
+ void *pvObj = pHandle->pvObj;
+ pHandle->pvObj = NULL;
+ gvmmR0CreateDestroyUnlock(pGVMM);
+
+ SUPR0ObjRelease(pvObj, pHandle->pSession);
+ }
+ else
+ {
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
+ }
+ }
+ else
+ {
+ SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
+ pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ rc = VERR_GVMM_IPE_2;
+ }
+
+ return rc;
+}
+
+
+/**
+ * Performs VM cleanup task as part of object destruction.
+ *
+ * @param pGVM The GVM pointer.
+ */
+static void gvmmR0CleanupVM(PGVM pGVM)
+{
+ if ( pGVM->gvmm.s.fDoneVMMR0Init
+ && !pGVM->gvmm.s.fDoneVMMR0Term)
+ {
+ if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
+ && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
+ {
+ LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
+ VMMR0TermVM(pGVM, NIL_VMCPUID);
+ }
+ else
+ AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
+ }
+
+ GMMR0CleanupVM(pGVM);
+#ifdef VBOX_WITH_NEM_R0
+ NEMR0CleanupVM(pGVM);
+#endif
+ PDMR0CleanupVM(pGVM);
+ IOMR0CleanupVM(pGVM);
+ PGMR0CleanupVM(pGVM);
+
+ AssertCompile(NIL_RTTHREADCTXHOOK == (RTTHREADCTXHOOK)0); /* Depends on zero initialized memory working for NIL at the moment. */
+ for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
+ {
+ /** @todo Can we busy wait here for all thread-context hooks to be
+ * deregistered before releasing (destroying) it? Only until we find a
+ * solution for not deregistering hooks everytime we're leaving HMR0
+ * context. */
+ VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
+ }
+}
+
+
+/**
+ * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
+ *
+ * pvUser1 is the GVM instance pointer.
+ * pvUser2 is the handle pointer.
+ */
+static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
+{
+ LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
+
+ NOREF(pvObj);
+
+ /*
+ * Some quick, paranoid, input validation.
+ */
+ PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
+ AssertPtr(pHandle);
+ PGVMM pGVMM = (PGVMM)pvUser1;
+ Assert(pGVMM == g_pGVMM);
+ const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
+ if ( !iHandle
+ || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
+ || iHandle != pHandle->iSelf)
+ {
+ SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
+ return;
+ }
+
+ int rc = gvmmR0CreateDestroyLock(pGVMM);
+ AssertRC(rc);
+ rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
+ AssertRC(rc);
+
+ /*
+ * This is a tad slow but a doubly linked list is too much hassle.
+ */
+ if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
+ {
+ SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ return;
+ }
+
+ if (pGVMM->iUsedHead == iHandle)
+ pGVMM->iUsedHead = pHandle->iNext;
+ else
+ {
+ uint16_t iPrev = pGVMM->iUsedHead;
+ int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
+ while (iPrev)
+ {
+ if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
+ {
+ SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ return;
+ }
+ if (RT_UNLIKELY(c-- <= 0))
+ {
+ iPrev = 0;
+ break;
+ }
+
+ if (pGVMM->aHandles[iPrev].iNext == iHandle)
+ break;
+ iPrev = pGVMM->aHandles[iPrev].iNext;
+ }
+ if (!iPrev)
+ {
+ SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ return;
+ }
+
+ Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
+ pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
+ }
+ pHandle->iNext = 0;
+ pGVMM->cVMs--;
+
+ /*
+ * Do the global cleanup round.
+ */
+ PGVM pGVM = pHandle->pGVM;
+ if ( RT_VALID_PTR(pGVM)
+ && pGVM->u32Magic == GVM_MAGIC)
+ {
+ pGVMM->cEMTs -= pGVM->cCpus;
+
+ if (pGVM->pSession)
+ SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
+
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+
+ gvmmR0CleanupVM(pGVM);
+
+ /*
+ * Do the GVMM cleanup - must be done last.
+ */
+ /* The VM and VM pages mappings/allocations. */
+ if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
+ {
+ rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
+ pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
+ }
+
+ if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
+ {
+ rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
+ pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
+ }
+
+ if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
+ {
+ rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
+ pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
+ }
+
+ for (VMCPUID i = 0; i < pGVM->cCpus; i++)
+ {
+ if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
+ {
+ rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
+ pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
+ }
+ if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
+ {
+ rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
+ pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
+ }
+ }
+
+ /* the GVM structure itself. */
+ pGVM->u32Magic |= UINT32_C(0x80000000);
+ Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
+ rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
+ pGVM = NULL;
+
+ /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
+ rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
+ AssertRC(rc);
+ }
+ /* else: GVMMR0CreateVM cleanup. */
+
+ /*
+ * Free the handle.
+ */
+ pHandle->iNext = pGVMM->iFreeHead;
+ pGVMM->iFreeHead = iHandle;
+ ASMAtomicWriteNullPtr(&pHandle->pGVM);
+ ASMAtomicWriteNullPtr(&pHandle->pvObj);
+ ASMAtomicWriteNullPtr(&pHandle->pSession);
+ ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
+ ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
+
+ GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
+}
+
+
+/**
+ * Registers the calling thread as the EMT of a Virtual CPU.
+ *
+ * Note that VCPU 0 is automatically registered during VM creation.
+ *
+ * @returns VBox status code
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu VCPU id to register the current thread as.
+ */
+GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
+{
+ AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
+
+ /*
+ * Validate the VM structure, state and handle.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
+ if (RT_SUCCESS(rc))
+ {
+ if (idCpu < pGVM->cCpus)
+ {
+ /* Check that the EMT isn't already assigned to a thread. */
+ if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
+ {
+ Assert(pGVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
+
+ /* A thread may only be one EMT. */
+ RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
+ for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
+ AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Do the assignment, then try setup the hook. Undo if that fails.
+ */
+ pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
+
+ rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[idCpu]);
+ if (RT_SUCCESS(rc))
+ CPUMR0RegisterVCpuThread(&pGVM->aCpus[idCpu]);
+ else
+ pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
+ }
+ }
+ else
+ rc = VERR_ACCESS_DENIED;
+ }
+ else
+ rc = VERR_INVALID_CPU_ID;
+ }
+ return rc;
+}
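+
+/* Usage sketch (illustrative only): each additional EMT registers itself on
+ * its own thread before making other ring-0 calls for its VCPU; VCPU 0 is
+ * registered implicitly by GVMMR0CreateVM. */
+#if 0
+static int gvmmR0RegisterEmtSketch(PGVM pGVM, VMCPUID idCpu)
+{
+    Assert(idCpu != 0);
+    int rc = GVMMR0RegisterVCpu(pGVM, idCpu); /* binds RTThreadNativeSelf() to aCpus[idCpu] */
+    AssertLogRelRC(rc);
+    return rc;
+}
+#endif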
+
+
+/**
+ * Deregisters the calling thread as the EMT of a Virtual CPU.
+ *
+ * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
+ *
+ * @returns VBox status code
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu VCPU id to deregister the current thread from.
+ */
+GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
+{
+ AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
+
+ /*
+ * Validate the VM structure, state and handle.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Take the destruction lock and recheck the handle state to
+ * prevent racing GVMMR0DestroyVM.
+ */
+ gvmmR0CreateDestroyLock(pGVMM);
+ uint32_t hSelf = pGVM->hSelf;
+ ASMCompilerBarrier();
+ if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
+ && pGVMM->aHandles[hSelf].pvObj != NULL
+ && pGVMM->aHandles[hSelf].pGVM == pGVM)
+ {
+ /*
+ * Do per-EMT cleanups.
+ */
+ VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
+
+ /*
+ * Invalidate hEMT. We don't use NIL here as that would allow
+ * GVMMR0RegisterVCpu to be called again, and we don't want that.
+ */
+ AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
+ pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
+ pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
+ }
+
+ gvmmR0CreateDestroyUnlock(pGVMM);
+ }
+ return rc;
+}
+
+
+/**
+ * Lookup a GVM structure by its handle.
+ *
+ * @returns The GVM pointer on success, NULL on failure.
+ * @param hGVM The global VM handle. Asserts on bad handle.
+ */
+GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
+{
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
+
+ /*
+ * Validate.
+ */
+ AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
+ AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
+
+ /*
+ * Look it up.
+ */
+ PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
+ AssertPtrReturn(pHandle->pvObj, NULL);
+ PGVM pGVM = pHandle->pGVM;
+ AssertPtrReturn(pGVM, NULL);
+
+ return pGVM;
+}
+
+
+/**
+ * Check that the given GVM and VM structures match up.
+ *
+ * The calling thread must be in the same process as the VM. All current lookups
+ * are by threads inside the same process, so this will not be an issue.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param ppGVMM Where to store the pointer to the GVMM instance data.
+ * @param fTakeUsedLock Whether to take the used lock or not. We take it in
+ * shared mode when requested.
+ *
+ * Be very careful if not taking the lock as it's
+ * possible that the VM will disappear then!
+ *
+ * @remark This will not assert on an invalid pGVM but try return silently.
+ */
+static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
+{
+ /*
+ * Check the pointers.
+ */
+ int rc;
+ if (RT_LIKELY( RT_VALID_PTR(pGVM)
+ && ((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0 ))
+ {
+ /*
+ * Get the pGVMM instance and check the VM handle.
+ */
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ uint16_t hGVM = pGVM->hSelf;
+ if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
+ && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
+ {
+ RTPROCESS const pidSelf = RTProcSelf();
+ PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
+ if (fTakeUsedLock)
+ {
+ rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
+ AssertRCReturn(rc, rc);
+ }
+
+ if (RT_LIKELY( pHandle->pGVM == pGVM
+ && pHandle->ProcId == pidSelf
+ && RT_VALID_PTR(pHandle->pvObj)))
+ {
+ /*
+ * Some more VM data consistency checks.
+ */
+ if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
+ && pGVM->hSelfUnsafe == hGVM
+ && pGVM->pSelf == pGVM))
+ {
+ if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
+ && pGVM->enmVMState <= VMSTATE_TERMINATED))
+ {
+ *ppGVMM = pGVMM;
+ return VINF_SUCCESS;
+ }
+ rc = VERR_INCONSISTENT_VM_HANDLE;
+ }
+ else
+ rc = VERR_INCONSISTENT_VM_HANDLE;
+ }
+ else
+ rc = VERR_INVALID_VM_HANDLE;
+
+ if (fTakeUsedLock)
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ }
+ else
+ rc = VERR_INVALID_VM_HANDLE;
+ }
+ else
+ rc = VERR_INVALID_POINTER;
+ return rc;
+}
+
+
+/**
+ * Validates a GVM/VM pair.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ */
+GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
+{
+ PGVMM pGVMM;
+ return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
+}
+
+
+/**
+ * Check that the given GVM and VM structures match up.
+ *
+ * The calling thread must be in the same process as the VM. All current lookups
+ * are by threads inside the same process, so this will not be an issue.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
+ * @param ppGVMM Where to store the pointer to the GVMM instance data.
+ * @thread EMT
+ *
+ * @remarks This will assert in all failure paths.
+ */
+static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
+{
+ /*
+ * Check the pointers.
+ */
+ AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
+ AssertReturn(((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
+
+ /*
+ * Get the pGVMM instance and check the VM handle.
+ */
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ uint16_t hGVM = pGVM->hSelf;
+ ASMCompilerBarrier();
+ AssertReturn( hGVM != NIL_GVM_HANDLE
+ && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
+
+ RTPROCESS const pidSelf = RTProcSelf();
+ PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
+ AssertReturn( pHandle->pGVM == pGVM
+ && pHandle->ProcId == pidSelf
+ && RT_VALID_PTR(pHandle->pvObj),
+ VERR_INVALID_HANDLE);
+
+ /*
+ * Check the EMT claim.
+ */
+ RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
+ AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
+ AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
+
+ /*
+ * Some more VM data consistency checks.
+ */
+ AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
+ AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
+ AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
+ && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
+
+ *ppGVMM = pGVMM;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Validates a GVM/EMT pair.
+ *
+ * @returns VBox status code.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The Virtual CPU ID of the calling EMT.
+ * @thread EMT(idCpu)
+ */
+GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
+{
+ PGVMM pGVMM;
+ return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
+}
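+
+/* Usage sketch (illustrative only): ring-0 entry points typically validate
+ * the GVM/EMT pair first and only then trust pGVM and idCpu. */
+#if 0
+static int gvmmR0EntryPointSketch(PGVM pGVM, VMCPUID idCpu)
+{
+    int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
+    if (RT_FAILURE(rc))
+        return rc;
+    /* Safe: pGVM is consistent and the caller is EMT(idCpu). */
+    return VINF_SUCCESS;
+}
+#endif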
+
+
+/**
+ * Looks up the VM belonging to the specified EMT thread.
+ *
+ * This is used by the assertion machinery in VMMR0.cpp to avoid causing
+ * unnecessary kernel panics when the EMT thread hits an assertion. The
+ * caller may or may not be an EMT thread.
+ *
+ * @returns Pointer to the VM on success, NULL on failure.
+ * @param hEMT The native thread handle of the EMT.
+ * NIL_RTNATIVETHREAD means the current thread
+ */
+GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
+{
+ /*
+ * No Assertions here as we're usually called in a AssertMsgN or
+ * RTAssert* context.
+ */
+ PGVMM pGVMM = g_pGVMM;
+ if ( !RT_VALID_PTR(pGVMM)
+ || pGVMM->u32Magic != GVMM_MAGIC)
+ return NULL;
+
+ if (hEMT == NIL_RTNATIVETHREAD)
+ hEMT = RTThreadNativeSelf();
+ RTPROCESS ProcId = RTProcSelf();
+
+ /*
+ * Search the handles in a linear fashion as we don't dare to take the lock (assert).
+ */
+/** @todo introduce some pid hash table here, please. */
+ for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
+ {
+ if ( pGVMM->aHandles[i].iSelf == i
+ && pGVMM->aHandles[i].ProcId == ProcId
+ && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
+ && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
+ {
+ if (pGVMM->aHandles[i].hEMT0 == hEMT)
+ return pGVMM->aHandles[i].pGVM;
+
+ /* This is fairly safe with the current process-per-VM approach. */
+ PGVM pGVM = pGVMM->aHandles[i].pGVM;
+ VMCPUID const cCpus = pGVM->cCpus;
+ ASMCompilerBarrier();
+ if ( cCpus < 1
+ || cCpus > VMM_MAX_CPU_COUNT)
+ continue;
+ for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
+ if (pGVM->aCpus[idCpu].hEMT == hEMT)
+ return pGVMM->aHandles[i].pGVM;
+ }
+ }
+ return NULL;
+}
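+
+/* Usage sketch (illustrative only): assertion and logging code can look up
+ * the current thread's VM without taking any locks, accepting NULL when the
+ * thread isn't a registered EMT. */
+#if 0
+static void gvmmR0AssertHelperSketch(void)
+{
+    PVMCC pVM = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD /* current thread */);
+    if (pVM)
+        SUPR0Printf("Asserting on an EMT of VM %p\n", pVM);
+}
+#endif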
+
+
+/**
+ * Looks up the GVMCPU belonging to the specified EMT thread.
+ *
+ * This is used by the assertion machinery in VMMR0.cpp to avoid causing
+ * unnecessary kernel panics when the EMT thread hits an assertion. The
+ * caller may or may not be an EMT thread.
+ *
+ * @returns Pointer to the VMCPU on success, NULL on failure.
+ * @param hEMT The native thread handle of the EMT.
+ * NIL_RTNATIVETHREAD means the current thread
+ */
+GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
+{
+ /*
+ * No Assertions here as we're usually called in a AssertMsgN,
+ * RTAssert*, Log and LogRel contexts.
+ */
+ PGVMM pGVMM = g_pGVMM;
+ if ( !RT_VALID_PTR(pGVMM)
+ || pGVMM->u32Magic != GVMM_MAGIC)
+ return NULL;
+
+ if (hEMT == NIL_RTNATIVETHREAD)
+ hEMT = RTThreadNativeSelf();
+ RTPROCESS ProcId = RTProcSelf();
+
+ /*
+ * Search the handles in a linear fashion as we don't dare to take the lock (assert).
+ */
+/** @todo introduce some pid hash table here, please. */
+ for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
+ {
+ if ( pGVMM->aHandles[i].iSelf == i
+ && pGVMM->aHandles[i].ProcId == ProcId
+ && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
+ && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
+ {
+ PGVM pGVM = pGVMM->aHandles[i].pGVM;
+ if (pGVMM->aHandles[i].hEMT0 == hEMT)
+ return &pGVM->aCpus[0];
+
+ /* This is fairly safe with the current process-per-VM approach. */
+ VMCPUID const cCpus = pGVM->cCpus;
+ ASMCompilerBarrier();
+ if ( cCpus < 1
+ || cCpus > VMM_MAX_CPU_COUNT)
+ continue;
+ for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
+ if (pGVM->aCpus[idCpu].hEMT == hEMT)
+ return &pGVM->aCpus[idCpu];
+ }
+ }
+ return NULL;
+}
+
+
+/**
+ * This will wake up expired and soon-to-be-expired VMs.
+ *
+ * @returns The number of EMTs that have been woken up.
+ * @param pGVMM Pointer to the GVMM instance data.
+ * @param u64Now The current time.
+ */
+static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
+{
+ /*
+ * Skip this if early wake-ups have been disabled, either because of high
+ * resolution wake-ups or by the user.
+ */
+ if (!pGVMM->fDoEarlyWakeUps)
+ return 0;
+
+/** @todo Rewrite this algorithm. See performance defect XYZ. */
+
+ /*
+ * A cheap optimization to stop wasting so much time here on big setups.
+ */
+ const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
+ if ( pGVMM->cHaltedEMTs == 0
+ || uNsEarlyWakeUp2 < pGVMM->uNsNextEmtWakeup) /* no EMT expires within the early wake-up horizon */
+ return 0;
+
+ /*
+ * Only one thread doing this at a time.
+ */
+ if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
+ return 0;
+
+ /*
+ * The first pass will wake up VMs which have actually expired
+ * and look for VMs that should be woken up in the 2nd and 3rd passes.
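+ *
+ * Relative to u64Now, each halted EMT falls in one of these windows (a
+ * sketch of the intended bucketing):
+ * pass 1: u64HaltExpire <= u64Now (expired, wake now)
+ * pass 2: u64Now < u64HaltExpire <= u64Now + nsEarlyWakeUp1
+ * pass 3: u64Now + nsEarlyWakeUp1 < u64HaltExpire <= u64Now + nsEarlyWakeUp2
+ * beyond: only feeds the u64Min / uNsNextEmtWakeup tracking.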
+ */
+ const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
+ uint64_t u64Min = UINT64_MAX;
+ unsigned cWoken = 0;
+ unsigned cHalted = 0;
+ unsigned cTodo2nd = 0;
+ unsigned cTodo3rd = 0;
+ for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
+ if ( RT_VALID_PTR(pCurGVM)
+ && pCurGVM->u32Magic == GVM_MAGIC)
+ {
+ for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
+ {
+ PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
+ uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
+ if (u64)
+ {
+ if (u64 <= u64Now)
+ {
+ if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
+ {
+ int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
+ AssertRC(rc);
+ cWoken++;
+ }
+ }
+ else
+ {
+ cHalted++;
+ if (u64 <= uNsEarlyWakeUp1)
+ cTodo2nd++;
+ else if (u64 <= uNsEarlyWakeUp2)
+ cTodo3rd++;
+ else if (u64 < u64Min)
+ u64Min = u64; /* track the earliest wake-up beyond the early wake-up horizons */
+ }
+ }
+ }
+ }
+ AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
+ }
+
+ if (cTodo2nd)
+ {
+ for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
+ if ( RT_VALID_PTR(pCurGVM)
+ && pCurGVM->u32Magic == GVM_MAGIC)
+ {
+ for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
+ {
+ PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
+ uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
+ if ( u64
+ && u64 <= uNsEarlyWakeUp1)
+ {
+ if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
+ {
+ int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
+ AssertRC(rc);
+ cWoken++;
+ }
+ }
+ }
+ }
+ AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
+ }
+ }
+
+ if (cTodo3rd)
+ {
+ for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
+ if ( RT_VALID_PTR(pCurGVM)
+ && pCurGVM->u32Magic == GVM_MAGIC)
+ {
+ for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
+ {
+ PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
+ uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
+ if ( u64
+ && u64 <= uNsEarlyWakeUp2)
+ {
+ if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
+ {
+ int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
+ AssertRC(rc);
+ cWoken++;
+ }
+ }
+ }
+ }
+ AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
+ }
+ }
+
+ /*
+ * Publish the earliest remaining wake-up time for the cheap optimization above.
+ */
+ pGVMM->uNsNextEmtWakeup = u64Min;
+
+ ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
+ return cWoken;
+}
+
+
+/**
+ * Halt the EMT thread.
+ *
+ * @returns VINF_SUCCESS normal wakeup (timeout or kicked by another thread).
+ * VERR_INTERRUPTED if a signal was scheduled for the thread.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pGVCpu The global (ring-0) CPU structure of the calling
+ * EMT.
+ * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
+ * @thread EMT(pGVCpu).
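+ *
+ * @par Example
+ * A minimal sketch (hypothetical EMT caller; cNsToSleep is an assumed
+ * input) halting until a GIP-time deadline:
+ * @code
+ * int rc = GVMMR0SchedHalt(pGVM, pGVCpu, RTTimeNanoTS() + cNsToSleep);
+ * if (rc == VERR_INTERRUPTED)
+ * return rc; // a signal is pending for the calling thread
+ * @endcode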
+ */
+GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
+{
+ LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
+ pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ pGVM->gvmm.s.StatsSched.cHaltCalls++;
+ Assert(!pGVCpu->gvmm.s.u64HaltExpire);
+
+ /*
+ * If we're doing early wake-ups, we must take the UsedList lock before we
+ * start querying the current time.
+ * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
+ */
+ bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
+ if (fDoEarlyWakeUps)
+ {
+ int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+
+ pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
+
+ /* GIP hack: We might frequently be sleeping for short intervals where the
+ difference between GIP and system time matters on systems with high resolution
+ system time. So, convert the input from GIP to system time in that case. */
+ Assert(ASMGetFlags() & X86_EFL_IF);
+ const uint64_t u64NowSys = RTTimeSystemNanoTS();
+ const uint64_t u64NowGip = RTTimeNanoTS();
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
+ if (fDoEarlyWakeUps)
+ {
+ pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+
+ /*
+ * Go to sleep if we must...
+ * Cap the sleep time to 1 second to be on the safe side.
+ */
+ int rc;
+ uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
+ if ( u64NowGip < u64ExpireGipTime
+ && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
+ ? pGVMM->nsMinSleepCompany
+ : pGVMM->nsMinSleepAlone))
+ {
+ pGVM->gvmm.s.StatsSched.cHaltBlocking++;
+ if (cNsInterval > RT_NS_1SEC)
+ {
+ cNsInterval = RT_NS_1SEC; /* keep the system-time fallback below consistent with the cap */
+ u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
+ }
+ ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
+ ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
+ if (fDoEarlyWakeUps)
+ {
+ if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
+ pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ }
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
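+ /* Wait on the absolute deadline. If GIP time is ahead of system time, the
+ GIP deadline is used as-is (erring on the long side); otherwise the
+ interval is re-anchored on the system clock, cf. the GIP hack above. */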
+ rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
+ RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
+ u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
+ ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
+ ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
+
+ /* Reset the semaphore to try to prevent a few false wake-ups. */
+ if (rc == VINF_SUCCESS)
+ {
+ RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+ else if (rc == VERR_TIMEOUT)
+ {
+ pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
+ rc = VINF_SUCCESS;
+ }
+ }
+ else
+ {
+ pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
+ if (fDoEarlyWakeUps)
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ rc = VINF_SUCCESS;
+ }
+
+ return rc;
+}
+
+
+/**
+ * Halt the EMT thread.
+ *
+ * @returns VINF_SUCCESS normal wakeup (timeout or kicked by another thread).
+ * VERR_INTERRUPTED if a signal was scheduled for the thread.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The Virtual CPU ID of the calling EMT.
+ * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
+ * @thread EMT(idCpu).
+ */
+GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
+{
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
+ if (RT_SUCCESS(rc))
+ {
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
+ }
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ return rc;
+}
+
+
+/**
+ * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
+ * a sleeping EMT.
+ *
+ * @retval VINF_SUCCESS if successfully woken up.
+ * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pGVCpu The global (ring-0) VCPU structure.
+ */
+DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
+{
+ pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
+
+ /*
+ * Signal the semaphore regardless of whether the EMT is currently blocked on it.
+ *
+ * The reason for this is that there is absolutely no way we can be 100%
+ * certain that the EMT isn't *about* to go to sleep on it and just got
+ * delayed a bit en route. So, we will always signal the semaphore when
+ * the EMT is flagged as halted in the VMM.
+ */
+/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
+ int rc;
+ if (pGVCpu->gvmm.s.u64HaltExpire)
+ {
+ rc = VINF_SUCCESS;
+ ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
+ }
+ else
+ {
+ rc = VINF_GVM_NOT_BLOCKED;
+ pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
+ }
+
+ int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
+ AssertRC(rc2);
+
+ return rc;
+}
+
+
+/**
+ * Wakes up the halted EMT thread so it can service a pending request.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if successfully woken up.
+ * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The Virtual CPU ID of the EMT to wake up.
+ * @param fTakeUsedLock Take the used lock or not.
+ * @thread Any but EMT(idCpu).
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
+{
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
+ /*
+ * Validate input and take the UsedLock.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ if (RT_SUCCESS(rc))
+ {
+ if (idCpu < pGVM->cCpus)
+ {
+ /*
+ * Do the actual job.
+ */
+ rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+
+ if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
+ {
+ /*
+ * While we're here, do a round of scheduling.
+ */
+ Assert(ASMGetFlags() & X86_EFL_IF);
+ const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
+ pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+ }
+ else
+ rc = VERR_INVALID_CPU_ID;
+
+ if (fTakeUsedLock)
+ {
+ int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ AssertRC(rc2);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+ }
+
+ LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Wakes up the halted EMT thread so it can service a pending request.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if successfully woken up.
+ * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The Virtual CPU ID of the EMT to wake up.
+ * @thread Any but EMT(idCpu).
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
+{
+ return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
+}
+
+
+/**
+ * Wakes up the halted EMT thread so it can service a pending request,
+ * without taking the used lock.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if successfully woken up.
+ * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The Virtual CPU ID of the EMT to wake up.
+ * @thread Any but EMT(idCpu).
+ * @deprecated Don't use in new code if possible! Use the GVM variant.
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
+{
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ if (RT_SUCCESS(rc))
+ rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
+ return rc;
+}
+
+
+/**
+ * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
+ * the Virtual CPU if it's still busy executing guest code.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if poked successfully.
+ * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pVCpu The cross context virtual CPU structure.
+ */
+DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
+{
+ pGVM->gvmm.s.StatsSched.cPokeCalls++;
+
+ RTCPUID idHostCpu = pVCpu->idHostCpu;
+ if ( idHostCpu == NIL_RTCPUID
+ || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
+ {
+ pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
+ return VINF_GVM_NOT_BUSY_IN_GC;
+ }
+
+ /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
+ RTMpPokeCpu(idHostCpu);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Pokes an EMT if it's still busy running guest code.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if poked successfully.
+ * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The ID of the virtual CPU to poke.
+ * @param fTakeUsedLock Take the used lock or not.
+ */
+GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
+{
+ /*
+ * Validate input and take the UsedLock.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
+ if (RT_SUCCESS(rc))
+ {
+ if (idCpu < pGVM->cCpus)
+ rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
+ else
+ rc = VERR_INVALID_CPU_ID;
+
+ if (fTakeUsedLock)
+ {
+ int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ AssertRC(rc2);
+ }
+ }
+
+ LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * Pokes an EMT if it's still busy running guest code.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if poked successfully.
+ * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The ID of the virtual CPU to poke.
+ */
+GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
+{
+ return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
+}
+
+
+/**
+ * Pokes an EMT if it's still busy running guest code, without taking the
+ * used lock.
+ *
+ * @returns VBox status code.
+ * @retval VINF_SUCCESS if poked successfully.
+ * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The ID of the virtual CPU to poke.
+ *
+ * @deprecated Don't use in new code if possible! Use the GVM variant.
+ */
+GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
+{
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
+ if (RT_SUCCESS(rc))
+ {
+ if (idCpu < pGVM->cCpus)
+ rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
+ else
+ rc = VERR_INVALID_CPU_ID;
+ }
+ return rc;
+}
+
+
+/**
+ * Wakes up a set of halted EMT threads so they can service pending requests.
+ *
+ * @returns VBox status code; no informational status codes are returned.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pSleepSet The set of sleepers to wake up.
+ * @param pPokeSet The set of CPUs to poke.
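+ *
+ * @par Example
+ * A minimal sketch (hypothetical caller; the VCPU IDs are assumed) waking
+ * a halted VCPU 0 and poking a busy VCPU 1:
+ * @code
+ * VMCPUSET SleepSet, PokeSet;
+ * VMCPUSET_EMPTY(&SleepSet);
+ * VMCPUSET_EMPTY(&PokeSet);
+ * VMCPUSET_ADD(&SleepSet, 0);
+ * VMCPUSET_ADD(&PokeSet, 1);
+ * int rc = GVMMR0SchedWakeUpAndPokeCpus(pGVM, &SleepSet, &PokeSet);
+ * @endcode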
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
+{
+ AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
+ AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
+ GVMM_CHECK_SMAP_SETUP();
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ RTNATIVETHREAD hSelf = RTThreadNativeSelf();
+
+ /*
+ * Validate input and take the UsedLock.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ if (RT_SUCCESS(rc))
+ {
+ rc = VINF_SUCCESS;
+ VMCPUID idCpu = pGVM->cCpus;
+ while (idCpu-- > 0)
+ {
+ /* Don't try to poke or wake up ourselves. */
+ if (pGVM->aCpus[idCpu].hEMT == hSelf)
+ continue;
+
+ /* just ignore errors for now. */
+ if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
+ {
+ gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+ else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
+ {
+ gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+ }
+
+ int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ AssertRC(rc2);
+ GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
+ }
+
+ LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+/**
+ * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
+ *
+ * @returns see GVMMR0SchedWakeUpAndPokeCpus.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param pReq Pointer to the request packet.
+ */
+GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+
+ return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
+}
+
+
+/**
+ * Poll the schedule to see if someone else should get a chance to run.
+ *
+ * This is a bit hackish and will not work too well if the machine is
+ * under heavy load from non-VM processes.
+ *
+ * @returns VINF_SUCCESS if not yielded.
+ * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idCpu The Virtual CPU ID of the calling EMT.
+ * @param fYield Whether to yield or not.
+ * This is for when we're spinning in the halt loop.
+ * @thread EMT(idCpu).
+ */
+GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
+{
+ /*
+ * Validate input.
+ */
+ PGVMM pGVMM;
+ int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * We currently only implement helping doing wakeups (fYield = false), so don't
+ * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
+ */
+ if (!fYield && pGVMM->fDoEarlyWakeUps)
+ {
+ rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
+ pGVM->gvmm.s.StatsSched.cPollCalls++;
+
+ Assert(ASMGetFlags() & X86_EFL_IF);
+ const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
+
+ pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
+
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ }
+ /*
+ * Not quite sure what we could do here...
+ */
+ else if (fYield)
+ rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
+ else
+ rc = VINF_SUCCESS;
+ }
+
+ LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
+ return rc;
+}
+
+
+#ifdef GVMM_SCHED_WITH_PPT
+/**
+ * Timer callback for the periodic preemption timer.
+ *
+ * @param pTimer The timer handle.
+ * @param pvUser Pointer to the per cpu structure.
+ * @param iTick The current tick.
+ */
+static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
+{
+ PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
+ NOREF(pTimer); NOREF(iTick);
+
+ /*
+ * Termination check
+ */
+ if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
+ return;
+
+ /*
+ * Do the housekeeping.
+ */
+ RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
+
+ if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
+ {
+ /*
+ * Historicize the max frequency.
+ */
+ uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
+ pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
+ pCpu->Ppt.iTickHistorization = 0;
+ pCpu->Ppt.uDesiredHz = 0;
+
+ /*
+ * Check whether the current timer frequency still matches the history maximum.
+ */
+ uint32_t uHistMaxHz = 0;
+ for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
+ if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
+ uHistMaxHz = pCpu->Ppt.aHzHistory[i];
+ if (uHistMaxHz == pCpu->Ppt.uTimerHz)
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+ else if (uHistMaxHz)
+ {
+ /*
+ * Reprogram it.
+ */
+ pCpu->Ppt.cChanges++;
+ pCpu->Ppt.iTickHistorization = 0;
+ pCpu->Ppt.uTimerHz = uHistMaxHz;
+ uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
+ pCpu->Ppt.cNsInterval = cNsInterval;
+ if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
+ pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
+ + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
+ / cNsInterval;
+ else
+ pCpu->Ppt.cTicksHistoriziationInterval = 1;
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+
+ /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
+ RTTimerChangeInterval(pTimer, cNsInterval);
+ }
+ else
+ {
+ /*
+ * Stop it.
+ */
+ pCpu->Ppt.fStarted = false;
+ pCpu->Ppt.uTimerHz = 0;
+ pCpu->Ppt.cNsInterval = 0;
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+
+ /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
+ RTTimerStop(pTimer);
+ }
+ }
+ else
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+}
+#endif /* GVMM_SCHED_WITH_PPT */
+
+
+/**
+ * Updates the periodic preemption timer for the calling CPU.
+ *
+ * The caller must have disabled preemption!
+ * The caller must check that the host can do high resolution timers.
+ *
+ * @param pGVM The global (ring-0) VM structure.
+ * @param idHostCpu The current host CPU id.
+ * @param uHz The desired frequency.
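+ *
+ * @par Example
+ * A minimal caller sketch (hypothetical context; pGVM and uHz are assumed
+ * inputs):
+ * @code
+ * RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+ * RTThreadPreemptDisable(&PreemptState);
+ * if (RTTimerCanDoHighResolution())
+ * GVMMR0SchedUpdatePeriodicPreemptionTimer(pGVM, RTMpCpuId(), uHz);
+ * RTThreadPreemptRestore(&PreemptState);
+ * @endcode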
+ */
+GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
+{
+ NOREF(pGVM);
+#ifdef GVMM_SCHED_WITH_PPT
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(RTTimerCanDoHighResolution());
+
+ /*
+ * Resolve the per CPU data.
+ */
+ uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
+ PGVMM pGVMM = g_pGVMM;
+ if ( !RT_VALID_PTR(pGVMM)
+ || pGVMM->u32Magic != GVMM_MAGIC)
+ return;
+ AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
+ PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
+ AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
+ && pCpu->idCpu == idHostCpu,
+ ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
+
+ /*
+ * Check whether we need to do anything about the timer.
+ * We have to be a little bit careful since we might be racing the timer
+ * callback here.
+ */
+ if (uHz > 16384)
+ uHz = 16384; /** @todo add a query method for this! */
+ if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
+ && uHz >= pCpu->Ppt.uMinHz
+ && !pCpu->Ppt.fStarting /* solaris paranoia */))
+ {
+ RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
+
+ pCpu->Ppt.uDesiredHz = uHz;
+ uint32_t cNsInterval = 0;
+ if (!pCpu->Ppt.fStarted)
+ {
+ pCpu->Ppt.cStarts++;
+ pCpu->Ppt.fStarted = true;
+ pCpu->Ppt.fStarting = true;
+ pCpu->Ppt.iTickHistorization = 0;
+ pCpu->Ppt.uTimerHz = uHz;
+ pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
+ if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
+ pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
+ + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
+ / cNsInterval;
+ else
+ pCpu->Ppt.cTicksHistoriziationInterval = 1;
+ }
+
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+
+ if (cNsInterval)
+ {
+ RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
+ int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
+ AssertRC(rc);
+
+ RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
+ if (RT_FAILURE(rc))
+ pCpu->Ppt.fStarted = false;
+ pCpu->Ppt.fStarting = false;
+ RTSpinlockRelease(pCpu->Ppt.hSpinlock);
+ }
+ }
+#else /* !GVMM_SCHED_WITH_PPT */
+ NOREF(idHostCpu); NOREF(uHz);
+#endif /* !GVMM_SCHED_WITH_PPT */
+}
+
+
+/**
+ * Calls @a pfnCallback for each VM in the system.
+ *
+ * This will enumerate the VMs while holding the global VM used list lock in
+ * shared mode, so it is only suitable for simple work. If more expensive work
+ * needs doing, a different approach must be taken as using this API would
+ * otherwise block VM creation and destruction.
+ *
+ * @returns VBox status code.
+ * @param pfnCallback The callback function.
+ * @param pvUser User argument to the callback.
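+ *
+ * @par Example
+ * A minimal sketch (hypothetical callback) counting the enumerated VMs:
+ * @code
+ * static DECLCALLBACK(int) myCountVMsCallback(PGVM pGVM, void *pvUser)
+ * {
+ * NOREF(pGVM);
+ * *(uint32_t *)pvUser += 1;
+ * return VINF_SUCCESS;
+ * }
+ *
+ * uint32_t cVMs = 0;
+ * int rc = GVMMR0EnumVMs(myCountVMsCallback, &cVMs);
+ * @endcode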
+ */
+GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
+{
+ PGVMM pGVMM;
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ int rc = VINF_SUCCESS;
+ GVMMR0_USED_SHARED_LOCK(pGVMM);
+ for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext, cLoops++)
+ {
+ PGVM pGVM = pGVMM->aHandles[i].pGVM;
+ if ( RT_VALID_PTR(pGVM)
+ && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
+ && pGVM->u32Magic == GVM_MAGIC)
+ {
+ rc = pfnCallback(pGVM, pvUser);
+ if (rc != VINF_SUCCESS)
+ break;
+ }
+
+ AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
+ }
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+ return rc;
+}
+
+
+/**
+ * Retrieves the GVMM statistics visible to the caller.
+ *
+ * @returns VBox status code.
+ *
+ * @param pStats Where to put the statistics.
+ * @param pSession The current session.
+ * @param pGVM The GVM to obtain statistics for. Optional.
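+ *
+ * @par Example
+ * A minimal sketch (assumed session handle; a real ring-0 caller gets the
+ * buffer via the GVMMR0QueryStatisticsReq packet rather than the stack):
+ * @code
+ * GVMMSTATS Stats;
+ * int rc = GVMMR0QueryStatistics(&Stats, pSession, NULL);
+ * if (RT_SUCCESS(rc))
+ * SUPR0Printf("GVMM: %u VMs, %u EMTs\n", Stats.cVMs, Stats.cEMTs);
+ * @endcode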
+ */
+GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
+{
+ LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
+
+ /*
+ * Validate input.
+ */
+ AssertPtrReturn(pSession, VERR_INVALID_POINTER);
+ AssertPtrReturn(pStats, VERR_INVALID_POINTER);
+ pStats->cVMs = 0; /* (crash before taking the sem...) */
+
+ /*
+ * Take the lock and get the VM statistics.
+ */
+ PGVMM pGVMM;
+ if (pGVM)
+ {
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
+ if (RT_FAILURE(rc))
+ return rc;
+ pStats->SchedVM = pGVM->gvmm.s.StatsSched;
+ }
+ else
+ {
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+ memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
+
+ int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
+ AssertRCReturn(rc, rc);
+ }
+
+ /*
+ * Enumerate the VMs and add the statistics of the ones visible to the caller.
+ */
+ pStats->cVMs = 0;
+ pStats->cEMTs = 0;
+ memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
+
+ for (unsigned i = pGVMM->iUsedHead;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
+ void *pvObj = pGVMM->aHandles[i].pvObj;
+ if ( RT_VALID_PTR(pvObj)
+ && RT_VALID_PTR(pOtherGVM)
+ && pOtherGVM->u32Magic == GVM_MAGIC
+ && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
+ {
+ pStats->cVMs++;
+ pStats->cEMTs += pOtherGVM->cCpus;
+
+ pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
+ pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
+ pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
+ pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
+ pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
+
+ pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
+ pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
+ pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
+
+ pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
+ pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
+
+ pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
+ pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
+ pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
+ }
+ }
+
+ /*
+ * Copy out the per host CPU statistics.
+ */
+ uint32_t iDstCpu = 0;
+ uint32_t cSrcCpus = pGVMM->cHostCpus;
+ for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
+ {
+ if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
+ {
+ pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
+ pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
+#ifdef GVMM_SCHED_WITH_PPT
+ pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
+ pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
+ pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
+ pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
+#else
+ pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
+ pStats->aHostCpus[iDstCpu].uTimerHz = 0;
+ pStats->aHostCpus[iDstCpu].cChanges = 0;
+ pStats->aHostCpus[iDstCpu].cStarts = 0;
+#endif
+ iDstCpu++;
+ if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
+ break;
+ }
+ }
+ pStats->cHostCpus = iDstCpu;
+
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VMMR0 request wrapper for GVMMR0QueryStatistics.
+ *
+ * @returns see GVMMR0QueryStatistics.
+ * @param pGVM The global (ring-0) VM structure. Optional.
+ * @param pReq Pointer to the request packet.
+ * @param pSession The current session.
+ */
+GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+ AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
+
+ return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
+}
+
+
+/**
+ * Resets the specified GVMM statistics.
+ *
+ * @returns VBox status code.
+ *
+ * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
+ * @param pSession The current session.
+ * @param pGVM The GVM to reset statistics for. Optional.
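+ *
+ * @par Example
+ * A minimal sketch (assumed handles; a real ring-0 caller would use the
+ * GVMMR0ResetStatisticsReq packet) resetting only the halt counters of one
+ * VM by marking the corresponding fields non-zero:
+ * @code
+ * GVMMSTATS Stats;
+ * RT_ZERO(Stats);
+ * Stats.SchedVM.cHaltCalls = 1;
+ * Stats.SchedVM.cHaltBlocking = 1;
+ * int rc = GVMMR0ResetStatistics(&Stats, pSession, pGVM);
+ * @endcode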
+ */
+GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
+{
+ LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
+
+ /*
+ * Validate input.
+ */
+ AssertPtrReturn(pSession, VERR_INVALID_POINTER);
+ AssertPtrReturn(pStats, VERR_INVALID_POINTER);
+
+ /*
+ * Take the lock and get the VM statistics.
+ */
+ PGVMM pGVMM;
+ if (pGVM)
+ {
+ int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
+ if (RT_FAILURE(rc))
+ return rc;
+# define MAYBE_RESET_FIELD(field) \
+ do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
+ MAYBE_RESET_FIELD(cHaltCalls);
+ MAYBE_RESET_FIELD(cHaltBlocking);
+ MAYBE_RESET_FIELD(cHaltTimeouts);
+ MAYBE_RESET_FIELD(cHaltNotBlocking);
+ MAYBE_RESET_FIELD(cHaltWakeUps);
+ MAYBE_RESET_FIELD(cWakeUpCalls);
+ MAYBE_RESET_FIELD(cWakeUpNotHalted);
+ MAYBE_RESET_FIELD(cWakeUpWakeUps);
+ MAYBE_RESET_FIELD(cPokeCalls);
+ MAYBE_RESET_FIELD(cPokeNotBusy);
+ MAYBE_RESET_FIELD(cPollCalls);
+ MAYBE_RESET_FIELD(cPollHalts);
+ MAYBE_RESET_FIELD(cPollWakeUps);
+# undef MAYBE_RESET_FIELD
+ }
+ else
+ {
+ GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
+
+ int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
+ AssertRCReturn(rc, rc);
+ }
+
+ /*
+ * Enumerate the VMs and reset the statistics of the ones visible to the caller.
+ */
+ if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
+ {
+ for (unsigned i = pGVMM->iUsedHead;
+ i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
+ i = pGVMM->aHandles[i].iNext)
+ {
+ PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
+ void *pvObj = pGVMM->aHandles[i].pvObj;
+ if ( RT_VALID_PTR(pvObj)
+ && RT_VALID_PTR(pOtherGVM)
+ && pOtherGVM->u32Magic == GVM_MAGIC
+ && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
+ {
+# define MAYBE_RESET_FIELD(field) \
+ do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
+ MAYBE_RESET_FIELD(cHaltCalls);
+ MAYBE_RESET_FIELD(cHaltBlocking);
+ MAYBE_RESET_FIELD(cHaltTimeouts);
+ MAYBE_RESET_FIELD(cHaltNotBlocking);
+ MAYBE_RESET_FIELD(cHaltWakeUps);
+ MAYBE_RESET_FIELD(cWakeUpCalls);
+ MAYBE_RESET_FIELD(cWakeUpNotHalted);
+ MAYBE_RESET_FIELD(cWakeUpWakeUps);
+ MAYBE_RESET_FIELD(cPokeCalls);
+ MAYBE_RESET_FIELD(cPokeNotBusy);
+ MAYBE_RESET_FIELD(cPollCalls);
+ MAYBE_RESET_FIELD(cPollHalts);
+ MAYBE_RESET_FIELD(cPollWakeUps);
+# undef MAYBE_RESET_FIELD
+ }
+ }
+ }
+
+ GVMMR0_USED_SHARED_UNLOCK(pGVMM);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * VMMR0 request wrapper for GVMMR0ResetStatistics.
+ *
+ * @returns see GVMMR0ResetStatistics.
+ * @param pGVM The global (ring-0) VM structure. Optional.
+ * @param pReq Pointer to the request packet.
+ * @param pSession The current session.
+ */
+GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
+{
+ /*
+ * Validate input and pass it on.
+ */
+ AssertPtrReturn(pReq, VERR_INVALID_POINTER);
+ AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
+ AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
+
+ return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
+}
+