diff options
Diffstat (limited to 'src/VBox/VMM/VMMR0/GVMMR0.cpp')
-rw-r--r-- | src/VBox/VMM/VMMR0/GVMMR0.cpp | 3106 |
1 files changed, 3106 insertions, 0 deletions
diff --git a/src/VBox/VMM/VMMR0/GVMMR0.cpp b/src/VBox/VMM/VMMR0/GVMMR0.cpp new file mode 100644 index 00000000..13aef810 --- /dev/null +++ b/src/VBox/VMM/VMMR0/GVMMR0.cpp @@ -0,0 +1,3106 @@ +/* $Id: GVMMR0.cpp $ */ +/** @file + * GVMM - Global VM Manager. + */ + +/* + * Copyright (C) 2007-2019 Oracle Corporation + * + * This file is part of VirtualBox Open Source Edition (OSE), as + * available from http://www.virtualbox.org. This file is free software; + * you can redistribute it and/or modify it under the terms of the GNU + * General Public License (GPL) as published by the Free Software + * Foundation, in version 2 as it comes in the "COPYING" file of the + * VirtualBox OSE distribution. VirtualBox OSE is distributed in the + * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. + */ + + +/** @page pg_gvmm GVMM - The Global VM Manager + * + * The Global VM Manager lives in ring-0. Its main function at the moment is + * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for + * each of them, and assign them unique identifiers (so GMM can track page + * owners). The GVMM also manage some of the host CPU resources, like the + * periodic preemption timer. + * + * The GVMM will create a ring-0 object for each VM when it is registered, this + * is both for session cleanup purposes and for having a point where it is + * possible to implement usage polices later (in SUPR0ObjRegister). + * + * + * @section sec_gvmm_ppt Periodic Preemption Timer (PPT) + * + * On system that sports a high resolution kernel timer API, we use per-cpu + * timers to generate interrupts that preempts VT-x, AMD-V and raw-mode guest + * execution. The timer frequency is calculating by taking the max + * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms + * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) * + * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS). + * + * The TMCalcHostTimerFrequency() part of the things gets its takes the max + * TMTimerSetFrequencyHint() value and adjusts by the current catch-up percent, + * warp drive percent and some fudge factors. VMMR0.cpp reports the result via + * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x, + * AMD-V and raw-mode execution environments. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#define LOG_GROUP LOG_GROUP_GVMM +#include <VBox/vmm/gvmm.h> +#include <VBox/vmm/gmm.h> +#include "GVMMR0Internal.h" +#include <VBox/vmm/gvm.h> +#include <VBox/vmm/vm.h> +#include <VBox/vmm/vmcpuset.h> +#include <VBox/vmm/vmm.h> +#ifdef VBOX_WITH_NEM_R0 +# include <VBox/vmm/nem.h> +#endif +#include <VBox/param.h> +#include <VBox/err.h> + +#include <iprt/asm.h> +#include <iprt/asm-amd64-x86.h> +#include <iprt/critsect.h> +#include <iprt/mem.h> +#include <iprt/semaphore.h> +#include <iprt/time.h> +#include <VBox/log.h> +#include <iprt/thread.h> +#include <iprt/process.h> +#include <iprt/param.h> +#include <iprt/string.h> +#include <iprt/assert.h> +#include <iprt/mem.h> +#include <iprt/memobj.h> +#include <iprt/mp.h> +#include <iprt/cpuset.h> +#include <iprt/spinlock.h> +#include <iprt/timer.h> + +#include "dtrace/VBoxVMM.h" + + +/********************************************************************************************************************************* +* Defined Constants And Macros * +*********************************************************************************************************************************/ +#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING) +/** Define this to enable the periodic preemption timer. */ +# define GVMM_SCHED_WITH_PPT +#endif + + +/** @def GVMM_CHECK_SMAP_SETUP + * SMAP check setup. */ +/** @def GVMM_CHECK_SMAP_CHECK + * Checks that the AC flag is set if SMAP is enabled. If AC is not set, + * it will be logged and @a a_BadExpr is executed. */ +/** @def GVMM_CHECK_SMAP_CHECK2 + * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will + * be logged, written to the VMs assertion text buffer, and @a a_BadExpr is + * executed. */ +#if defined(VBOX_STRICT) || 1 +# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures() +# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \ + do { \ + if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \ + { \ + RTCCUINTREG fEflCheck = ASMGetFlags(); \ + if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \ + { /* likely */ } \ + else \ + { \ + SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \ + a_BadExpr; \ + } \ + } \ + } while (0) +# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) \ + do { \ + if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \ + { \ + RTCCUINTREG fEflCheck = ASMGetFlags(); \ + if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \ + { /* likely */ } \ + else \ + { \ + SUPR0BadContext((a_pVM) ? (a_pVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \ + a_BadExpr; \ + } \ + } \ + } while (0) +#else +# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0 +# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures) +# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) NOREF(fKernelFeatures) +#endif + + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ + +/** + * Global VM handle. + */ +typedef struct GVMHANDLE +{ + /** The index of the next handle in the list (free or used). (0 is nil.) */ + uint16_t volatile iNext; + /** Our own index / handle value. */ + uint16_t iSelf; + /** The process ID of the handle owner. + * This is used for access checks. */ + RTPROCESS ProcId; + /** The pointer to the ring-0 only (aka global) VM structure. */ + PGVM pGVM; + /** The ring-0 mapping of the shared VM instance data. */ + PVM pVM; + /** The virtual machine object. */ + void *pvObj; + /** The session this VM is associated with. */ + PSUPDRVSESSION pSession; + /** The ring-0 handle of the EMT0 thread. + * This is used for ownership checks as well as looking up a VM handle by thread + * at times like assertions. */ + RTNATIVETHREAD hEMT0; +} GVMHANDLE; +/** Pointer to a global VM handle. */ +typedef GVMHANDLE *PGVMHANDLE; + +/** Number of GVM handles (including the NIL handle). */ +#if HC_ARCH_BITS == 64 +# define GVMM_MAX_HANDLES 8192 +#else +# define GVMM_MAX_HANDLES 128 +#endif + +/** + * Per host CPU GVMM data. + */ +typedef struct GVMMHOSTCPU +{ + /** Magic number (GVMMHOSTCPU_MAGIC). */ + uint32_t volatile u32Magic; + /** The CPU ID. */ + RTCPUID idCpu; + /** The CPU set index. */ + uint32_t idxCpuSet; + +#ifdef GVMM_SCHED_WITH_PPT + /** Periodic preemption timer data. */ + struct + { + /** The handle to the periodic preemption timer. */ + PRTTIMER pTimer; + /** Spinlock protecting the data below. */ + RTSPINLOCK hSpinlock; + /** The smalles Hz that we need to care about. (static) */ + uint32_t uMinHz; + /** The number of ticks between each historization. */ + uint32_t cTicksHistoriziationInterval; + /** The current historization tick (counting up to + * cTicksHistoriziationInterval and then resetting). */ + uint32_t iTickHistorization; + /** The current timer interval. This is set to 0 when inactive. */ + uint32_t cNsInterval; + /** The current timer frequency. This is set to 0 when inactive. */ + uint32_t uTimerHz; + /** The current max frequency reported by the EMTs. + * This gets historicize and reset by the timer callback. This is + * read without holding the spinlock, so needs atomic updating. */ + uint32_t volatile uDesiredHz; + /** Whether the timer was started or not. */ + bool volatile fStarted; + /** Set if we're starting timer. */ + bool volatile fStarting; + /** The index of the next history entry (mod it). */ + uint32_t iHzHistory; + /** Historicized uDesiredHz values. The array wraps around, new entries + * are added at iHzHistory. This is updated approximately every + * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */ + uint32_t aHzHistory[8]; + /** Statistics counter for recording the number of interval changes. */ + uint32_t cChanges; + /** Statistics counter for recording the number of timer starts. */ + uint32_t cStarts; + } Ppt; +#endif /* GVMM_SCHED_WITH_PPT */ + +} GVMMHOSTCPU; +/** Pointer to the per host CPU GVMM data. */ +typedef GVMMHOSTCPU *PGVMMHOSTCPU; +/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */ +#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011) +/** The interval on history entry should cover (approximately) give in + * nanoseconds. */ +#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000) + + +/** + * The GVMM instance data. + */ +typedef struct GVMM +{ + /** Eyecatcher / magic. */ + uint32_t u32Magic; + /** The index of the head of the free handle chain. (0 is nil.) */ + uint16_t volatile iFreeHead; + /** The index of the head of the active handle chain. (0 is nil.) */ + uint16_t volatile iUsedHead; + /** The number of VMs. */ + uint16_t volatile cVMs; + /** Alignment padding. */ + uint16_t u16Reserved; + /** The number of EMTs. */ + uint32_t volatile cEMTs; + /** The number of EMTs that have halted in GVMMR0SchedHalt. */ + uint32_t volatile cHaltedEMTs; + /** Mini lock for restricting early wake-ups to one thread. */ + bool volatile fDoingEarlyWakeUps; + bool afPadding[3]; /**< explicit alignment padding. */ + /** When the next halted or sleeping EMT will wake up. + * This is set to 0 when it needs recalculating and to UINT64_MAX when + * there are no halted or sleeping EMTs in the GVMM. */ + uint64_t uNsNextEmtWakeup; + /** The lock used to serialize VM creation, destruction and associated events that + * isn't performance critical. Owners may acquire the list lock. */ + RTCRITSECT CreateDestroyLock; + /** The lock used to serialize used list updates and accesses. + * This indirectly includes scheduling since the scheduler will have to walk the + * used list to examin running VMs. Owners may not acquire any other locks. */ + RTCRITSECTRW UsedLock; + /** The handle array. + * The size of this array defines the maximum number of currently running VMs. + * The first entry is unused as it represents the NIL handle. */ + GVMHANDLE aHandles[GVMM_MAX_HANDLES]; + + /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1} + * The number of EMTs that means we no longer consider ourselves alone on a + * CPU/Core. + */ + uint32_t cEMTsMeansCompany; + /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns} + * The minimum sleep time for when we're alone, in nano seconds. + */ + uint32_t nsMinSleepAlone; + /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns} + * The minimum sleep time for when we've got company, in nano seconds. + */ + uint32_t nsMinSleepCompany; + /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns} + * The limit for the first round of early wake-ups, given in nano seconds. + */ + uint32_t nsEarlyWakeUp1; + /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns} + * The limit for the second round of early wake-ups, given in nano seconds. + */ + uint32_t nsEarlyWakeUp2; + + /** Set if we're doing early wake-ups. + * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */ + bool volatile fDoEarlyWakeUps; + + /** The number of entries in the host CPU array (aHostCpus). */ + uint32_t cHostCpus; + /** Per host CPU data (variable length). */ + GVMMHOSTCPU aHostCpus[1]; +} GVMM; +AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8); +AssertCompileMemberAlignment(GVMM, UsedLock, 8); +AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8); +/** Pointer to the GVMM instance data. */ +typedef GVMM *PGVMM; + +/** The GVMM::u32Magic value (Charlie Haden). */ +#define GVMM_MAGIC UINT32_C(0x19370806) + + + +/********************************************************************************************************************************* +* Global Variables * +*********************************************************************************************************************************/ +/** Pointer to the GVMM instance data. + * (Just my general dislike for global variables.) */ +static PGVMM g_pGVMM = NULL; + +/** Macro for obtaining and validating the g_pGVMM pointer. + * On failure it will return from the invoking function with the specified return value. + * + * @param pGVMM The name of the pGVMM variable. + * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox + * status codes. + */ +#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \ + do { \ + (pGVMM) = g_pGVMM;\ + AssertPtrReturn((pGVMM), (rc)); \ + AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \ + } while (0) + +/** Macro for obtaining and validating the g_pGVMM pointer, void function variant. + * On failure it will return from the invoking function. + * + * @param pGVMM The name of the pGVMM variable. + */ +#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \ + do { \ + (pGVMM) = g_pGVMM;\ + AssertPtrReturnVoid((pGVMM)); \ + AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \ + } while (0) + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +static void gvmmR0InitPerVMData(PGVM pGVM); +static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle); +static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock); +static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM); + +#ifdef GVMM_SCHED_WITH_PPT +static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick); +#endif + + +/** + * Initializes the GVMM. + * + * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()). + * + * @returns VBox status code. + */ +GVMMR0DECL(int) GVMMR0Init(void) +{ + LogFlow(("GVMMR0Init:\n")); + + /* + * Allocate and initialize the instance data. + */ + uint32_t cHostCpus = RTMpGetArraySize(); + AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE); + + PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus])); + if (!pGVMM) + return VERR_NO_MEMORY; + int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, + "GVMM-CreateDestroyLock"); + if (RT_SUCCESS(rc)) + { + rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock"); + if (RT_SUCCESS(rc)) + { + pGVMM->u32Magic = GVMM_MAGIC; + pGVMM->iUsedHead = 0; + pGVMM->iFreeHead = 1; + + /* the nil handle */ + pGVMM->aHandles[0].iSelf = 0; + pGVMM->aHandles[0].iNext = 0; + + /* the tail */ + unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1; + pGVMM->aHandles[i].iSelf = i; + pGVMM->aHandles[i].iNext = 0; /* nil */ + + /* the rest */ + while (i-- > 1) + { + pGVMM->aHandles[i].iSelf = i; + pGVMM->aHandles[i].iNext = i + 1; + } + + /* The default configuration values. */ + uint32_t cNsResolution = RTSemEventMultiGetResolution(); + pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted to relative to the cpu count or something... */ + if (cNsResolution >= 5*RT_NS_100US) + { + pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */ + pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */; + pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */; + pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */; + } + else if (cNsResolution > RT_NS_100US) + { + pGVMM->nsMinSleepAlone = cNsResolution / 2; + pGVMM->nsMinSleepCompany = cNsResolution / 4; + pGVMM->nsEarlyWakeUp1 = 0; + pGVMM->nsEarlyWakeUp2 = 0; + } + else + { + pGVMM->nsMinSleepAlone = 2000; + pGVMM->nsMinSleepCompany = 2000; + pGVMM->nsEarlyWakeUp1 = 0; + pGVMM->nsEarlyWakeUp2 = 0; + } + pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0; + + /* The host CPU data. */ + pGVMM->cHostCpus = cHostCpus; + uint32_t iCpu = cHostCpus; + RTCPUSET PossibleSet; + RTMpGetSet(&PossibleSet); + while (iCpu-- > 0) + { + pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu; +#ifdef GVMM_SCHED_WITH_PPT + pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL; + pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK; + pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */ + pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1; + //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0; + //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0; + //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0; + //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0; + //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false; + //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false; + //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0; + //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0}; +#endif + + if (RTCpuSetIsMember(&PossibleSet, iCpu)) + { + pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu); + pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC; + +#ifdef GVMM_SCHED_WITH_PPT + rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer, + 50*1000*1000 /* whatever */, + RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES, + gvmmR0SchedPeriodicPreemptionTimerCallback, + &pGVMM->aHostCpus[iCpu]); + if (RT_SUCCESS(rc)) + rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU"); + if (RT_FAILURE(rc)) + { + while (iCpu < cHostCpus) + { + RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer); + RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock); + pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK; + iCpu++; + } + break; + } +#endif + } + else + { + pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID; + pGVMM->aHostCpus[iCpu].u32Magic = 0; + } + } + if (RT_SUCCESS(rc)) + { + g_pGVMM = pGVMM; + LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus)); + return VINF_SUCCESS; + } + + /* bail out. */ + RTCritSectRwDelete(&pGVMM->UsedLock); + } + RTCritSectDelete(&pGVMM->CreateDestroyLock); + } + + RTMemFree(pGVMM); + return rc; +} + + +/** + * Terminates the GVM. + * + * This is called while owning the loader semaphore (see supdrvLdrFree()). + * And unless something is wrong, there should be absolutely no VMs + * registered at this point. + */ +GVMMR0DECL(void) GVMMR0Term(void) +{ + LogFlow(("GVMMR0Term:\n")); + + PGVMM pGVMM = g_pGVMM; + g_pGVMM = NULL; + if (RT_UNLIKELY(!VALID_PTR(pGVMM))) + { + SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM); + return; + } + + /* + * First of all, stop all active timers. + */ + uint32_t cActiveTimers = 0; + uint32_t iCpu = pGVMM->cHostCpus; + while (iCpu-- > 0) + { + ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC); +#ifdef GVMM_SCHED_WITH_PPT + if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL + && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer))) + cActiveTimers++; +#endif + } + if (cActiveTimers) + RTThreadSleep(1); /* fudge */ + + /* + * Invalidate the and free resources. + */ + pGVMM->u32Magic = ~GVMM_MAGIC; + RTCritSectRwDelete(&pGVMM->UsedLock); + RTCritSectDelete(&pGVMM->CreateDestroyLock); + + pGVMM->iFreeHead = 0; + if (pGVMM->iUsedHead) + { + SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs); + pGVMM->iUsedHead = 0; + } + +#ifdef GVMM_SCHED_WITH_PPT + iCpu = pGVMM->cHostCpus; + while (iCpu-- > 0) + { + RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer); + pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL; + RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock); + pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK; + } +#endif + + RTMemFree(pGVMM); +} + + +/** + * A quick hack for setting global config values. + * + * @returns VBox status code. + * + * @param pSession The session handle. Used for authentication. + * @param pszName The variable name. + * @param u64Value The new value. + */ +GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value) +{ + /* + * Validate input. + */ + PGVMM pGVMM; + GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); + AssertPtrReturn(pSession, VERR_INVALID_HANDLE); + AssertPtrReturn(pszName, VERR_INVALID_POINTER); + + /* + * String switch time! + */ + if (strncmp(pszName, RT_STR_TUPLE("/GVMM/"))) + return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */ + int rc = VINF_SUCCESS; + pszName += sizeof("/GVMM/") - 1; + if (!strcmp(pszName, "cEMTsMeansCompany")) + { + if (u64Value <= UINT32_MAX) + pGVMM->cEMTsMeansCompany = u64Value; + else + rc = VERR_OUT_OF_RANGE; + } + else if (!strcmp(pszName, "MinSleepAlone")) + { + if (u64Value <= RT_NS_100MS) + pGVMM->nsMinSleepAlone = u64Value; + else + rc = VERR_OUT_OF_RANGE; + } + else if (!strcmp(pszName, "MinSleepCompany")) + { + if (u64Value <= RT_NS_100MS) + pGVMM->nsMinSleepCompany = u64Value; + else + rc = VERR_OUT_OF_RANGE; + } + else if (!strcmp(pszName, "EarlyWakeUp1")) + { + if (u64Value <= RT_NS_100MS) + { + pGVMM->nsEarlyWakeUp1 = u64Value; + pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0; + } + else + rc = VERR_OUT_OF_RANGE; + } + else if (!strcmp(pszName, "EarlyWakeUp2")) + { + if (u64Value <= RT_NS_100MS) + { + pGVMM->nsEarlyWakeUp2 = u64Value; + pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0; + } + else + rc = VERR_OUT_OF_RANGE; + } + else + rc = VERR_CFGM_VALUE_NOT_FOUND; + return rc; +} + + +/** + * A quick hack for getting global config values. + * + * @returns VBox status code. + * + * @param pSession The session handle. Used for authentication. + * @param pszName The variable name. + * @param pu64Value Where to return the value. + */ +GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value) +{ + /* + * Validate input. + */ + PGVMM pGVMM; + GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); + AssertPtrReturn(pSession, VERR_INVALID_HANDLE); + AssertPtrReturn(pszName, VERR_INVALID_POINTER); + AssertPtrReturn(pu64Value, VERR_INVALID_POINTER); + + /* + * String switch time! + */ + if (strncmp(pszName, RT_STR_TUPLE("/GVMM/"))) + return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */ + int rc = VINF_SUCCESS; + pszName += sizeof("/GVMM/") - 1; + if (!strcmp(pszName, "cEMTsMeansCompany")) + *pu64Value = pGVMM->cEMTsMeansCompany; + else if (!strcmp(pszName, "MinSleepAlone")) + *pu64Value = pGVMM->nsMinSleepAlone; + else if (!strcmp(pszName, "MinSleepCompany")) + *pu64Value = pGVMM->nsMinSleepCompany; + else if (!strcmp(pszName, "EarlyWakeUp1")) + *pu64Value = pGVMM->nsEarlyWakeUp1; + else if (!strcmp(pszName, "EarlyWakeUp2")) + *pu64Value = pGVMM->nsEarlyWakeUp2; + else + rc = VERR_CFGM_VALUE_NOT_FOUND; + return rc; +} + + +/** + * Acquire the 'used' lock in shared mode. + * + * This prevents destruction of the VM while we're in ring-0. + * + * @returns IPRT status code, see RTSemFastMutexRequest. + * @param a_pGVMM The GVMM instance data. + * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK + */ +#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock) + +/** + * Release the 'used' lock in when owning it in shared mode. + * + * @returns IPRT status code, see RTSemFastMutexRequest. + * @param a_pGVMM The GVMM instance data. + * @sa GVMMR0_USED_SHARED_LOCK + */ +#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock) + +/** + * Acquire the 'used' lock in exclusive mode. + * + * Only use this function when making changes to the used list. + * + * @returns IPRT status code, see RTSemFastMutexRequest. + * @param a_pGVMM The GVMM instance data. + * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK + */ +#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock) + +/** + * Release the 'used' lock when owning it in exclusive mode. + * + * @returns IPRT status code, see RTSemFastMutexRelease. + * @param a_pGVMM The GVMM instance data. + * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK + */ +#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock) + + +/** + * Try acquire the 'create & destroy' lock. + * + * @returns IPRT status code, see RTSemFastMutexRequest. + * @param pGVMM The GVMM instance data. + */ +DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM) +{ + LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM)); + int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock); + LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc)); + return rc; +} + + +/** + * Release the 'create & destroy' lock. + * + * @returns IPRT status code, see RTSemFastMutexRequest. + * @param pGVMM The GVMM instance data. + */ +DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM) +{ + LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM)); + int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock); + AssertRC(rc); + return rc; +} + + +/** + * Request wrapper for the GVMMR0CreateVM API. + * + * @returns VBox status code. + * @param pReq The request buffer. + * @param pSession The session handle. The VM will be associated with this. + */ +GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession) +{ + /* + * Validate the request. + */ + if (!VALID_PTR(pReq)) + return VERR_INVALID_POINTER; + if (pReq->Hdr.cbReq != sizeof(*pReq)) + return VERR_INVALID_PARAMETER; + if (pReq->pSession != pSession) + return VERR_INVALID_POINTER; + + /* + * Execute it. + */ + PVM pVM; + pReq->pVMR0 = NULL; + pReq->pVMR3 = NIL_RTR3PTR; + int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pVM); + if (RT_SUCCESS(rc)) + { + pReq->pVMR0 = pVM; + pReq->pVMR3 = pVM->pVMR3; + } + return rc; +} + + +/** + * Allocates the VM structure and registers it with GVM. + * + * The caller will become the VM owner and there by the EMT. + * + * @returns VBox status code. + * @param pSession The support driver session. + * @param cCpus Number of virtual CPUs for the new VM. + * @param ppVM Where to store the pointer to the VM structure. + * + * @thread EMT. + */ +GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM) +{ + LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession)); + PGVMM pGVMM; + GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); + + AssertPtrReturn(ppVM, VERR_INVALID_POINTER); + *ppVM = NULL; + + if ( cCpus == 0 + || cCpus > VMM_MAX_CPU_COUNT) + return VERR_INVALID_PARAMETER; + + RTNATIVETHREAD hEMT0 = RTThreadNativeSelf(); + AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT); + RTPROCESS ProcId = RTProcSelf(); + AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT); + + /* + * The whole allocation process is protected by the lock. + */ + int rc = gvmmR0CreateDestroyLock(pGVMM); + AssertRCReturn(rc, rc); + + /* + * Only one VM per session. + */ + if (SUPR0GetSessionVM(pSession) != NULL) + { + gvmmR0CreateDestroyUnlock(pGVMM); + SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession)); + return VERR_ALREADY_EXISTS; + } + + /* + * Allocate a handle first so we don't waste resources unnecessarily. + */ + uint16_t iHandle = pGVMM->iFreeHead; + if (iHandle) + { + PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle]; + + /* consistency checks, a bit paranoid as always. */ + if ( !pHandle->pVM + && !pHandle->pGVM + && !pHandle->pvObj + && pHandle->iSelf == iHandle) + { + pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle); + if (pHandle->pvObj) + { + /* + * Move the handle from the free to used list and perform permission checks. + */ + rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM); + AssertRC(rc); + + pGVMM->iFreeHead = pHandle->iNext; + pHandle->iNext = pGVMM->iUsedHead; + pGVMM->iUsedHead = iHandle; + pGVMM->cVMs++; + + pHandle->pVM = NULL; + pHandle->pGVM = NULL; + pHandle->pSession = pSession; + pHandle->hEMT0 = NIL_RTNATIVETHREAD; + pHandle->ProcId = NIL_RTPROCESS; + + GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM); + + rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL); + if (RT_SUCCESS(rc)) + { + /* + * Allocate the global VM structure (GVM) and initialize it. + */ + PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVM, aCpus[cCpus])); + if (pGVM) + { + pGVM->u32Magic = GVM_MAGIC; + pGVM->hSelf = iHandle; + pGVM->pVM = NULL; + pGVM->cCpus = cCpus; + pGVM->pSession = pSession; + + gvmmR0InitPerVMData(pGVM); + GMMR0InitPerVMData(pGVM); + + /* + * Allocate the shared VM structure and associated page array. + */ + const uint32_t cbVM = RT_UOFFSETOF_DYN(VM, aCpus[cCpus]); + const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT; + rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */); + if (RT_SUCCESS(rc)) + { + PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM); + memset(pVM, 0, cPages << PAGE_SHIFT); + pVM->enmVMState = VMSTATE_CREATING; + pVM->pVMR0 = pVM; + pVM->pSession = pSession; + pVM->hSelf = iHandle; + pVM->cbSelf = cbVM; + pVM->cCpus = cCpus; + pVM->uCpuExecutionCap = 100; /* default is no cap. */ + pVM->offVMCPU = RT_UOFFSETOF_DYN(VM, aCpus); + AssertCompileMemberAlignment(VM, cpum, 64); + AssertCompileMemberAlignment(VM, tm, 64); + AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE); + + rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */); + if (RT_SUCCESS(rc)) + { + PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages); + for (uint32_t iPage = 0; iPage < cPages; iPage++) + { + paPages[iPage].uReserved = 0; + paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage); + Assert(paPages[iPage].Phys != NIL_RTHCPHYS); + } + + /* + * Map them into ring-3. + */ + rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0, + RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS); + if (RT_SUCCESS(rc)) + { + PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj); + pVM->pVMR3 = pVMR3; + AssertPtr((void *)pVMR3); + + /* Initialize all the VM pointers. */ + for (VMCPUID i = 0; i < cCpus; i++) + { + pVM->aCpus[i].idCpu = i; + pVM->aCpus[i].pVMR0 = pVM; + pVM->aCpus[i].pVMR3 = pVMR3; + pVM->aCpus[i].idHostCpu = NIL_RTCPUID; + pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD; + } + + rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1, + 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE, + NIL_RTR0PROCESS); + if (RT_SUCCESS(rc)) + { + pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj); + AssertPtr((void *)pVM->paVMPagesR3); + + /* complete the handle - take the UsedLock sem just to be careful. */ + rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM); + AssertRC(rc); + + pHandle->pVM = pVM; + pHandle->pGVM = pGVM; + pHandle->hEMT0 = hEMT0; + pHandle->ProcId = ProcId; + pGVM->pVM = pVM; + pGVM->pVMR3 = pVMR3; + pGVM->aCpus[0].hEMT = hEMT0; + pVM->aCpus[0].hNativeThreadR0 = hEMT0; + pGVMM->cEMTs += cCpus; + + for (VMCPUID i = 0; i < cCpus; i++) + { + pGVM->aCpus[i].pVCpu = &pVM->aCpus[i]; + pGVM->aCpus[i].pVM = pVM; + } + + /* Associate it with the session and create the context hook for EMT0. */ + rc = SUPR0SetSessionVM(pSession, pGVM, pVM); + if (RT_SUCCESS(rc)) + { + rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[0]); + if (RT_SUCCESS(rc)) + { + /* + * Done! + */ + VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus); + + GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM); + gvmmR0CreateDestroyUnlock(pGVMM); + + CPUMR0RegisterVCpuThread(&pVM->aCpus[0]); + + *ppVM = pVM; + Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVMR3, pGVM, iHandle)); + return VINF_SUCCESS; + } + + SUPR0SetSessionVM(pSession, NULL, NULL); + } + GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM); + } + + RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); + pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ; + } + RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); + pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ; + } + RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); + pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ; + } + } + } + /* else: The user wasn't permitted to create this VM. */ + + /* + * The handle will be freed by gvmmR0HandleObjDestructor as we release the + * object reference here. A little extra mess because of non-recursive lock. + */ + void *pvObj = pHandle->pvObj; + pHandle->pvObj = NULL; + gvmmR0CreateDestroyUnlock(pGVMM); + + SUPR0ObjRelease(pvObj, pSession); + + SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc); + return rc; + } + + rc = VERR_NO_MEMORY; + } + else + rc = VERR_GVMM_IPE_1; + } + else + rc = VERR_GVM_TOO_MANY_VMS; + + gvmmR0CreateDestroyUnlock(pGVMM); + return rc; +} + + +/** + * Initializes the per VM data belonging to GVMM. + * + * @param pGVM Pointer to the global VM structure. + */ +static void gvmmR0InitPerVMData(PGVM pGVM) +{ + AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding)); + AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding)); + pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ; + pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ; + pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ; + pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ; + pGVM->gvmm.s.fDoneVMMR0Init = false; + pGVM->gvmm.s.fDoneVMMR0Term = false; + + for (VMCPUID i = 0; i < pGVM->cCpus; i++) + { + pGVM->aCpus[i].idCpu = i; + pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI; + pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD; + pGVM->aCpus[i].pGVM = pGVM; + pGVM->aCpus[i].pVCpu = NULL; + pGVM->aCpus[i].pVM = NULL; + } +} + + +/** + * Does the VM initialization. + * + * @returns VBox status code. + * @param pGVM The global (ring-0) VM structure. + */ +GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM) +{ + LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM)); + + int rc = VERR_INTERNAL_ERROR_3; + if ( !pGVM->gvmm.s.fDoneVMMR0Init + && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI) + { + for (VMCPUID i = 0; i < pGVM->cCpus; i++) + { + rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti); + if (RT_FAILURE(rc)) + { + pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI; + break; + } + } + } + else + rc = VERR_WRONG_ORDER; + + LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Indicates that we're done with the ring-0 initialization + * of the VM. + * + * @param pGVM The global (ring-0) VM structure. + * @thread EMT(0) + */ +GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM) +{ + /* Set the indicator. */ + pGVM->gvmm.s.fDoneVMMR0Init = true; +} + + +/** + * Indicates that we're doing the ring-0 termination of the VM. + * + * @returns true if termination hasn't been done already, false if it has. + * @param pGVM Pointer to the global VM structure. Optional. + * @thread EMT(0) or session cleanup thread. + */ +GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM) +{ + /* Validate the VM structure, state and handle. */ + AssertPtrReturn(pGVM, false); + + /* Set the indicator. */ + if (pGVM->gvmm.s.fDoneVMMR0Term) + return false; + pGVM->gvmm.s.fDoneVMMR0Term = true; + return true; +} + + +/** + * Destroys the VM, freeing all associated resources (the ring-0 ones anyway). + * + * This is call from the vmR3DestroyFinalBit and from a error path in VMR3Create, + * and the caller is not the EMT thread, unfortunately. For security reasons, it + * would've been nice if the caller was actually the EMT thread or that we somehow + * could've associated the calling thread with the VM up front. + * + * @returns VBox status code. + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * + * @thread EMT(0) if it's associated with the VM, otherwise any thread. + */ +GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM, PVM pVM) +{ + LogFlow(("GVMMR0DestroyVM: pGVM=%p pVM=%p\n", pGVM, pVM)); + PGVMM pGVMM; + GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); + + /* + * Validate the VM structure, state and caller. + */ + AssertPtrReturn(pGVM, VERR_INVALID_POINTER); + AssertPtrReturn(pVM, VERR_INVALID_POINTER); + AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER); + AssertReturn(pGVM->pVM == pVM, VERR_INVALID_POINTER); + AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState), + VERR_WRONG_ORDER); + + uint32_t hGVM = pGVM->hSelf; + ASMCompilerBarrier(); + AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE); + AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE); + + PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM]; + AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER); + + RTPROCESS ProcId = RTProcSelf(); + RTNATIVETHREAD hSelf = RTThreadNativeSelf(); + AssertReturn( ( pHandle->hEMT0 == hSelf + && pHandle->ProcId == ProcId) + || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER); + + /* + * Lookup the handle and destroy the object. + * Since the lock isn't recursive and we'll have to leave it before dereferencing the + * object, we take some precautions against racing callers just in case... + */ + int rc = gvmmR0CreateDestroyLock(pGVMM); + AssertRC(rc); + + /* Be careful here because we might theoretically be racing someone else cleaning up. */ + if ( pHandle->pVM == pVM + && ( ( pHandle->hEMT0 == hSelf + && pHandle->ProcId == ProcId) + || pHandle->hEMT0 == NIL_RTNATIVETHREAD) + && VALID_PTR(pHandle->pvObj) + && VALID_PTR(pHandle->pSession) + && VALID_PTR(pHandle->pGVM) + && pHandle->pGVM->u32Magic == GVM_MAGIC) + { + /* Check that other EMTs have deregistered. */ + uint32_t cNotDeregistered = 0; + for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++) + cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */ + if (cNotDeregistered == 0) + { + /* Grab the object pointer. */ + void *pvObj = pHandle->pvObj; + pHandle->pvObj = NULL; + gvmmR0CreateDestroyUnlock(pGVMM); + + SUPR0ObjRelease(pvObj, pHandle->pSession); + } + else + { + gvmmR0CreateDestroyUnlock(pGVMM); + rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED; + } + } + else + { + SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n", + pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf); + gvmmR0CreateDestroyUnlock(pGVMM); + rc = VERR_GVMM_IPE_2; + } + + return rc; +} + + +/** + * Performs VM cleanup task as part of object destruction. + * + * @param pGVM The GVM pointer. + */ +static void gvmmR0CleanupVM(PGVM pGVM) +{ + if ( pGVM->gvmm.s.fDoneVMMR0Init + && !pGVM->gvmm.s.fDoneVMMR0Term) + { + if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ + && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM) + { + LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n")); + VMMR0TermVM(pGVM, pGVM->pVM, NIL_VMCPUID); + } + else + AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM)); + } + + GMMR0CleanupVM(pGVM); +#ifdef VBOX_WITH_NEM_R0 + NEMR0CleanupVM(pGVM); +#endif + + AssertCompile((uintptr_t)NIL_RTTHREADCTXHOOK == 0); /* Depends on zero initialized memory working for NIL at the moment. */ + for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++) + { + /** @todo Can we busy wait here for all thread-context hooks to be + * deregistered before releasing (destroying) it? Only until we find a + * solution for not deregistering hooks everytime we're leaving HMR0 + * context. */ + VMMR0ThreadCtxHookDestroyForEmt(&pGVM->pVM->aCpus[idCpu]); + } +} + + +/** + * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor} + * + * pvUser1 is the GVM instance pointer. + * pvUser2 is the handle pointer. + */ +static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2) +{ + LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2)); + + NOREF(pvObj); + + /* + * Some quick, paranoid, input validation. + */ + PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2; + AssertPtr(pHandle); + PGVMM pGVMM = (PGVMM)pvUser1; + Assert(pGVMM == g_pGVMM); + const uint16_t iHandle = pHandle - &pGVMM->aHandles[0]; + if ( !iHandle + || iHandle >= RT_ELEMENTS(pGVMM->aHandles) + || iHandle != pHandle->iSelf) + { + SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf); + return; + } + + int rc = gvmmR0CreateDestroyLock(pGVMM); + AssertRC(rc); + rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM); + AssertRC(rc); + + /* + * This is a tad slow but a doubly linked list is too much hassle. + */ + if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles))) + { + SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext); + GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM); + gvmmR0CreateDestroyUnlock(pGVMM); + return; + } + + if (pGVMM->iUsedHead == iHandle) + pGVMM->iUsedHead = pHandle->iNext; + else + { + uint16_t iPrev = pGVMM->iUsedHead; + int c = RT_ELEMENTS(pGVMM->aHandles) + 2; + while (iPrev) + { + if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles))) + { + SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev); + GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM); + gvmmR0CreateDestroyUnlock(pGVMM); + return; + } + if (RT_UNLIKELY(c-- <= 0)) + { + iPrev = 0; + break; + } + + if (pGVMM->aHandles[iPrev].iNext == iHandle) + break; + iPrev = pGVMM->aHandles[iPrev].iNext; + } + if (!iPrev) + { + SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf); + GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM); + gvmmR0CreateDestroyUnlock(pGVMM); + return; + } + + Assert(pGVMM->aHandles[iPrev].iNext == iHandle); + pGVMM->aHandles[iPrev].iNext = pHandle->iNext; + } + pHandle->iNext = 0; + pGVMM->cVMs--; + + /* + * Do the global cleanup round. + */ + PGVM pGVM = pHandle->pGVM; + if ( VALID_PTR(pGVM) + && pGVM->u32Magic == GVM_MAGIC) + { + pGVMM->cEMTs -= pGVM->cCpus; + + if (pGVM->pSession) + SUPR0SetSessionVM(pGVM->pSession, NULL, NULL); + + GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM); + + gvmmR0CleanupVM(pGVM); + + /* + * Do the GVMM cleanup - must be done last. + */ + /* The VM and VM pages mappings/allocations. */ + if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ) + { + rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc); + pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ; + } + + if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ) + { + rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc); + pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ; + } + + if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ) + { + rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc); + pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ; + } + + if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ) + { + rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc); + pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ; + } + + for (VMCPUID i = 0; i < pGVM->cCpus; i++) + { + if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI) + { + rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc); + pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI; + } + } + + /* the GVM structure itself. */ + pGVM->u32Magic |= UINT32_C(0x80000000); + RTMemFree(pGVM); + + /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */ + rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM); + AssertRC(rc); + } + /* else: GVMMR0CreateVM cleanup. */ + + /* + * Free the handle. + */ + pHandle->iNext = pGVMM->iFreeHead; + pGVMM->iFreeHead = iHandle; + ASMAtomicWriteNullPtr(&pHandle->pGVM); + ASMAtomicWriteNullPtr(&pHandle->pVM); + ASMAtomicWriteNullPtr(&pHandle->pvObj); + ASMAtomicWriteNullPtr(&pHandle->pSession); + ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD); + ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS); + + GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM); + gvmmR0CreateDestroyUnlock(pGVMM); + LogFlow(("gvmmR0HandleObjDestructor: returns\n")); +} + + +/** + * Registers the calling thread as the EMT of a Virtual CPU. + * + * Note that VCPU 0 is automatically registered during VM creation. + * + * @returns VBox status code + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * @param idCpu VCPU id to register the current thread as. + */ +GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu) +{ + AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION); + + /* + * Validate the VM structure, state and handle. + */ + PGVMM pGVMM; + int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */ + if (RT_SUCCESS(rc)) + { + if (idCpu < pGVM->cCpus) + { + /* Check that the EMT isn't already assigned to a thread. */ + if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD) + { + Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD); + + /* A thread may only be one EMT. */ + RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf(); + for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++) + AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER); + if (RT_SUCCESS(rc)) + { + /* + * Do the assignment, then try setup the hook. Undo if that fails. + */ + pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf(); + + rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[idCpu]); + if (RT_SUCCESS(rc)) + CPUMR0RegisterVCpuThread(&pVM->aCpus[idCpu]); + else + pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD; + } + } + else + rc = VERR_ACCESS_DENIED; + } + else + rc = VERR_INVALID_CPU_ID; + } + return rc; +} + + +/** + * Deregisters the calling thread as the EMT of a Virtual CPU. + * + * Note that VCPU 0 shall call GVMMR0DestroyVM intead of this API. + * + * @returns VBox status code + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * @param idCpu VCPU id to register the current thread as. + */ +GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, PVM pVM, VMCPUID idCpu) +{ + AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION); + + /* + * Validate the VM structure, state and handle. + */ + PGVMM pGVMM; + int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM); + if (RT_SUCCESS(rc)) + { + /* + * Take the destruction lock and recheck the handle state to + * prevent racing GVMMR0DestroyVM. + */ + gvmmR0CreateDestroyLock(pGVMM); + uint32_t hSelf = pGVM->hSelf; + ASMCompilerBarrier(); + if ( hSelf < RT_ELEMENTS(pGVMM->aHandles) + && pGVMM->aHandles[hSelf].pvObj != NULL + && pGVMM->aHandles[hSelf].pGVM == pGVM) + { + /* + * Do per-EMT cleanups. + */ + VMMR0ThreadCtxHookDestroyForEmt(&pVM->aCpus[idCpu]); + + /* + * Invalidate hEMT. We don't use NIL here as that would allow + * GVMMR0RegisterVCpu to be called again, and we don't want that. + */ + AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD); + pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1; + pVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD; + } + + gvmmR0CreateDestroyUnlock(pGVMM); + } + return rc; +} + + +/** + * Lookup a GVM structure by its handle. + * + * @returns The GVM pointer on success, NULL on failure. + * @param hGVM The global VM handle. Asserts on bad handle. + */ +GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM) +{ + PGVMM pGVMM; + GVMM_GET_VALID_INSTANCE(pGVMM, NULL); + + /* + * Validate. + */ + AssertReturn(hGVM != NIL_GVM_HANDLE, NULL); + AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL); + + /* + * Look it up. + */ + PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM]; + AssertPtrReturn(pHandle->pVM, NULL); + AssertPtrReturn(pHandle->pvObj, NULL); + PGVM pGVM = pHandle->pGVM; + AssertPtrReturn(pGVM, NULL); + AssertReturn(pGVM->pVM == pHandle->pVM, NULL); + + return pHandle->pGVM; +} + + +/** + * Lookup a GVM structure by the shared VM structure. + * + * The calling thread must be in the same process as the VM. All current lookups + * are by threads inside the same process, so this will not be an issue. + * + * @returns VBox status code. + * @param pVM The cross context VM structure. + * @param ppGVM Where to store the GVM pointer. + * @param ppGVMM Where to store the pointer to the GVMM instance data. + * @param fTakeUsedLock Whether to take the used lock or not. We take it in + * shared mode when requested. + * + * Be very careful if not taking the lock as it's + * possible that the VM will disappear then! + * + * @remark This will not assert on an invalid pVM but try return silently. + */ +static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock) +{ + RTPROCESS ProcId = RTProcSelf(); + PGVMM pGVMM; + GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); + + /* + * Validate. + */ + if (RT_UNLIKELY( !VALID_PTR(pVM) + || ((uintptr_t)pVM & PAGE_OFFSET_MASK))) + return VERR_INVALID_POINTER; + if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING + || pVM->enmVMState >= VMSTATE_TERMINATED)) + return VERR_INVALID_POINTER; + + uint16_t hGVM = pVM->hSelf; + ASMCompilerBarrier(); + if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE + || hGVM >= RT_ELEMENTS(pGVMM->aHandles))) + return VERR_INVALID_HANDLE; + + /* + * Look it up. + */ + PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM]; + PGVM pGVM; + if (fTakeUsedLock) + { + int rc = GVMMR0_USED_SHARED_LOCK(pGVMM); + AssertRCReturn(rc, rc); + + pGVM = pHandle->pGVM; + if (RT_UNLIKELY( pHandle->pVM != pVM + || pHandle->ProcId != ProcId + || !VALID_PTR(pHandle->pvObj) + || !VALID_PTR(pGVM) + || pGVM->pVM != pVM)) + { + GVMMR0_USED_SHARED_UNLOCK(pGVMM); + return VERR_INVALID_HANDLE; + } + } + else + { + if (RT_UNLIKELY(pHandle->pVM != pVM)) + return VERR_INVALID_HANDLE; + if (RT_UNLIKELY(pHandle->ProcId != ProcId)) + return VERR_INVALID_HANDLE; + if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj))) + return VERR_INVALID_HANDLE; + + pGVM = pHandle->pGVM; + if (RT_UNLIKELY(!VALID_PTR(pGVM))) + return VERR_INVALID_HANDLE; + if (RT_UNLIKELY(pGVM->pVM != pVM)) + return VERR_INVALID_HANDLE; + } + + *ppGVM = pGVM; + *ppGVMM = pGVMM; + return VINF_SUCCESS; +} + + +/** + * Fast look up a GVM structure by the cross context VM structure. + * + * This is mainly used a glue function, so performance is . + * + * @returns GVM on success, NULL on failure. + * @param pVM The cross context VM structure. ASSUMES to be + * reasonably valid, so we can do fewer checks than in + * gvmmR0ByVM. + * + * @note Do not use this on pVM structures from userland! + */ +GVMMR0DECL(PGVM) GVMMR0FastGetGVMByVM(PVM pVM) +{ + AssertPtr(pVM); + Assert(!((uintptr_t)pVM & PAGE_OFFSET_MASK)); + + PGVMM pGVMM; + GVMM_GET_VALID_INSTANCE(pGVMM, NULL); + + /* + * Validate. + */ + uint16_t hGVM = pVM->hSelf; + ASMCompilerBarrier(); + AssertReturn(hGVM != NIL_GVM_HANDLE && hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL); + + /* + * Look it up and check pVM against the value in the handle and GVM structures. + */ + PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM]; + AssertReturn(pHandle->pVM == pVM, NULL); + + PGVM pGVM = pHandle->pGVM; + AssertPtrReturn(pGVM, NULL); + AssertReturn(pGVM->pVM == pVM, NULL); + + return pGVM; +} + + +/** + * Check that the given GVM and VM structures match up. + * + * The calling thread must be in the same process as the VM. All current lookups + * are by threads inside the same process, so this will not be an issue. + * + * @returns VBox status code. + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * @param ppGVMM Where to store the pointer to the GVMM instance data. + * @param fTakeUsedLock Whether to take the used lock or not. We take it in + * shared mode when requested. + * + * Be very careful if not taking the lock as it's + * possible that the VM will disappear then! + * + * @remark This will not assert on an invalid pVM but try return silently. + */ +static int gvmmR0ByGVMandVM(PGVM pGVM, PVM pVM, PGVMM *ppGVMM, bool fTakeUsedLock) +{ + /* + * Check the pointers. + */ + int rc; + if (RT_LIKELY(RT_VALID_PTR(pGVM))) + { + if (RT_LIKELY( RT_VALID_PTR(pVM) + && ((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0)) + { + if (RT_LIKELY(pGVM->pVM == pVM)) + { + /* + * Get the pGVMM instance and check the VM handle. + */ + PGVMM pGVMM; + GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); + + uint16_t hGVM = pGVM->hSelf; + if (RT_LIKELY( hGVM != NIL_GVM_HANDLE + && hGVM < RT_ELEMENTS(pGVMM->aHandles))) + { + RTPROCESS const pidSelf = RTProcSelf(); + PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM]; + if (fTakeUsedLock) + { + rc = GVMMR0_USED_SHARED_LOCK(pGVMM); + AssertRCReturn(rc, rc); + } + + if (RT_LIKELY( pHandle->pGVM == pGVM + && pHandle->pVM == pVM + && pHandle->ProcId == pidSelf + && RT_VALID_PTR(pHandle->pvObj))) + { + /* + * Some more VM data consistency checks. + */ + if (RT_LIKELY( pVM->cCpus == pGVM->cCpus + && pVM->hSelf == hGVM + && pVM->enmVMState >= VMSTATE_CREATING + && pVM->enmVMState <= VMSTATE_TERMINATED + && pVM->pVMR0 == pVM)) + { + *ppGVMM = pGVMM; + return VINF_SUCCESS; + } + } + + if (fTakeUsedLock) + GVMMR0_USED_SHARED_UNLOCK(pGVMM); + } + } + rc = VERR_INVALID_VM_HANDLE; + } + else + rc = VERR_INVALID_POINTER; + } + else + rc = VERR_INVALID_POINTER; + return rc; +} + + +/** + * Check that the given GVM and VM structures match up. + * + * The calling thread must be in the same process as the VM. All current lookups + * are by threads inside the same process, so this will not be an issue. + * + * @returns VBox status code. + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * @param idCpu The (alleged) Virtual CPU ID of the calling EMT. + * @param ppGVMM Where to store the pointer to the GVMM instance data. + * @thread EMT + * + * @remarks This will assert in all failure paths. + */ +static int gvmmR0ByGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu, PGVMM *ppGVMM) +{ + /* + * Check the pointers. + */ + AssertPtrReturn(pGVM, VERR_INVALID_POINTER); + + AssertPtrReturn(pVM, VERR_INVALID_POINTER); + AssertReturn(((uintptr_t)pVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER); + AssertReturn(pGVM->pVM == pVM, VERR_INVALID_VM_HANDLE); + + + /* + * Get the pGVMM instance and check the VM handle. + */ + PGVMM pGVMM; + GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); + + uint16_t hGVM = pGVM->hSelf; + ASMCompilerBarrier(); + AssertReturn( hGVM != NIL_GVM_HANDLE + && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE); + + RTPROCESS const pidSelf = RTProcSelf(); + PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM]; + AssertReturn( pHandle->pGVM == pGVM + && pHandle->pVM == pVM + && pHandle->ProcId == pidSelf + && RT_VALID_PTR(pHandle->pvObj), + VERR_INVALID_HANDLE); + + /* + * Check the EMT claim. + */ + RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf(); + AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID); + AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER); + + /* + * Some more VM data consistency checks. + */ + AssertReturn(pVM->cCpus == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE); + AssertReturn(pVM->hSelf == hGVM, VERR_INCONSISTENT_VM_HANDLE); + AssertReturn(pVM->pVMR0 == pVM, VERR_INCONSISTENT_VM_HANDLE); + AssertReturn( pVM->enmVMState >= VMSTATE_CREATING + && pVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE); + + *ppGVMM = pGVMM; + return VINF_SUCCESS; +} + + +/** + * Validates a GVM/VM pair. + * + * @returns VBox status code. + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + */ +GVMMR0DECL(int) GVMMR0ValidateGVMandVM(PGVM pGVM, PVM pVM) +{ + PGVMM pGVMM; + return gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, false /*fTakeUsedLock*/); +} + + + +/** + * Validates a GVM/VM/EMT combo. + * + * @returns VBox status code. + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * @param idCpu The Virtual CPU ID of the calling EMT. + * @thread EMT(idCpu) + */ +GVMMR0DECL(int) GVMMR0ValidateGVMandVMandEMT(PGVM pGVM, PVM pVM, VMCPUID idCpu) +{ + PGVMM pGVMM; + return gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM); +} + + +/** + * Looks up the VM belonging to the specified EMT thread. + * + * This is used by the assertion machinery in VMMR0.cpp to avoid causing + * unnecessary kernel panics when the EMT thread hits an assertion. The + * call may or not be an EMT thread. + * + * @returns Pointer to the VM on success, NULL on failure. + * @param hEMT The native thread handle of the EMT. + * NIL_RTNATIVETHREAD means the current thread + */ +GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT) +{ + /* + * No Assertions here as we're usually called in a AssertMsgN or + * RTAssert* context. + */ + PGVMM pGVMM = g_pGVMM; + if ( !VALID_PTR(pGVMM) + || pGVMM->u32Magic != GVMM_MAGIC) + return NULL; + + if (hEMT == NIL_RTNATIVETHREAD) + hEMT = RTThreadNativeSelf(); + RTPROCESS ProcId = RTProcSelf(); + + /* + * Search the handles in a linear fashion as we don't dare to take the lock (assert). + */ +/** @todo introduce some pid hash table here, please. */ + for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++) + { + if ( pGVMM->aHandles[i].iSelf == i + && pGVMM->aHandles[i].ProcId == ProcId + && VALID_PTR(pGVMM->aHandles[i].pvObj) + && VALID_PTR(pGVMM->aHandles[i].pVM) + && VALID_PTR(pGVMM->aHandles[i].pGVM)) + { + if (pGVMM->aHandles[i].hEMT0 == hEMT) + return pGVMM->aHandles[i].pVM; + + /* This is fearly safe with the current process per VM approach. */ + PGVM pGVM = pGVMM->aHandles[i].pGVM; + VMCPUID const cCpus = pGVM->cCpus; + ASMCompilerBarrier(); + if ( cCpus < 1 + || cCpus > VMM_MAX_CPU_COUNT) + continue; + for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++) + if (pGVM->aCpus[idCpu].hEMT == hEMT) + return pGVMM->aHandles[i].pVM; + } + } + return NULL; +} + + +/** + * Looks up the GVMCPU belonging to the specified EMT thread. + * + * This is used by the assertion machinery in VMMR0.cpp to avoid causing + * unnecessary kernel panics when the EMT thread hits an assertion. The + * call may or not be an EMT thread. + * + * @returns Pointer to the VM on success, NULL on failure. + * @param hEMT The native thread handle of the EMT. + * NIL_RTNATIVETHREAD means the current thread + */ +GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT) +{ + /* + * No Assertions here as we're usually called in a AssertMsgN, + * RTAssert*, Log and LogRel contexts. + */ + PGVMM pGVMM = g_pGVMM; + if ( !VALID_PTR(pGVMM) + || pGVMM->u32Magic != GVMM_MAGIC) + return NULL; + + if (hEMT == NIL_RTNATIVETHREAD) + hEMT = RTThreadNativeSelf(); + RTPROCESS ProcId = RTProcSelf(); + + /* + * Search the handles in a linear fashion as we don't dare to take the lock (assert). + */ +/** @todo introduce some pid hash table here, please. */ + for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++) + { + if ( pGVMM->aHandles[i].iSelf == i + && pGVMM->aHandles[i].ProcId == ProcId + && VALID_PTR(pGVMM->aHandles[i].pvObj) + && VALID_PTR(pGVMM->aHandles[i].pVM) + && VALID_PTR(pGVMM->aHandles[i].pGVM)) + { + PGVM pGVM = pGVMM->aHandles[i].pGVM; + if (pGVMM->aHandles[i].hEMT0 == hEMT) + return &pGVM->aCpus[0]; + + /* This is fearly safe with the current process per VM approach. */ + VMCPUID const cCpus = pGVM->cCpus; + ASMCompilerBarrier(); + ASMCompilerBarrier(); + if ( cCpus < 1 + || cCpus > VMM_MAX_CPU_COUNT) + continue; + for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++) + if (pGVM->aCpus[idCpu].hEMT == hEMT) + return &pGVM->aCpus[idCpu]; + } + } + return NULL; +} + + +/** + * This is will wake up expired and soon-to-be expired VMs. + * + * @returns Number of VMs that has been woken up. + * @param pGVMM Pointer to the GVMM instance data. + * @param u64Now The current time. + */ +static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now) +{ + /* + * Skip this if we've got disabled because of high resolution wakeups or by + * the user. + */ + if (!pGVMM->fDoEarlyWakeUps) + return 0; + +/** @todo Rewrite this algorithm. See performance defect XYZ. */ + + /* + * A cheap optimization to stop wasting so much time here on big setups. + */ + const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2; + if ( pGVMM->cHaltedEMTs == 0 + || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup) + return 0; + + /* + * Only one thread doing this at a time. + */ + if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false)) + return 0; + + /* + * The first pass will wake up VMs which have actually expired + * and look for VMs that should be woken up in the 2nd and 3rd passes. + */ + const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1; + uint64_t u64Min = UINT64_MAX; + unsigned cWoken = 0; + unsigned cHalted = 0; + unsigned cTodo2nd = 0; + unsigned cTodo3rd = 0; + for (unsigned i = pGVMM->iUsedHead, cGuard = 0; + i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles); + i = pGVMM->aHandles[i].iNext) + { + PGVM pCurGVM = pGVMM->aHandles[i].pGVM; + if ( VALID_PTR(pCurGVM) + && pCurGVM->u32Magic == GVM_MAGIC) + { + for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++) + { + PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu]; + uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire); + if (u64) + { + if (u64 <= u64Now) + { + if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0)) + { + int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti); + AssertRC(rc); + cWoken++; + } + } + else + { + cHalted++; + if (u64 <= uNsEarlyWakeUp1) + cTodo2nd++; + else if (u64 <= uNsEarlyWakeUp2) + cTodo3rd++; + else if (u64 < u64Min) + u64 = u64Min; + } + } + } + } + AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles)); + } + + if (cTodo2nd) + { + for (unsigned i = pGVMM->iUsedHead, cGuard = 0; + i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles); + i = pGVMM->aHandles[i].iNext) + { + PGVM pCurGVM = pGVMM->aHandles[i].pGVM; + if ( VALID_PTR(pCurGVM) + && pCurGVM->u32Magic == GVM_MAGIC) + { + for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++) + { + PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu]; + uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire); + if ( u64 + && u64 <= uNsEarlyWakeUp1) + { + if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0)) + { + int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti); + AssertRC(rc); + cWoken++; + } + } + } + } + AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles)); + } + } + + if (cTodo3rd) + { + for (unsigned i = pGVMM->iUsedHead, cGuard = 0; + i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles); + i = pGVMM->aHandles[i].iNext) + { + PGVM pCurGVM = pGVMM->aHandles[i].pGVM; + if ( VALID_PTR(pCurGVM) + && pCurGVM->u32Magic == GVM_MAGIC) + { + for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++) + { + PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu]; + uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire); + if ( u64 + && u64 <= uNsEarlyWakeUp2) + { + if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0)) + { + int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti); + AssertRC(rc); + cWoken++; + } + } + } + } + AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles)); + } + } + + /* + * Set the minimum value. + */ + pGVMM->uNsNextEmtWakeup = u64Min; + + ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false); + return cWoken; +} + + +/** + * Halt the EMT thread. + * + * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread). + * VERR_INTERRUPTED if a signal was scheduled for the thread. + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * @param pGVCpu The global (ring-0) CPU structure of the calling + * EMT. + * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time. + * @thread EMT(pGVCpu). + */ +GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PVM pVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime) +{ + LogFlow(("GVMMR0SchedHalt: pGVM=%p pVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n", + pGVM, pVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime)); + GVMM_CHECK_SMAP_SETUP(); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + + PGVMM pGVMM; + GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); + + pGVM->gvmm.s.StatsSched.cHaltCalls++; + Assert(!pGVCpu->gvmm.s.u64HaltExpire); + + /* + * If we're doing early wake-ups, we must take the UsedList lock before we + * start querying the current time. + * Note! Interrupts must NOT be disabled at this point because we ask for GIP time! + */ + bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps; + if (fDoEarlyWakeUps) + { + int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + } + + pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId(); + + /* GIP hack: We might are frequently sleeping for short intervals where the + difference between GIP and system time matters on systems with high resolution + system time. So, convert the input from GIP to System time in that case. */ + Assert(ASMGetFlags() & X86_EFL_IF); + const uint64_t u64NowSys = RTTimeSystemNanoTS(); + const uint64_t u64NowGip = RTTimeNanoTS(); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + + if (fDoEarlyWakeUps) + { + pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + } + + /* + * Go to sleep if we must... + * Cap the sleep time to 1 second to be on the safe side. + */ + int rc; + uint64_t cNsInterval = u64ExpireGipTime - u64NowGip; + if ( u64NowGip < u64ExpireGipTime + && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany + ? pGVMM->nsMinSleepCompany + : pGVMM->nsMinSleepAlone)) + { + pGVM->gvmm.s.StatsSched.cHaltBlocking++; + if (cNsInterval > RT_NS_1SEC) + u64ExpireGipTime = u64NowGip + RT_NS_1SEC; + ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime); + ASMAtomicIncU32(&pGVMM->cHaltedEMTs); + if (fDoEarlyWakeUps) + { + if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup) + pGVMM->uNsNextEmtWakeup = u64ExpireGipTime; + GVMMR0_USED_SHARED_UNLOCK(pGVMM); + } + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + + rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti, + RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE, + u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + + ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0); + ASMAtomicDecU32(&pGVMM->cHaltedEMTs); + + /* Reset the semaphore to try prevent a few false wake-ups. */ + if (rc == VINF_SUCCESS) + { + RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + } + else if (rc == VERR_TIMEOUT) + { + pGVM->gvmm.s.StatsSched.cHaltTimeouts++; + rc = VINF_SUCCESS; + } + } + else + { + pGVM->gvmm.s.StatsSched.cHaltNotBlocking++; + if (fDoEarlyWakeUps) + GVMMR0_USED_SHARED_UNLOCK(pGVMM); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + rc = VINF_SUCCESS; + } + + return rc; +} + + +/** + * Halt the EMT thread. + * + * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread). + * VERR_INTERRUPTED if a signal was scheduled for the thread. + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * @param idCpu The Virtual CPU ID of the calling EMT. + * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time. + * @thread EMT(idCpu). + */ +GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime) +{ + GVMM_CHECK_SMAP_SETUP(); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + PGVMM pGVMM; + int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM); + if (RT_SUCCESS(rc)) + { + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + rc = GVMMR0SchedHalt(pGVM, pVM, &pGVM->aCpus[idCpu], u64ExpireGipTime); + } + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + return rc; +} + + + +/** + * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up + * the a sleeping EMT. + * + * @retval VINF_SUCCESS if successfully woken up. + * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked. + * + * @param pGVM The global (ring-0) VM structure. + * @param pGVCpu The global (ring-0) VCPU structure. + */ +DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu) +{ + pGVM->gvmm.s.StatsSched.cWakeUpCalls++; + + /* + * Signal the semaphore regardless of whether it's current blocked on it. + * + * The reason for this is that there is absolutely no way we can be 100% + * certain that it isn't *about* go to go to sleep on it and just got + * delayed a bit en route. So, we will always signal the semaphore when + * the it is flagged as halted in the VMM. + */ +/** @todo we can optimize some of that by means of the pVCpu->enmState now. */ + int rc; + if (pGVCpu->gvmm.s.u64HaltExpire) + { + rc = VINF_SUCCESS; + ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0); + } + else + { + rc = VINF_GVM_NOT_BLOCKED; + pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++; + } + + int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti); + AssertRC(rc2); + + return rc; +} + + +/** + * Wakes up the halted EMT thread so it can service a pending request. + * + * @returns VBox status code. + * @retval VINF_SUCCESS if successfully woken up. + * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked. + * + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * @param idCpu The Virtual CPU ID of the EMT to wake up. + * @param fTakeUsedLock Take the used lock or not + * @thread Any but EMT(idCpu). + */ +GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock) +{ + GVMM_CHECK_SMAP_SETUP(); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + + /* + * Validate input and take the UsedLock. + */ + PGVMM pGVMM; + int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + if (RT_SUCCESS(rc)) + { + if (idCpu < pGVM->cCpus) + { + /* + * Do the actual job. + */ + rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + + if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps) + { + /* + * While we're here, do a round of scheduling. + */ + Assert(ASMGetFlags() & X86_EFL_IF); + const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */ + pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + } + } + else + rc = VERR_INVALID_CPU_ID; + + if (fTakeUsedLock) + { + int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM); + AssertRC(rc2); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + } + } + + LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Wakes up the halted EMT thread so it can service a pending request. + * + * @returns VBox status code. + * @retval VINF_SUCCESS if successfully woken up. + * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked. + * + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * @param idCpu The Virtual CPU ID of the EMT to wake up. + * @thread Any but EMT(idCpu). + */ +GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, PVM pVM, VMCPUID idCpu) +{ + return GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */); +} + + +/** + * Wakes up the halted EMT thread so it can service a pending request, no GVM + * parameter and no used locking. + * + * @returns VBox status code. + * @retval VINF_SUCCESS if successfully woken up. + * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked. + * + * @param pVM The cross context VM structure. + * @param idCpu The Virtual CPU ID of the EMT to wake up. + * @thread Any but EMT(idCpu). + * @deprecated Don't use in new code if possible! Use the GVM variant. + */ +GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PVM pVM, VMCPUID idCpu) +{ + GVMM_CHECK_SMAP_SETUP(); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + PGVM pGVM; + PGVMM pGVMM; + int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + if (RT_SUCCESS(rc)) + rc = GVMMR0SchedWakeUpEx(pGVM, pVM, idCpu, false /*fTakeUsedLock*/); + return rc; +} + + +/** + * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes + * the Virtual CPU if it's still busy executing guest code. + * + * @returns VBox status code. + * @retval VINF_SUCCESS if poked successfully. + * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC. + * + * @param pGVM The global (ring-0) VM structure. + * @param pVCpu The cross context virtual CPU structure. + */ +DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu) +{ + pGVM->gvmm.s.StatsSched.cPokeCalls++; + + RTCPUID idHostCpu = pVCpu->idHostCpu; + if ( idHostCpu == NIL_RTCPUID + || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC) + { + pGVM->gvmm.s.StatsSched.cPokeNotBusy++; + return VINF_GVM_NOT_BUSY_IN_GC; + } + + /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */ + RTMpPokeCpu(idHostCpu); + return VINF_SUCCESS; +} + + +/** + * Pokes an EMT if it's still busy running guest code. + * + * @returns VBox status code. + * @retval VINF_SUCCESS if poked successfully. + * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC. + * + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * @param idCpu The ID of the virtual CPU to poke. + * @param fTakeUsedLock Take the used lock or not + */ +GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fTakeUsedLock) +{ + /* + * Validate input and take the UsedLock. + */ + PGVMM pGVMM; + int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, fTakeUsedLock); + if (RT_SUCCESS(rc)) + { + if (idCpu < pGVM->cCpus) + rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]); + else + rc = VERR_INVALID_CPU_ID; + + if (fTakeUsedLock) + { + int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM); + AssertRC(rc2); + } + } + + LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc)); + return rc; +} + + +/** + * Pokes an EMT if it's still busy running guest code. + * + * @returns VBox status code. + * @retval VINF_SUCCESS if poked successfully. + * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC. + * + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * @param idCpu The ID of the virtual CPU to poke. + */ +GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, PVM pVM, VMCPUID idCpu) +{ + return GVMMR0SchedPokeEx(pGVM, pVM, idCpu, true /* fTakeUsedLock */); +} + + +/** + * Pokes an EMT if it's still busy running guest code, no GVM parameter and no + * used locking. + * + * @returns VBox status code. + * @retval VINF_SUCCESS if poked successfully. + * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC. + * + * @param pVM The cross context VM structure. + * @param idCpu The ID of the virtual CPU to poke. + * + * @deprecated Don't use in new code if possible! Use the GVM variant. + */ +GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PVM pVM, VMCPUID idCpu) +{ + PGVM pGVM; + PGVMM pGVMM; + int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /*fTakeUsedLock*/); + if (RT_SUCCESS(rc)) + { + if (idCpu < pGVM->cCpus) + rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]); + else + rc = VERR_INVALID_CPU_ID; + } + return rc; +} + + +/** + * Wakes up a set of halted EMT threads so they can service pending request. + * + * @returns VBox status code, no informational stuff. + * + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * @param pSleepSet The set of sleepers to wake up. + * @param pPokeSet The set of CPUs to poke. + */ +GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet) +{ + AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER); + AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER); + GVMM_CHECK_SMAP_SETUP(); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + RTNATIVETHREAD hSelf = RTThreadNativeSelf(); + + /* + * Validate input and take the UsedLock. + */ + PGVMM pGVMM; + int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /* fTakeUsedLock */); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + if (RT_SUCCESS(rc)) + { + rc = VINF_SUCCESS; + VMCPUID idCpu = pGVM->cCpus; + while (idCpu-- > 0) + { + /* Don't try poke or wake up ourselves. */ + if (pGVM->aCpus[idCpu].hEMT == hSelf) + continue; + + /* just ignore errors for now. */ + if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu)) + { + gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + } + else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu)) + { + gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + } + } + + int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM); + AssertRC(rc2); + GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING); + } + + LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc)); + return rc; +} + + +/** + * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus. + * + * @returns see GVMMR0SchedWakeUpAndPokeCpus. + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * @param pReq Pointer to the request packet. + */ +GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq) +{ + /* + * Validate input and pass it on. + */ + AssertPtrReturn(pReq, VERR_INVALID_POINTER); + AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER); + + return GVMMR0SchedWakeUpAndPokeCpus(pGVM, pVM, &pReq->SleepSet, &pReq->PokeSet); +} + + + +/** + * Poll the schedule to see if someone else should get a chance to run. + * + * This is a bit hackish and will not work too well if the machine is + * under heavy load from non-VM processes. + * + * @returns VINF_SUCCESS if not yielded. + * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made. + * @param pGVM The global (ring-0) VM structure. + * @param pVM The cross context VM structure. + * @param idCpu The Virtual CPU ID of the calling EMT. + * @param fYield Whether to yield or not. + * This is for when we're spinning in the halt loop. + * @thread EMT(idCpu). + */ +GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, PVM pVM, VMCPUID idCpu, bool fYield) +{ + /* + * Validate input. + */ + PGVMM pGVMM; + int rc = gvmmR0ByGVMandVMandEMT(pGVM, pVM, idCpu, &pGVMM); + if (RT_SUCCESS(rc)) + { + /* + * We currently only implement helping doing wakeups (fYield = false), so don't + * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything. + */ + if (!fYield && pGVMM->fDoEarlyWakeUps) + { + rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc); + pGVM->gvmm.s.StatsSched.cPollCalls++; + + Assert(ASMGetFlags() & X86_EFL_IF); + const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */ + + pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now); + + GVMMR0_USED_SHARED_UNLOCK(pGVMM); + } + /* + * Not quite sure what we could do here... + */ + else if (fYield) + rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */ + else + rc = VINF_SUCCESS; + } + + LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc)); + return rc; +} + + +#ifdef GVMM_SCHED_WITH_PPT +/** + * Timer callback for the periodic preemption timer. + * + * @param pTimer The timer handle. + * @param pvUser Pointer to the per cpu structure. + * @param iTick The current tick. + */ +static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick) +{ + PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser; + NOREF(pTimer); NOREF(iTick); + + /* + * Termination check + */ + if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC) + return; + + /* + * Do the house keeping. + */ + RTSpinlockAcquire(pCpu->Ppt.hSpinlock); + + if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval) + { + /* + * Historicize the max frequency. + */ + uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory); + pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz; + pCpu->Ppt.iTickHistorization = 0; + pCpu->Ppt.uDesiredHz = 0; + + /* + * Check if the current timer frequency. + */ + uint32_t uHistMaxHz = 0; + for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++) + if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz) + uHistMaxHz = pCpu->Ppt.aHzHistory[i]; + if (uHistMaxHz == pCpu->Ppt.uTimerHz) + RTSpinlockRelease(pCpu->Ppt.hSpinlock); + else if (uHistMaxHz) + { + /* + * Reprogram it. + */ + pCpu->Ppt.cChanges++; + pCpu->Ppt.iTickHistorization = 0; + pCpu->Ppt.uTimerHz = uHistMaxHz; + uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz; + pCpu->Ppt.cNsInterval = cNsInterval; + if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS) + pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS + + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1) + / cNsInterval; + else + pCpu->Ppt.cTicksHistoriziationInterval = 1; + RTSpinlockRelease(pCpu->Ppt.hSpinlock); + + /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/ + RTTimerChangeInterval(pTimer, cNsInterval); + } + else + { + /* + * Stop it. + */ + pCpu->Ppt.fStarted = false; + pCpu->Ppt.uTimerHz = 0; + pCpu->Ppt.cNsInterval = 0; + RTSpinlockRelease(pCpu->Ppt.hSpinlock); + + /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/ + RTTimerStop(pTimer); + } + } + else + RTSpinlockRelease(pCpu->Ppt.hSpinlock); +} +#endif /* GVMM_SCHED_WITH_PPT */ + + +/** + * Updates the periodic preemption timer for the calling CPU. + * + * The caller must have disabled preemption! + * The caller must check that the host can do high resolution timers. + * + * @param pVM The cross context VM structure. + * @param idHostCpu The current host CPU id. + * @param uHz The desired frequency. + */ +GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz) +{ + NOREF(pVM); +#ifdef GVMM_SCHED_WITH_PPT + Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); + Assert(RTTimerCanDoHighResolution()); + + /* + * Resolve the per CPU data. + */ + uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu); + PGVMM pGVMM = g_pGVMM; + if ( !VALID_PTR(pGVMM) + || pGVMM->u32Magic != GVMM_MAGIC) + return; + AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus)); + PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu]; + AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC + && pCpu->idCpu == idHostCpu, + ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu)); + + /* + * Check whether we need to do anything about the timer. + * We have to be a little bit careful since we might be race the timer + * callback here. + */ + if (uHz > 16384) + uHz = 16384; /** @todo add a query method for this! */ + if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz) + && uHz >= pCpu->Ppt.uMinHz + && !pCpu->Ppt.fStarting /* solaris paranoia */)) + { + RTSpinlockAcquire(pCpu->Ppt.hSpinlock); + + pCpu->Ppt.uDesiredHz = uHz; + uint32_t cNsInterval = 0; + if (!pCpu->Ppt.fStarted) + { + pCpu->Ppt.cStarts++; + pCpu->Ppt.fStarted = true; + pCpu->Ppt.fStarting = true; + pCpu->Ppt.iTickHistorization = 0; + pCpu->Ppt.uTimerHz = uHz; + pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz; + if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS) + pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS + + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1) + / cNsInterval; + else + pCpu->Ppt.cTicksHistoriziationInterval = 1; + } + + RTSpinlockRelease(pCpu->Ppt.hSpinlock); + + if (cNsInterval) + { + RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval); + int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval); + AssertRC(rc); + + RTSpinlockAcquire(pCpu->Ppt.hSpinlock); + if (RT_FAILURE(rc)) + pCpu->Ppt.fStarted = false; + pCpu->Ppt.fStarting = false; + RTSpinlockRelease(pCpu->Ppt.hSpinlock); + } + } +#else /* !GVMM_SCHED_WITH_PPT */ + NOREF(idHostCpu); NOREF(uHz); +#endif /* !GVMM_SCHED_WITH_PPT */ +} + + +/** + * Retrieves the GVMM statistics visible to the caller. + * + * @returns VBox status code. + * + * @param pStats Where to put the statistics. + * @param pSession The current session. + * @param pGVM The GVM to obtain statistics for. Optional. + * @param pVM The VM structure corresponding to @a pGVM. + */ +GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM) +{ + LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM)); + + /* + * Validate input. + */ + AssertPtrReturn(pSession, VERR_INVALID_POINTER); + AssertPtrReturn(pStats, VERR_INVALID_POINTER); + pStats->cVMs = 0; /* (crash before taking the sem...) */ + + /* + * Take the lock and get the VM statistics. + */ + PGVMM pGVMM; + if (pGVM) + { + int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/); + if (RT_FAILURE(rc)) + return rc; + pStats->SchedVM = pGVM->gvmm.s.StatsSched; + } + else + { + GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); + memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM)); + + int rc = GVMMR0_USED_SHARED_LOCK(pGVMM); + AssertRCReturn(rc, rc); + } + + /* + * Enumerate the VMs and add the ones visible to the statistics. + */ + pStats->cVMs = 0; + pStats->cEMTs = 0; + memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum)); + + for (unsigned i = pGVMM->iUsedHead; + i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles); + i = pGVMM->aHandles[i].iNext) + { + PGVM pOtherGVM = pGVMM->aHandles[i].pGVM; + void *pvObj = pGVMM->aHandles[i].pvObj; + if ( VALID_PTR(pvObj) + && VALID_PTR(pOtherGVM) + && pOtherGVM->u32Magic == GVM_MAGIC + && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL))) + { + pStats->cVMs++; + pStats->cEMTs += pOtherGVM->cCpus; + + pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls; + pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking; + pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts; + pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking; + pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps; + + pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls; + pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted; + pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps; + + pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls; + pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy; + + pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls; + pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts; + pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps; + } + } + + /* + * Copy out the per host CPU statistics. + */ + uint32_t iDstCpu = 0; + uint32_t cSrcCpus = pGVMM->cHostCpus; + for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++) + { + if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID) + { + pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu; + pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet; +#ifdef GVMM_SCHED_WITH_PPT + pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz; + pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz; + pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges; + pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts; +#else + pStats->aHostCpus[iDstCpu].uDesiredHz = 0; + pStats->aHostCpus[iDstCpu].uTimerHz = 0; + pStats->aHostCpus[iDstCpu].cChanges = 0; + pStats->aHostCpus[iDstCpu].cStarts = 0; +#endif + iDstCpu++; + if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus)) + break; + } + } + pStats->cHostCpus = iDstCpu; + + GVMMR0_USED_SHARED_UNLOCK(pGVMM); + + return VINF_SUCCESS; +} + + +/** + * VMMR0 request wrapper for GVMMR0QueryStatistics. + * + * @returns see GVMMR0QueryStatistics. + * @param pGVM The global (ring-0) VM structure. Optional. + * @param pVM The cross context VM structure. Optional. + * @param pReq Pointer to the request packet. + * @param pSession The current session. + */ +GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession) +{ + /* + * Validate input and pass it on. + */ + AssertPtrReturn(pReq, VERR_INVALID_POINTER); + AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER); + AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER); + + return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM, pVM); +} + + +/** + * Resets the specified GVMM statistics. + * + * @returns VBox status code. + * + * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset. + * @param pSession The current session. + * @param pGVM The GVM to reset statistics for. Optional. + * @param pVM The VM structure corresponding to @a pGVM. + */ +GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM, PVM pVM) +{ + LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p pVM=%p\n", pStats, pSession, pGVM, pVM)); + + /* + * Validate input. + */ + AssertPtrReturn(pSession, VERR_INVALID_POINTER); + AssertPtrReturn(pStats, VERR_INVALID_POINTER); + + /* + * Take the lock and get the VM statistics. + */ + PGVMM pGVMM; + if (pGVM) + { + int rc = gvmmR0ByGVMandVM(pGVM, pVM, &pGVMM, true /*fTakeUsedLock*/); + if (RT_FAILURE(rc)) + return rc; +# define MAYBE_RESET_FIELD(field) \ + do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0) + MAYBE_RESET_FIELD(cHaltCalls); + MAYBE_RESET_FIELD(cHaltBlocking); + MAYBE_RESET_FIELD(cHaltTimeouts); + MAYBE_RESET_FIELD(cHaltNotBlocking); + MAYBE_RESET_FIELD(cHaltWakeUps); + MAYBE_RESET_FIELD(cWakeUpCalls); + MAYBE_RESET_FIELD(cWakeUpNotHalted); + MAYBE_RESET_FIELD(cWakeUpWakeUps); + MAYBE_RESET_FIELD(cPokeCalls); + MAYBE_RESET_FIELD(cPokeNotBusy); + MAYBE_RESET_FIELD(cPollCalls); + MAYBE_RESET_FIELD(cPollHalts); + MAYBE_RESET_FIELD(cPollWakeUps); +# undef MAYBE_RESET_FIELD + } + else + { + GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); + + int rc = GVMMR0_USED_SHARED_LOCK(pGVMM); + AssertRCReturn(rc, rc); + } + + /* + * Enumerate the VMs and add the ones visible to the statistics. + */ + if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum))) + { + for (unsigned i = pGVMM->iUsedHead; + i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles); + i = pGVMM->aHandles[i].iNext) + { + PGVM pOtherGVM = pGVMM->aHandles[i].pGVM; + void *pvObj = pGVMM->aHandles[i].pvObj; + if ( VALID_PTR(pvObj) + && VALID_PTR(pOtherGVM) + && pOtherGVM->u32Magic == GVM_MAGIC + && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL))) + { +# define MAYBE_RESET_FIELD(field) \ + do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0) + MAYBE_RESET_FIELD(cHaltCalls); + MAYBE_RESET_FIELD(cHaltBlocking); + MAYBE_RESET_FIELD(cHaltTimeouts); + MAYBE_RESET_FIELD(cHaltNotBlocking); + MAYBE_RESET_FIELD(cHaltWakeUps); + MAYBE_RESET_FIELD(cWakeUpCalls); + MAYBE_RESET_FIELD(cWakeUpNotHalted); + MAYBE_RESET_FIELD(cWakeUpWakeUps); + MAYBE_RESET_FIELD(cPokeCalls); + MAYBE_RESET_FIELD(cPokeNotBusy); + MAYBE_RESET_FIELD(cPollCalls); + MAYBE_RESET_FIELD(cPollHalts); + MAYBE_RESET_FIELD(cPollWakeUps); +# undef MAYBE_RESET_FIELD + } + } + } + + GVMMR0_USED_SHARED_UNLOCK(pGVMM); + + return VINF_SUCCESS; +} + + +/** + * VMMR0 request wrapper for GVMMR0ResetStatistics. + * + * @returns see GVMMR0ResetStatistics. + * @param pGVM The global (ring-0) VM structure. Optional. + * @param pVM The cross context VM structure. Optional. + * @param pReq Pointer to the request packet. + * @param pSession The current session. + */ +GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PVM pVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession) +{ + /* + * Validate input and pass it on. + */ + AssertPtrReturn(pReq, VERR_INVALID_POINTER); + AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER); + AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER); + + return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM, pVM); +} + |